From 25b979bbede2d88e805a1dc682f17fc16bd4fb6d Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Thu, 19 Feb 2026 23:10:39 -0600 Subject: [PATCH 1/2] [scheduler] Use relaxed memory ordering for atomic reads under lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the scheduler lock is already held, atomic loads of the remove flag don't need acquire ordering — the mutex provides all necessary synchronization guarantees. Add is_item_removed_locked_() helper that uses memory_order_relaxed and use it in all _locked_ call sites. This eliminates redundant memw barriers on Xtensa, saving 12 bytes in matches_item_locked_ (152 → 140 B) and reducing pipeline stalls in the scheduler hot path. --- esphome/core/scheduler.cpp | 6 +++--- esphome/core/scheduler.h | 18 +++++++++++++++--- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/esphome/core/scheduler.cpp b/esphome/core/scheduler.cpp index 3294f689e8..3ff8fa4475 100644 --- a/esphome/core/scheduler.cpp +++ b/esphome/core/scheduler.cpp @@ -406,7 +406,7 @@ void Scheduler::full_cleanup_removed_items_() { // Compact in-place: move valid items forward, recycle removed ones size_t write = 0; for (size_t read = 0; read < this->items_.size(); ++read) { - if (!is_item_removed_(this->items_[read].get())) { + if (!is_item_removed_locked_(this->items_[read].get())) { if (write != read) { this->items_[write] = std::move(this->items_[read]); } @@ -508,7 +508,7 @@ void HOT Scheduler::call(uint32_t now) { // Multi-threaded platforms without atomics: must take lock to safely read remove flag { LockGuard guard{this->lock_}; - if (is_item_removed_(item.get())) { + if (is_item_removed_locked_(item.get())) { this->recycle_item_main_loop_(this->pop_raw_locked_()); this->to_remove_--; continue; @@ -572,7 +572,7 @@ void HOT Scheduler::call(uint32_t now) { void HOT Scheduler::process_to_add() { LockGuard guard{this->lock_}; for (auto &it : this->to_add_) { - if 
(is_item_removed_(it.get())) { + if (is_item_removed_locked_(it.get())) { // Recycle cancelled items this->recycle_item_main_loop_(std::move(it)); continue; diff --git a/esphome/core/scheduler.h b/esphome/core/scheduler.h index 394178a831..cf9a2a3660 100644 --- a/esphome/core/scheduler.h +++ b/esphome/core/scheduler.h @@ -311,8 +311,8 @@ class Scheduler { // Fixes: https://github.com/esphome/esphome/issues/11940 if (!item) return false; - if (item->component != component || item->type != type || (skip_removed && item->remove) || - (match_retry && !item->is_retry)) { + if (item->component != component || item->type != type || + (skip_removed && this->is_item_removed_locked_(item.get())) || (match_retry && !item->is_retry)) { return false; } // Name type must match @@ -465,6 +465,18 @@ class Scheduler { #endif } + // Helper to check if item is marked for removal when lock is already held. + // Uses relaxed ordering since the mutex provides all necessary synchronization. + // IMPORTANT: Caller must hold the scheduler lock before calling this function. + bool is_item_removed_locked_(SchedulerItem *item) const { +#ifdef ESPHOME_THREAD_MULTI_ATOMICS + // Lock already held - relaxed is sufficient, mutex provides ordering + return item->remove.load(std::memory_order_relaxed); +#else + return item->remove; +#endif + } + // Helper to set item removal flag (platform-specific) // For ESPHOME_THREAD_MULTI_NO_ATOMICS platforms, the caller must hold the scheduler lock before calling this // function. Uses memory_order_release when setting to true (for cancellation synchronization), @@ -521,7 +533,7 @@ class Scheduler { // it will iterate over these nullptr items. This check prevents crashes. 
if (!item) continue; - if (is_item_removed_(item.get()) && + if (this->is_item_removed_locked_(item.get()) && this->matches_item_locked_(item, component, name_type, static_name, hash_or_id, SchedulerItem::TIMEOUT, match_retry, /* skip_removed= */ false)) { return true; From 7568b64a4a776643fbed46eda9173050743acd38 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Thu, 19 Feb 2026 23:20:28 -0600 Subject: [PATCH 2/2] Fix remaining direct atomic reads under lock Replace two more direct item->remove accesses (implicit seq_cst) with is_item_removed_locked_() in call() and cleanup_(), both under lock. --- esphome/core/scheduler.cpp | 4 ++-- esphome/core/scheduler.h | 8 ++++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/esphome/core/scheduler.cpp b/esphome/core/scheduler.cpp index 3ff8fa4475..c783a788cc 100644 --- a/esphome/core/scheduler.cpp +++ b/esphome/core/scheduler.cpp @@ -545,7 +545,7 @@ void HOT Scheduler::call(uint32_t now) { // during the function call and know if we were cancelled. auto executed_item = this->pop_raw_locked_(); - if (executed_item->remove) { + if (this->is_item_removed_locked_(executed_item.get())) { // We were removed/cancelled in the function call, recycle and continue this->to_remove_--; this->recycle_item_main_loop_(std::move(executed_item)); @@ -605,7 +605,7 @@ size_t HOT Scheduler::cleanup_() { LockGuard guard{this->lock_}; while (!this->items_.empty()) { auto &item = this->items_[0]; - if (!item->remove) + if (!this->is_item_removed_locked_(item.get())) break; this->to_remove_--; this->recycle_item_main_loop_(this->pop_raw_locked_()); diff --git a/esphome/core/scheduler.h b/esphome/core/scheduler.h index cf9a2a3660..020d822a0a 100644 --- a/esphome/core/scheduler.h +++ b/esphome/core/scheduler.h @@ -470,8 +470,12 @@ class Scheduler { // IMPORTANT: Caller must hold the scheduler lock before calling this function. 
bool is_item_removed_locked_(SchedulerItem *item) const { #ifdef ESPHOME_THREAD_MULTI_ATOMICS - // Lock already held - relaxed is sufficient, mutex provides ordering - return item->remove.load(std::memory_order_relaxed); + // Lock already held - relaxed is sufficient, mutex provides ordering. + // Use GCC __atomic_load_n builtin instead of std::atomic::load() because + // GCC for Xtensa emits std::atomic::load() as an out-of-line + // libstdc++ call, adding function call overhead that exceeds the memw + // barrier savings this optimization aims to eliminate. + return __atomic_load_n(reinterpret_cast<bool *>(&item->remove), __ATOMIC_RELAXED); #else return item->remove; #endif