From 25b979bbede2d88e805a1dc682f17fc16bd4fb6d Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Thu, 19 Feb 2026 23:10:39 -0600 Subject: [PATCH 1/2] [scheduler] Use relaxed memory ordering for atomic reads under lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the scheduler lock is already held, atomic loads of the remove flag don't need acquire ordering — the mutex provides all necessary synchronization guarantees. Add is_item_removed_locked_() helper that uses memory_order_relaxed and use it in all _locked_ call sites. This eliminates redundant memw barriers on Xtensa, saving 12 bytes in matches_item_locked_ (152 → 140 B) and reducing pipeline stalls in the scheduler hot path. --- esphome/core/scheduler.cpp | 6 +++--- esphome/core/scheduler.h | 18 +++++++++++++++--- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/esphome/core/scheduler.cpp b/esphome/core/scheduler.cpp index 3294f689e8..3ff8fa4475 100644 --- a/esphome/core/scheduler.cpp +++ b/esphome/core/scheduler.cpp @@ -406,7 +406,7 @@ void Scheduler::full_cleanup_removed_items_() { // Compact in-place: move valid items forward, recycle removed ones size_t write = 0; for (size_t read = 0; read < this->items_.size(); ++read) { - if (!is_item_removed_(this->items_[read].get())) { + if (!is_item_removed_locked_(this->items_[read].get())) { if (write != read) { this->items_[write] = std::move(this->items_[read]); } @@ -508,7 +508,7 @@ void HOT Scheduler::call(uint32_t now) { // Multi-threaded platforms without atomics: must take lock to safely read remove flag { LockGuard guard{this->lock_}; - if (is_item_removed_(item.get())) { + if (is_item_removed_locked_(item.get())) { this->recycle_item_main_loop_(this->pop_raw_locked_()); this->to_remove_--; continue; @@ -572,7 +572,7 @@ void HOT Scheduler::call(uint32_t now) { void HOT Scheduler::process_to_add() { LockGuard guard{this->lock_}; for (auto &it : this->to_add_) { - if 
(is_item_removed_(it.get())) { + if (is_item_removed_locked_(it.get())) { // Recycle cancelled items this->recycle_item_main_loop_(std::move(it)); continue; diff --git a/esphome/core/scheduler.h b/esphome/core/scheduler.h index 394178a831..cf9a2a3660 100644 --- a/esphome/core/scheduler.h +++ b/esphome/core/scheduler.h @@ -311,8 +311,8 @@ class Scheduler { // Fixes: https://github.com/esphome/esphome/issues/11940 if (!item) return false; - if (item->component != component || item->type != type || (skip_removed && item->remove) || - (match_retry && !item->is_retry)) { + if (item->component != component || item->type != type || + (skip_removed && this->is_item_removed_locked_(item.get())) || (match_retry && !item->is_retry)) { return false; } // Name type must match @@ -465,6 +465,18 @@ class Scheduler { #endif } + // Helper to check if item is marked for removal when lock is already held. + // Uses relaxed ordering since the mutex provides all necessary synchronization. + // IMPORTANT: Caller must hold the scheduler lock before calling this function. + bool is_item_removed_locked_(SchedulerItem *item) const { +#ifdef ESPHOME_THREAD_MULTI_ATOMICS + // Lock already held - relaxed is sufficient, mutex provides ordering + return item->remove.load(std::memory_order_relaxed); +#else + return item->remove; +#endif + } + // Helper to set item removal flag (platform-specific) // For ESPHOME_THREAD_MULTI_NO_ATOMICS platforms, the caller must hold the scheduler lock before calling this // function. Uses memory_order_release when setting to true (for cancellation synchronization), @@ -521,7 +533,7 @@ class Scheduler { // it will iterate over these nullptr items. This check prevents crashes. 
if (!item) continue; - if (is_item_removed_(item.get()) && + if (this->is_item_removed_locked_(item.get()) && this->matches_item_locked_(item, component, name_type, static_name, hash_or_id, SchedulerItem::TIMEOUT, match_retry, /* skip_removed= */ false)) { return true; From 7568b64a4a776643fbed46eda9173050743acd38 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Thu, 19 Feb 2026 23:20:28 -0600 Subject: [PATCH 2/2] Fix remaining direct atomic reads under lock Replace two more direct item->remove accesses (implicit seq_cst) with is_item_removed_locked_() in call() and cleanup_(), both under lock. --- esphome/core/scheduler.cpp | 4 ++-- esphome/core/scheduler.h | 8 ++++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/esphome/core/scheduler.cpp b/esphome/core/scheduler.cpp index 3ff8fa4475..c783a788cc 100644 --- a/esphome/core/scheduler.cpp +++ b/esphome/core/scheduler.cpp @@ -545,7 +545,7 @@ void HOT Scheduler::call(uint32_t now) { // during the function call and know if we were cancelled. auto executed_item = this->pop_raw_locked_(); - if (executed_item->remove) { + if (this->is_item_removed_locked_(executed_item.get())) { // We were removed/cancelled in the function call, recycle and continue this->to_remove_--; this->recycle_item_main_loop_(std::move(executed_item)); @@ -605,7 +605,7 @@ size_t HOT Scheduler::cleanup_() { LockGuard guard{this->lock_}; while (!this->items_.empty()) { auto &item = this->items_[0]; - if (!item->remove) + if (!this->is_item_removed_locked_(item.get())) break; this->to_remove_--; this->recycle_item_main_loop_(this->pop_raw_locked_()); diff --git a/esphome/core/scheduler.h b/esphome/core/scheduler.h index cf9a2a3660..020d822a0a 100644 --- a/esphome/core/scheduler.h +++ b/esphome/core/scheduler.h @@ -470,8 +470,12 @@ class Scheduler { // IMPORTANT: Caller must hold the scheduler lock before calling this function. 
bool is_item_removed_locked_(SchedulerItem *item) const { #ifdef ESPHOME_THREAD_MULTI_ATOMICS - // Lock already held - relaxed is sufficient, mutex provides ordering - return item->remove.load(std::memory_order_relaxed); + // Lock already held - relaxed is sufficient, mutex provides ordering. + // Use GCC __atomic_load_n builtin instead of std::atomic::load() because + // GCC for Xtensa emits std::atomic::load() as an out-of-line + // libstdc++ call, adding function call overhead that exceeds the memw + // barrier savings this optimization aims to eliminate. + return __atomic_load_n(reinterpret_cast<bool *>(&item->remove), __ATOMIC_RELAXED); #else return item->remove; #endif