[core] Use custom deleter for SchedulerItem unique_ptr to prevent destructor inlining

On BK7231N (Thumb-1/Cortex-M0), GCC inlines ~unique_ptr<SchedulerItem>
(~30 bytes: null check + ~std::function + operator delete) at every
destruction site, while ESP32/ESP8266/RTL8720CF outline it into a single
shared helper. This causes significant flash bloat in scheduler functions.

Use a custom deleter (SchedulerItemDeleter) with its operator() defined
in the .cpp file, ensuring the compiler emits exactly one copy of the
destruction code. All destruction sites now generate a simple function
call instead of inlining the full body.

BK7231N savings (bytes):
- call():           816 -> 670  (-146)
- process_to_add(): 390 -> 308  (-82)
- __adjust_heap:    430 -> 312  (-118)
- pop_raw_locked_(): 192 -> 140  (-52)
- cleanup_():       130 -> 112  (-18)
- SchedulerItemDeleter: +32 (new, single copy)
- Net: ~384 bytes saved

ESP32/ESP8266/RTL8720CF are unaffected (already outline the destructor).
This commit is contained in:
J. Nick Koston
2026-02-24 11:48:48 -06:00
parent 6554ad7c7e
commit e3bf2d78d1
2 changed files with 42 additions and 25 deletions

View File

@@ -33,6 +33,11 @@ static constexpr uint32_t HALF_MAX_UINT32 = std::numeric_limits<uint32_t>::max()
// max delay to start an interval sequence
static constexpr uint32_t MAX_INTERVAL_DELAY = 5000;
// Deliberately defined out-of-line (here in the .cpp) rather than in the header:
// on BK7231N (Thumb-1) GCC would otherwise inline ~unique_ptr<SchedulerItem>
// (~30 bytes: null check + ~std::function + operator delete) at every destruction
// site. Defining the deleter here keeps exactly one shared copy of that code.
void Scheduler::SchedulerItemDeleter::operator()(SchedulerItem *ptr) const noexcept {
  // Sole destruction point for SchedulerItem; all unique_ptr call sites branch here.
  delete ptr;
}
#if defined(ESPHOME_LOG_HAS_VERBOSE) || defined(ESPHOME_DEBUG_SCHEDULER)
// Helper struct for formatting scheduler item names consistently in logs
// Uses a stack buffer to avoid heap allocation
@@ -467,7 +472,7 @@ void HOT Scheduler::call(uint32_t now) {
if (now_64 - last_print > 2000) {
last_print = now_64;
std::vector<std::unique_ptr<SchedulerItem>> old_items;
std::vector<SchedulerItemPtr> old_items;
#ifdef ESPHOME_THREAD_MULTI_ATOMICS
const auto last_dbg = this->last_millis_.load(std::memory_order_relaxed);
const auto major_dbg = this->millis_major_.load(std::memory_order_relaxed);
@@ -480,7 +485,7 @@ void HOT Scheduler::call(uint32_t now) {
// Cleanup before debug output
this->cleanup_();
while (!this->items_.empty()) {
std::unique_ptr<SchedulerItem> item;
SchedulerItemPtr item;
{
LockGuard guard{this->lock_};
item = this->pop_raw_locked_();
@@ -641,7 +646,7 @@ size_t HOT Scheduler::cleanup_() {
}
return this->items_.size();
}
std::unique_ptr<Scheduler::SchedulerItem> HOT Scheduler::pop_raw_locked_() {
Scheduler::SchedulerItemPtr HOT Scheduler::pop_raw_locked_() {
std::pop_heap(this->items_.begin(), this->items_.end(), SchedulerItem::cmp);
// Move the item out before popping - this is the item that was at the front of the heap
@@ -864,8 +869,7 @@ uint64_t Scheduler::millis_64_(uint32_t now) {
#endif
}
bool HOT Scheduler::SchedulerItem::cmp(const std::unique_ptr<SchedulerItem> &a,
const std::unique_ptr<SchedulerItem> &b) {
bool HOT Scheduler::SchedulerItem::cmp(const SchedulerItemPtr &a, const SchedulerItemPtr &b) {
// High bits are almost always equal (change only on 32-bit rollover ~49 days)
// Optimize for common case: check low bits first when high bits are equal
return (a->next_execution_high_ == b->next_execution_high_) ? (a->next_execution_low_ > b->next_execution_low_)
@@ -876,7 +880,7 @@ bool HOT Scheduler::SchedulerItem::cmp(const std::unique_ptr<SchedulerItem> &a,
// IMPORTANT: Caller must hold the scheduler lock before calling this function.
// This protects scheduler_item_pool_ from concurrent access by other threads
// that may be acquiring items from the pool in set_timer_common_().
void Scheduler::recycle_item_main_loop_(std::unique_ptr<SchedulerItem> item) {
void Scheduler::recycle_item_main_loop_(SchedulerItemPtr item) {
if (!item)
return;
@@ -919,8 +923,8 @@ void Scheduler::debug_log_timer_(const SchedulerItem *item, NameType name_type,
// Helper to get or create a scheduler item from the pool
// IMPORTANT: Caller must hold the scheduler lock before calling this function.
std::unique_ptr<Scheduler::SchedulerItem> Scheduler::get_item_from_pool_locked_() {
std::unique_ptr<SchedulerItem> item;
Scheduler::SchedulerItemPtr Scheduler::get_item_from_pool_locked_() {
SchedulerItemPtr item;
if (!this->scheduler_item_pool_.empty()) {
item = std::move(this->scheduler_item_pool_.back());
this->scheduler_item_pool_.pop_back();
@@ -928,7 +932,7 @@ std::unique_ptr<Scheduler::SchedulerItem> Scheduler::get_item_from_pool_locked_(
ESP_LOGD(TAG, "Reused item from pool (pool size now: %zu)", this->scheduler_item_pool_.size());
#endif
} else {
item = make_unique<SchedulerItem>();
item = SchedulerItemPtr(new SchedulerItem());
#ifdef ESPHOME_DEBUG_SCHEDULER
ESP_LOGD(TAG, "Allocated new item (pool empty)");
#endif

View File

@@ -142,6 +142,19 @@ class Scheduler {
};
protected:
// Forward declaration so the deleter and alias below can name SchedulerItem;
// the full definition follows later in this class.
struct SchedulerItem;
// Custom deleter for SchedulerItem unique_ptr that prevents the compiler from
// inlining the destructor at every destruction site. On BK7231N (Thumb-1), GCC
// inlines ~unique_ptr<SchedulerItem> (~30 bytes: null check + ~std::function +
// operator delete) at every destruction site, while ESP32/ESP8266/RTL8720CF outline
// it into a single helper. This deleter ensures only one copy of that code exists.
// operator() is intentionally declared here but defined in scheduler.cpp so the
// compiler cannot see (and therefore cannot inline) its body at call sites.
struct SchedulerItemDeleter {
void operator()(SchedulerItem *ptr) const noexcept;
};
// All owning pointers to SchedulerItem use this alias so every destruction path
// goes through the single out-of-line deleter above.
using SchedulerItemPtr = std::unique_ptr<SchedulerItem, SchedulerItemDeleter>;
struct SchedulerItem {
// Ordered by size to minimize padding
Component *component;
@@ -233,7 +246,7 @@ class Scheduler {
name_type_ = type;
}
static bool cmp(const std::unique_ptr<SchedulerItem> &a, const std::unique_ptr<SchedulerItem> &b);
static bool cmp(const SchedulerItemPtr &a, const SchedulerItemPtr &b);
// Note: We use 48 bits total (32 + 16), stored in a 64-bit value for API compatibility.
// The upper 16 bits of the 64-bit value are always zero, which is fine since
@@ -276,10 +289,10 @@ class Scheduler {
size_t cleanup_();
// Remove and return the front item from the heap
// IMPORTANT: Caller must hold the scheduler lock before calling this function.
std::unique_ptr<SchedulerItem> pop_raw_locked_();
SchedulerItemPtr pop_raw_locked_();
// Get or create a scheduler item from the pool
// IMPORTANT: Caller must hold the scheduler lock before calling this function.
std::unique_ptr<SchedulerItem> get_item_from_pool_locked_();
SchedulerItemPtr get_item_from_pool_locked_();
private:
// Helper to cancel items - must be called with lock held
@@ -303,9 +316,9 @@ class Scheduler {
// Helper function to check if item matches criteria for cancellation
// name_type determines matching: STATIC_STRING uses static_name, others use hash_or_id
// IMPORTANT: Must be called with scheduler lock held
inline bool HOT matches_item_locked_(const std::unique_ptr<SchedulerItem> &item, Component *component,
NameType name_type, const char *static_name, uint32_t hash_or_id,
SchedulerItem::Type type, bool match_retry, bool skip_removed = true) const {
inline bool HOT matches_item_locked_(const SchedulerItemPtr &item, Component *component, NameType name_type,
const char *static_name, uint32_t hash_or_id, SchedulerItem::Type type,
bool match_retry, bool skip_removed = true) const {
// THREAD SAFETY: Check for nullptr first to prevent LoadProhibited crashes. On multi-threaded
// platforms, items can be moved out of defer_queue_ during processing, leaving nullptr entries.
// PR #11305 added nullptr checks in callers (mark_matching_items_removed_locked_()), but this check
@@ -340,7 +353,7 @@ class Scheduler {
// IMPORTANT: Only call from main loop context! Recycling clears the callback,
// so calling from another thread while the callback is executing causes use-after-free.
// IMPORTANT: Caller must hold the scheduler lock before calling this function.
void recycle_item_main_loop_(std::unique_ptr<SchedulerItem> item);
void recycle_item_main_loop_(SchedulerItemPtr item);
// Helper to perform full cleanup when too many items are cancelled
void full_cleanup_removed_items_();
@@ -396,7 +409,7 @@ class Scheduler {
// Merge lock acquisitions: instead of separate locks for move-out and recycle (2N+1 total),
// recycle each item after re-acquiring the lock for the next iteration (N+1 total).
// The lock is held across: recycle → loop condition → move-out, then released for execution.
std::unique_ptr<SchedulerItem> item;
SchedulerItemPtr item;
this->lock_.lock();
while (this->defer_queue_front_ < defer_queue_end) {
@@ -497,8 +510,8 @@ class Scheduler {
// Returns the number of items marked for removal
// IMPORTANT: Must be called with scheduler lock held
__attribute__((noinline)) size_t mark_matching_items_removed_locked_(
std::vector<std::unique_ptr<SchedulerItem>> &container, Component *component, NameType name_type,
const char *static_name, uint32_t hash_or_id, SchedulerItem::Type type, bool match_retry) {
std::vector<SchedulerItemPtr> &container, Component *component, NameType name_type, const char *static_name,
uint32_t hash_or_id, SchedulerItem::Type type, bool match_retry) {
size_t count = 0;
for (auto &item : container) {
// Skip nullptr items (can happen in defer_queue_ when items are being processed)
@@ -514,15 +527,15 @@ class Scheduler {
}
Mutex lock_;
std::vector<std::unique_ptr<SchedulerItem>> items_;
std::vector<std::unique_ptr<SchedulerItem>> to_add_;
std::vector<SchedulerItemPtr> items_;
std::vector<SchedulerItemPtr> to_add_;
#ifndef ESPHOME_THREAD_SINGLE
// Single-core platforms don't need the defer queue and save ~32 bytes of RAM
// Using std::vector instead of std::deque avoids 512-byte chunked allocations
// Index tracking avoids O(n) erase() calls when draining the queue each loop
std::vector<std::unique_ptr<SchedulerItem>> defer_queue_; // FIFO queue for defer() calls
size_t defer_queue_front_{0}; // Index of first valid item in defer_queue_ (tracks consumed items)
#endif /* ESPHOME_THREAD_SINGLE */
std::vector<SchedulerItemPtr> defer_queue_; // FIFO queue for defer() calls
size_t defer_queue_front_{0}; // Index of first valid item in defer_queue_ (tracks consumed items)
#endif /* ESPHOME_THREAD_SINGLE */
uint32_t to_remove_{0};
// Memory pool for recycling SchedulerItem objects to reduce heap churn.
@@ -533,7 +546,7 @@ class Scheduler {
// - The pool significantly reduces heap fragmentation which is critical because heap allocation/deallocation
// can stall the entire system, causing timing issues and dropped events for any components that need
// to synchronize between tasks (see https://github.com/esphome/backlog/issues/52)
std::vector<std::unique_ptr<SchedulerItem>> scheduler_item_pool_;
std::vector<SchedulerItemPtr> scheduler_item_pool_;
#ifdef ESPHOME_THREAD_MULTI_ATOMICS
/*