[core] Use custom deleter for SchedulerItem unique_ptr to prevent destructor inlining

On BK7231N (Thumb-1/Cortex-M0), GCC inlines ~unique_ptr<SchedulerItem>
(~30 bytes: null check + ~std::function + operator delete) at every
destruction site, while ESP32/ESP8266/RTL8720CF outline it into a single
shared helper. This causes significant flash bloat in scheduler functions.

Use a custom deleter (SchedulerItemDeleter) with its operator() defined
in the .cpp file, ensuring the compiler emits exactly one copy of the
destruction code. All destruction sites now generate a simple function
call instead of inlining the full body.

BK7231N savings (bytes):
- call():           816 -> 670  (-146)
- process_to_add(): 390 -> 308  (-82)
- __adjust_heap:    430 -> 312  (-118)
- pop_raw_locked_(): 192 -> 140  (-52)
- cleanup_():       130 -> 112  (-18)
- SchedulerItemDeleter: +32 (new, single copy)
- Net: ~384 bytes saved

ESP32/ESP8266/RTL8720CF are unaffected (already outline the destructor).
This commit is contained in:
J. Nick Koston
2026-02-24 11:48:48 -06:00
parent 6554ad7c7e
commit e3bf2d78d1
2 changed files with 42 additions and 25 deletions

View File

@@ -33,6 +33,11 @@ static constexpr uint32_t HALF_MAX_UINT32 = std::numeric_limits<uint32_t>::max()
// max delay to start an interval sequence
static constexpr uint32_t MAX_INTERVAL_DELAY = 5000;
// Deliberately defined out-of-line (here in the .cpp) rather than in the header:
// on BK7231N (Thumb-1) GCC would otherwise inline ~unique_ptr<SchedulerItem>
// (~30 bytes: null check + ~std::function + operator delete) at every destruction
// site. Defining the deleter here keeps exactly one shared copy of that code.
void Scheduler::SchedulerItemDeleter::operator()(SchedulerItem *ptr) const noexcept {
  // Sole destruction point for SchedulerItem; all unique_ptr call sites branch here.
  delete ptr;
}
#if defined(ESPHOME_LOG_HAS_VERBOSE) || defined(ESPHOME_DEBUG_SCHEDULER)
// Helper struct for formatting scheduler item names consistently in logs
// Uses a stack buffer to avoid heap allocation
@@ -467,7 +472,7 @@ void HOT Scheduler::call(uint32_t now) {
if (now_64 - last_print > 2000) {
last_print = now_64;
std::vector<std::unique_ptr<SchedulerItem>> old_items;
std::vector<SchedulerItemPtr> old_items;
#ifdef ESPHOME_THREAD_MULTI_ATOMICS
const auto last_dbg = this->last_millis_.load(std::memory_order_relaxed);
const auto major_dbg = this->millis_major_.load(std::memory_order_relaxed);
@@ -480,7 +485,7 @@ void HOT Scheduler::call(uint32_t now) {
// Cleanup before debug output
this->cleanup_();
while (!this->items_.empty()) {
std::unique_ptr<SchedulerItem> item;
SchedulerItemPtr item;
{
LockGuard guard{this->lock_};
item = this->pop_raw_locked_();
@@ -641,7 +646,7 @@ size_t HOT Scheduler::cleanup_() {
}
return this->items_.size();
}
std::unique_ptr<Scheduler::SchedulerItem> HOT Scheduler::pop_raw_locked_() {
Scheduler::SchedulerItemPtr HOT Scheduler::pop_raw_locked_() {
std::pop_heap(this->items_.begin(), this->items_.end(), SchedulerItem::cmp);
// Move the item out before popping - this is the item that was at the front of the heap
@@ -864,8 +869,7 @@ uint64_t Scheduler::millis_64_(uint32_t now) {
#endif
}
bool HOT Scheduler::SchedulerItem::cmp(const std::unique_ptr<SchedulerItem> &a,
const std::unique_ptr<SchedulerItem> &b) {
bool HOT Scheduler::SchedulerItem::cmp(const SchedulerItemPtr &a, const SchedulerItemPtr &b) {
// High bits are almost always equal (change only on 32-bit rollover ~49 days)
// Optimize for common case: check low bits first when high bits are equal
return (a->next_execution_high_ == b->next_execution_high_) ? (a->next_execution_low_ > b->next_execution_low_)
@@ -876,7 +880,7 @@ bool HOT Scheduler::SchedulerItem::cmp(const std::unique_ptr<SchedulerItem> &a,
// IMPORTANT: Caller must hold the scheduler lock before calling this function.
// This protects scheduler_item_pool_ from concurrent access by other threads
// that may be acquiring items from the pool in set_timer_common_().
void Scheduler::recycle_item_main_loop_(std::unique_ptr<SchedulerItem> item) {
void Scheduler::recycle_item_main_loop_(SchedulerItemPtr item) {
if (!item)
return;
@@ -919,8 +923,8 @@ void Scheduler::debug_log_timer_(const SchedulerItem *item, NameType name_type,
// Helper to get or create a scheduler item from the pool
// IMPORTANT: Caller must hold the scheduler lock before calling this function.
std::unique_ptr<Scheduler::SchedulerItem> Scheduler::get_item_from_pool_locked_() {
std::unique_ptr<SchedulerItem> item;
Scheduler::SchedulerItemPtr Scheduler::get_item_from_pool_locked_() {
SchedulerItemPtr item;
if (!this->scheduler_item_pool_.empty()) {
item = std::move(this->scheduler_item_pool_.back());
this->scheduler_item_pool_.pop_back();
@@ -928,7 +932,7 @@ std::unique_ptr<Scheduler::SchedulerItem> Scheduler::get_item_from_pool_locked_(
ESP_LOGD(TAG, "Reused item from pool (pool size now: %zu)", this->scheduler_item_pool_.size());
#endif
} else {
item = make_unique<SchedulerItem>();
item = SchedulerItemPtr(new SchedulerItem());
#ifdef ESPHOME_DEBUG_SCHEDULER
ESP_LOGD(TAG, "Allocated new item (pool empty)");
#endif

View File

@@ -142,6 +142,19 @@ class Scheduler {
};
protected:
// Forward declaration so the deleter and alias below can name SchedulerItem;
// the full definition follows later in this class.
struct SchedulerItem;
// Custom deleter for SchedulerItem unique_ptr that prevents the compiler from
// inlining the destructor at every destruction site. On BK7231N (Thumb-1), GCC
// inlines ~unique_ptr<SchedulerItem> (~30 bytes: null check + ~std::function +
// operator delete) at every destruction site, while ESP32/ESP8266/RTL8720CF outline
// it into a single helper. This deleter ensures only one copy of that code exists.
// operator() is intentionally declared here but defined in scheduler.cpp so the
// compiler cannot see (and therefore cannot inline) its body at call sites.
struct SchedulerItemDeleter {
void operator()(SchedulerItem *ptr) const noexcept;
};
// All owning pointers to SchedulerItem use this alias so every destruction path
// goes through the single out-of-line deleter above.
using SchedulerItemPtr = std::unique_ptr<SchedulerItem, SchedulerItemDeleter>;
struct SchedulerItem {
// Ordered by size to minimize padding
Component *component;
@@ -233,7 +246,7 @@ class Scheduler {
name_type_ = type;
}
static bool cmp(const std::unique_ptr<SchedulerItem> &a, const std::unique_ptr<SchedulerItem> &b);
static bool cmp(const SchedulerItemPtr &a, const SchedulerItemPtr &b);
// Note: We use 48 bits total (32 + 16), stored in a 64-bit value for API compatibility.
// The upper 16 bits of the 64-bit value are always zero, which is fine since
@@ -276,10 +289,10 @@ class Scheduler {
size_t cleanup_();
// Remove and return the front item from the heap
// IMPORTANT: Caller must hold the scheduler lock before calling this function.
std::unique_ptr<SchedulerItem> pop_raw_locked_();
SchedulerItemPtr pop_raw_locked_();
// Get or create a scheduler item from the pool
// IMPORTANT: Caller must hold the scheduler lock before calling this function.
std::unique_ptr<SchedulerItem> get_item_from_pool_locked_();
SchedulerItemPtr get_item_from_pool_locked_();
private:
// Helper to cancel items - must be called with lock held
@@ -303,9 +316,9 @@ class Scheduler {
// Helper function to check if item matches criteria for cancellation
// name_type determines matching: STATIC_STRING uses static_name, others use hash_or_id
// IMPORTANT: Must be called with scheduler lock held
inline bool HOT matches_item_locked_(const std::unique_ptr<SchedulerItem> &item, Component *component,
NameType name_type, const char *static_name, uint32_t hash_or_id,
SchedulerItem::Type type, bool match_retry, bool skip_removed = true) const {
inline bool HOT matches_item_locked_(const SchedulerItemPtr &item, Component *component, NameType name_type,
const char *static_name, uint32_t hash_or_id, SchedulerItem::Type type,
bool match_retry, bool skip_removed = true) const {
// THREAD SAFETY: Check for nullptr first to prevent LoadProhibited crashes. On multi-threaded
// platforms, items can be moved out of defer_queue_ during processing, leaving nullptr entries.
// PR #11305 added nullptr checks in callers (mark_matching_items_removed_locked_()), but this check
@@ -340,7 +353,7 @@ class Scheduler {
// IMPORTANT: Only call from main loop context! Recycling clears the callback,
// so calling from another thread while the callback is executing causes use-after-free.
// IMPORTANT: Caller must hold the scheduler lock before calling this function.
void recycle_item_main_loop_(std::unique_ptr<SchedulerItem> item);
void recycle_item_main_loop_(SchedulerItemPtr item);
// Helper to perform full cleanup when too many items are cancelled
void full_cleanup_removed_items_();
@@ -396,7 +409,7 @@ class Scheduler {
// Merge lock acquisitions: instead of separate locks for move-out and recycle (2N+1 total),
// recycle each item after re-acquiring the lock for the next iteration (N+1 total).
// The lock is held across: recycle → loop condition → move-out, then released for execution.
std::unique_ptr<SchedulerItem> item;
SchedulerItemPtr item;
this->lock_.lock();
while (this->defer_queue_front_ < defer_queue_end) {
@@ -497,8 +510,8 @@ class Scheduler {
// Returns the number of items marked for removal
// IMPORTANT: Must be called with scheduler lock held
__attribute__((noinline)) size_t mark_matching_items_removed_locked_(
std::vector<std::unique_ptr<SchedulerItem>> &container, Component *component, NameType name_type,
const char *static_name, uint32_t hash_or_id, SchedulerItem::Type type, bool match_retry) {
std::vector<SchedulerItemPtr> &container, Component *component, NameType name_type, const char *static_name,
uint32_t hash_or_id, SchedulerItem::Type type, bool match_retry) {
size_t count = 0;
for (auto &item : container) {
// Skip nullptr items (can happen in defer_queue_ when items are being processed)
@@ -514,15 +527,15 @@ class Scheduler {
}
Mutex lock_;
std::vector<std::unique_ptr<SchedulerItem>> items_;
std::vector<std::unique_ptr<SchedulerItem>> to_add_;
std::vector<SchedulerItemPtr> items_;
std::vector<SchedulerItemPtr> to_add_;
#ifndef ESPHOME_THREAD_SINGLE
// Single-core platforms don't need the defer queue and save ~32 bytes of RAM
// Using std::vector instead of std::deque avoids 512-byte chunked allocations
// Index tracking avoids O(n) erase() calls when draining the queue each loop
std::vector<std::unique_ptr<SchedulerItem>> defer_queue_; // FIFO queue for defer() calls
size_t defer_queue_front_{0}; // Index of first valid item in defer_queue_ (tracks consumed items)
#endif /* ESPHOME_THREAD_SINGLE */
std::vector<SchedulerItemPtr> defer_queue_; // FIFO queue for defer() calls
size_t defer_queue_front_{0}; // Index of first valid item in defer_queue_ (tracks consumed items)
#endif /* ESPHOME_THREAD_SINGLE */
uint32_t to_remove_{0};
// Memory pool for recycling SchedulerItem objects to reduce heap churn.
@@ -533,7 +546,7 @@ class Scheduler {
// - The pool significantly reduces heap fragmentation which is critical because heap allocation/deallocation
// can stall the entire system, causing timing issues and dropped events for any components that need
// to synchronize between tasks (see https://github.com/esphome/backlog/issues/52)
std::vector<std::unique_ptr<SchedulerItem>> scheduler_item_pool_;
std::vector<SchedulerItemPtr> scheduler_item_pool_;
#ifdef ESPHOME_THREAD_MULTI_ATOMICS
/*