Merge branch 'wifi-memcpy-sort' into integration

This commit is contained in:
J. Nick Koston
2026-02-12 12:33:55 -06:00
4 changed files with 96 additions and 46 deletions

View File

@@ -38,8 +38,7 @@ void PulseMeterSensor::setup() {
}
void PulseMeterSensor::loop() {
// Reset the count in get before we pass it back to the ISR as set
this->get_->count_ = 0;
State state;
{
// Lock the interrupt so the interrupt code doesn't interfere with itself
@@ -58,31 +57,35 @@ void PulseMeterSensor::loop() {
}
this->last_pin_val_ = current;
// Swap out set and get to get the latest state from the ISR
std::swap(this->set_, this->get_);
// Get the latest state from the ISR and reset the count in the ISR
state.last_detected_edge_us_ = this->state_.last_detected_edge_us_;
state.last_rising_edge_us_ = this->state_.last_rising_edge_us_;
state.count_ = this->state_.count_;
this->state_.count_ = 0;
}
const uint32_t now = micros();
// If an edge was peeked, repay the debt
if (this->peeked_edge_ && this->get_->count_ > 0) {
if (this->peeked_edge_ && state.count_ > 0) {
this->peeked_edge_ = false;
this->get_->count_--; // NOLINT(clang-diagnostic-deprecated-volatile)
state.count_--;
}
// If there is an unprocessed edge, and filter_us_ has passed since, count this edge early
if (this->get_->last_rising_edge_us_ != this->get_->last_detected_edge_us_ &&
now - this->get_->last_rising_edge_us_ >= this->filter_us_) {
// If there is an unprocessed edge, and filter_us_ has passed since, count this edge early.
// Wait for the debt to be repaid before counting another unprocessed edge early.
if (!this->peeked_edge_ && state.last_rising_edge_us_ != state.last_detected_edge_us_ &&
now - state.last_rising_edge_us_ >= this->filter_us_) {
this->peeked_edge_ = true;
this->get_->last_detected_edge_us_ = this->get_->last_rising_edge_us_;
this->get_->count_++; // NOLINT(clang-diagnostic-deprecated-volatile)
state.last_detected_edge_us_ = state.last_rising_edge_us_;
state.count_++;
}
// Check if we detected a pulse this loop
if (this->get_->count_ > 0) {
if (state.count_ > 0) {
// Keep a running total of pulses if a total sensor is configured
if (this->total_sensor_ != nullptr) {
this->total_pulses_ += this->get_->count_;
this->total_pulses_ += state.count_;
const uint32_t total = this->total_pulses_;
this->total_sensor_->publish_state(total);
}
@@ -94,15 +97,15 @@ void PulseMeterSensor::loop() {
this->meter_state_ = MeterState::RUNNING;
} break;
case MeterState::RUNNING: {
uint32_t delta_us = this->get_->last_detected_edge_us_ - this->last_processed_edge_us_;
float pulse_width_us = delta_us / float(this->get_->count_);
ESP_LOGV(TAG, "New pulse, delta: %" PRIu32 " µs, count: %" PRIu32 ", width: %.5f µs", delta_us,
this->get_->count_, pulse_width_us);
uint32_t delta_us = state.last_detected_edge_us_ - this->last_processed_edge_us_;
float pulse_width_us = delta_us / float(state.count_);
ESP_LOGV(TAG, "New pulse, delta: %" PRIu32 " µs, count: %" PRIu32 ", width: %.5f µs", delta_us, state.count_,
pulse_width_us);
this->publish_state((60.0f * 1000000.0f) / pulse_width_us);
} break;
}
this->last_processed_edge_us_ = this->get_->last_detected_edge_us_;
this->last_processed_edge_us_ = state.last_detected_edge_us_;
}
// No detected edges this loop
else {
@@ -141,14 +144,14 @@ void IRAM_ATTR PulseMeterSensor::edge_intr(PulseMeterSensor *sensor) {
// This is an interrupt handler - we can't call any virtual method from this method
// Get the current time before we do anything else so the measurements are consistent
const uint32_t now = micros();
auto &state = sensor->edge_state_;
auto &set = *sensor->set_;
auto &edge_state = sensor->edge_state_;
auto &state = sensor->state_;
if ((now - state.last_sent_edge_us_) >= sensor->filter_us_) {
state.last_sent_edge_us_ = now;
set.last_detected_edge_us_ = now;
set.last_rising_edge_us_ = now;
set.count_++; // NOLINT(clang-diagnostic-deprecated-volatile)
if ((now - edge_state.last_sent_edge_us_) >= sensor->filter_us_) {
edge_state.last_sent_edge_us_ = now;
state.last_detected_edge_us_ = now;
state.last_rising_edge_us_ = now;
state.count_++; // NOLINT(clang-diagnostic-deprecated-volatile)
}
// This ISR is bound to rising edges, so the pin is high
@@ -160,26 +163,26 @@ void IRAM_ATTR PulseMeterSensor::pulse_intr(PulseMeterSensor *sensor) {
// Get the current time before we do anything else so the measurements are consistent
const uint32_t now = micros();
const bool pin_val = sensor->isr_pin_.digital_read();
auto &state = sensor->pulse_state_;
auto &set = *sensor->set_;
auto &pulse_state = sensor->pulse_state_;
auto &state = sensor->state_;
// Filter length has passed since the last interrupt
const bool length = now - state.last_intr_ >= sensor->filter_us_;
const bool length = now - pulse_state.last_intr_ >= sensor->filter_us_;
if (length && state.latched_ && !sensor->last_pin_val_) { // Long enough low edge
state.latched_ = false;
} else if (length && !state.latched_ && sensor->last_pin_val_) { // Long enough high edge
state.latched_ = true;
set.last_detected_edge_us_ = state.last_intr_;
set.count_++; // NOLINT(clang-diagnostic-deprecated-volatile)
if (length && pulse_state.latched_ && !sensor->last_pin_val_) { // Long enough low edge
pulse_state.latched_ = false;
} else if (length && !pulse_state.latched_ && sensor->last_pin_val_) { // Long enough high edge
pulse_state.latched_ = true;
state.last_detected_edge_us_ = pulse_state.last_intr_;
state.count_++; // NOLINT(clang-diagnostic-deprecated-volatile)
}
// Due to order of operations this includes
// length && latched && rising (just reset from a long low edge)
// !latched && (rising || high) (noise on the line resetting the potential rising edge)
set.last_rising_edge_us_ = !state.latched_ && pin_val ? now : set.last_detected_edge_us_;
state.last_rising_edge_us_ = !pulse_state.latched_ && pin_val ? now : state.last_detected_edge_us_;
state.last_intr_ = now;
pulse_state.last_intr_ = now;
sensor->last_pin_val_ = pin_val;
}

View File

@@ -46,17 +46,16 @@ class PulseMeterSensor : public sensor::Sensor, public Component {
uint32_t total_pulses_ = 0;
uint32_t last_processed_edge_us_ = 0;
// This struct (and the two pointers) are used to pass data between the ISR and loop.
// These two pointers are exchanged each loop.
// Use these to send data from the ISR to the loop not the other way around (except for resetting the values).
// This struct and variable are used to pass data between the ISR and loop.
// The data from state_ is read and then count_ in state_ is reset in each loop.
// This must be done while guarded by an InterruptLock. Use this variable to send data
// from the ISR to the loop not the other way around (except for resetting count_).
struct State {
uint32_t last_detected_edge_us_ = 0;
uint32_t last_rising_edge_us_ = 0;
uint32_t count_ = 0;
};
State state_[2];
volatile State *set_ = state_;
volatile State *get_ = state_ + 1;
volatile State state_{};
// Only use the following variables in the ISR or while guarded by an InterruptLock
ISRInternalGPIOPin isr_pin_;

View File

@@ -3,6 +3,7 @@
#include <cassert>
#include <cinttypes>
#include <cmath>
#include <type_traits>
#ifdef USE_ESP32
#if (ESP_IDF_VERSION_MAJOR >= 5 && ESP_IDF_VERSION_MINOR >= 1)
@@ -1319,20 +1320,58 @@ void WiFiComponent::start_scanning() {
// Using insertion sort instead of std::stable_sort saves flash memory
// by avoiding template instantiations (std::rotate, std::stable_sort, lambdas)
// IMPORTANT: This sort is stable (preserves relative order of equal elements)
//
// Uses raw memcpy instead of copy assignment to avoid CompactString's
// destructor/constructor overhead (heap delete[]/new[] for long SSIDs).
// Copy assignment calls ~CompactString() then placement-new for every shift,
// which means delete[]/new[] per shift for heap-allocated SSIDs. With 70+
// networks (e.g., captive portal showing full scan results), this caused
// event loop blocking from hundreds of heap operations in a tight loop.
//
// This is safe because we're permuting elements within the same array —
// each slot is overwritten exactly once, so no ownership duplication occurs.
// All members of WiFiScanResult are either trivially copyable (bssid, channel,
// rssi, priority, flags) or CompactString, which stores either inline data or
// a heap pointer — never a self-referential pointer (unlike std::string's SSO
// on some implementations). This was not possible before PR#13472 replaced
// std::string with CompactString, since std::string's internal layout is
// implementation-defined and may use self-referential pointers.
template<typename VectorType> static void insertion_sort_scan_results(VectorType &results) {
// memcpy-based sort requires no self-referential pointers or virtual dispatch.
// These static_asserts guard the assumptions. If any fire, the memcpy sort
// must be reviewed for safety before updating the expected values.
//
// No vtable pointers (memcpy would corrupt vptr)
static_assert(!std::is_polymorphic<WiFiScanResult>::value, "WiFiScanResult must not have vtable");
static_assert(!std::is_polymorphic<CompactString>::value, "CompactString must not have vtable");
// Standard layout ensures predictable memory layout with no virtual bases
// and no mixed-access-specifier reordering
static_assert(std::is_standard_layout<WiFiScanResult>::value, "WiFiScanResult must be standard layout");
static_assert(std::is_standard_layout<CompactString>::value, "CompactString must be standard layout");
// Size checks catch added/removed fields that may need safety review
static_assert(sizeof(WiFiScanResult) == 32, "WiFiScanResult size changed - verify memcpy sort is still safe");
static_assert(sizeof(CompactString) == 20, "CompactString size changed - verify memcpy sort is still safe");
// Alignment must match for reinterpret_cast of key_buf to be valid
static_assert(alignof(WiFiScanResult) <= alignof(std::max_align_t), "WiFiScanResult alignment exceeds max_align_t");
const size_t size = results.size();
constexpr size_t elem_size = sizeof(WiFiScanResult);
// Suppress warnings for intentional memcpy on non-trivially-copyable type.
// Safety is guaranteed by the static_asserts above and the permutation invariant.
// NOLINTNEXTLINE(bugprone-undefined-memory-manipulation)
auto *memcpy_fn = &memcpy;
for (size_t i = 1; i < size; i++) {
// Make a copy to avoid issues with move semantics during comparison
WiFiScanResult key = results[i];
alignas(WiFiScanResult) uint8_t key_buf[elem_size];
memcpy_fn(key_buf, &results[i], elem_size);
const auto &key = *reinterpret_cast<const WiFiScanResult *>(key_buf);
int32_t j = i - 1;
// Move elements that are worse than key to the right
// For stability, we only move if key is strictly better than results[j]
while (j >= 0 && wifi_scan_result_is_better(key, results[j])) {
results[j + 1] = results[j];
memcpy_fn(&results[j + 1], &results[j], elem_size);
j--;
}
results[j + 1] = key;
memcpy_fn(&results[j + 1], key_buf, elem_size);
}
}

View File

@@ -10,6 +10,7 @@
#include <span>
#include <string>
#include <type_traits>
#include <vector>
#ifdef USE_LIBRETINY
@@ -219,6 +220,14 @@ class CompactString {
};
static_assert(sizeof(CompactString) == 20, "CompactString must be exactly 20 bytes");
// CompactString is not trivially copyable (non-trivial destructor/copy for heap case).
// However, its layout has no self-referential pointers: storage_[] contains either inline
// data or an external heap pointer — never a pointer to itself. This is unlike libstdc++
// std::string SSO where _M_p points to _M_local_buf within the same object.
// This property allows memcpy-based permutation sorting where each element ends up in
// exactly one slot (no ownership duplication). These asserts document that layout property.
static_assert(std::is_standard_layout<CompactString>::value, "CompactString must be standard layout");
static_assert(!std::is_polymorphic<CompactString>::value, "CompactString must not have vtable");
class WiFiAP {
friend class WiFiComponent;