mirror of
https://github.com/esphome/esphome.git
synced 2026-02-18 15:35:59 -07:00
[api] Limit Nagle batching for log messages to reduce LWIP buffer pressure (#13439)
This commit is contained in:
committed by
Jonathan Swoboda
parent
3c3d5c2fca
commit
95eebcd74f
@@ -1844,23 +1844,8 @@ bool APIConnection::send_buffer(ProtoWriteBuffer buffer, uint8_t message_type) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Toggle Nagle's algorithm based on message type to prevent log messages from
|
// Set TCP_NODELAY based on message type - see set_nodelay_for_message() for details
|
||||||
// filling the TCP send buffer and crowding out important state updates.
|
this->helper_->set_nodelay_for_message(is_log_message);
|
||||||
//
|
|
||||||
// This honors the `no_delay` proto option - SubscribeLogsResponse is the only
|
|
||||||
// message with `option (no_delay) = false;` in api.proto, indicating it should
|
|
||||||
// allow Nagle coalescing. This option existed since 2019 but was never implemented.
|
|
||||||
//
|
|
||||||
// - Log messages: Enable Nagle (NODELAY=false) so small log packets coalesce
|
|
||||||
// into fewer, larger packets. They flush naturally via TCP delayed ACK timer
|
|
||||||
// (~200ms), buffer filling, or when a state update triggers a flush.
|
|
||||||
//
|
|
||||||
// - All other messages (state updates, responses): Disable Nagle (NODELAY=true)
|
|
||||||
// for immediate delivery. These are time-sensitive and should not be delayed.
|
|
||||||
//
|
|
||||||
// This must be done proactively BEFORE the buffer fills up - checking buffer
|
|
||||||
// state here would be too late since we'd already be in a degraded state.
|
|
||||||
this->helper_->set_nodelay(!is_log_message);
|
|
||||||
|
|
||||||
APIError err = this->helper_->write_protobuf_packet(message_type, buffer);
|
APIError err = this->helper_->write_protobuf_packet(message_type, buffer);
|
||||||
if (err == APIError::WOULD_BLOCK)
|
if (err == APIError::WOULD_BLOCK)
|
||||||
|
|||||||
@@ -120,26 +120,39 @@ class APIFrameHelper {
|
|||||||
}
|
}
|
||||||
return APIError::OK;
|
return APIError::OK;
|
||||||
}
|
}
|
||||||
/// Toggle TCP_NODELAY socket option to control Nagle's algorithm.
|
// Manage TCP_NODELAY (Nagle's algorithm) based on message type.
|
||||||
///
|
//
|
||||||
/// This is used to allow log messages to coalesce (Nagle enabled) while keeping
|
// For non-log messages (sensor data, state updates): Always disable Nagle
|
||||||
/// state updates low-latency (NODELAY enabled). Without this, many small log
|
// (NODELAY on) for immediate delivery - these are time-sensitive.
|
||||||
/// packets fill the TCP send buffer, crowding out important state updates.
|
//
|
||||||
///
|
// For log messages: Use Nagle to coalesce multiple small log packets into
|
||||||
/// State is tracked to minimize setsockopt() overhead - on lwip_raw (ESP8266/RP2040)
|
// fewer larger packets, reducing WiFi overhead. However, we limit batching
|
||||||
/// this is just a boolean assignment; on other platforms it's a lightweight syscall.
|
// to 3 messages to avoid excessive LWIP buffer pressure on memory-constrained
|
||||||
///
|
// devices like ESP8266. LWIP's TCP_OVERSIZE option coalesces the data into
|
||||||
/// @param enable true to enable NODELAY (disable Nagle), false to enable Nagle
|
// shared pbufs, but holding data too long waiting for Nagle's timer causes
|
||||||
/// @return true if successful or already in desired state
|
// buffer exhaustion and dropped messages.
|
||||||
bool set_nodelay(bool enable) {
|
//
|
||||||
if (this->nodelay_enabled_ == enable)
|
// Flow: Log 1 (Nagle on) -> Log 2 (Nagle on) -> Log 3 (NODELAY, flush all)
|
||||||
return true;
|
//
|
||||||
int val = enable ? 1 : 0;
|
void set_nodelay_for_message(bool is_log_message) {
|
||||||
int err = this->socket_->setsockopt(IPPROTO_TCP, TCP_NODELAY, &val, sizeof(int));
|
if (!is_log_message) {
|
||||||
if (err == 0) {
|
if (this->nodelay_state_ != NODELAY_ON) {
|
||||||
this->nodelay_enabled_ = enable;
|
this->set_nodelay_raw_(true);
|
||||||
|
this->nodelay_state_ = NODELAY_ON;
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Log messages 1-3: state transitions -1 -> 1 -> 2 -> -1 (flush on 3rd)
|
||||||
|
if (this->nodelay_state_ == NODELAY_ON) {
|
||||||
|
this->set_nodelay_raw_(false);
|
||||||
|
this->nodelay_state_ = 1;
|
||||||
|
} else if (this->nodelay_state_ >= LOG_NAGLE_COUNT) {
|
||||||
|
this->set_nodelay_raw_(true);
|
||||||
|
this->nodelay_state_ = NODELAY_ON;
|
||||||
|
} else {
|
||||||
|
this->nodelay_state_++;
|
||||||
}
|
}
|
||||||
return err == 0;
|
|
||||||
}
|
}
|
||||||
virtual APIError write_protobuf_packet(uint8_t type, ProtoWriteBuffer buffer) = 0;
|
virtual APIError write_protobuf_packet(uint8_t type, ProtoWriteBuffer buffer) = 0;
|
||||||
// Write multiple protobuf messages in a single operation
|
// Write multiple protobuf messages in a single operation
|
||||||
@@ -229,10 +242,18 @@ class APIFrameHelper {
|
|||||||
uint8_t tx_buf_head_{0};
|
uint8_t tx_buf_head_{0};
|
||||||
uint8_t tx_buf_tail_{0};
|
uint8_t tx_buf_tail_{0};
|
||||||
uint8_t tx_buf_count_{0};
|
uint8_t tx_buf_count_{0};
|
||||||
// Tracks TCP_NODELAY state to minimize setsockopt() calls. Initialized to true
|
// Nagle batching state for log messages. NODELAY_ON (-1) means NODELAY is enabled
|
||||||
// since init_common_() enables NODELAY. Used by set_nodelay() to allow log
|
// (immediate send). Values 1-2 count log messages in the current Nagle batch.
|
||||||
// messages to coalesce while keeping state updates low-latency.
|
// Once LOG_NAGLE_COUNT logs have been batched, the next log switches to NODELAY to flush and resets the state to NODELAY_ON.
|
||||||
bool nodelay_enabled_{true};
|
static constexpr int8_t NODELAY_ON = -1;
|
||||||
|
static constexpr int8_t LOG_NAGLE_COUNT = 2;
|
||||||
|
int8_t nodelay_state_{NODELAY_ON};
|
||||||
|
|
||||||
|
// Internal helper to set the TCP_NODELAY socket option; the setsockopt() result is not checked
|
||||||
|
void set_nodelay_raw_(bool enable) {
|
||||||
|
int val = enable ? 1 : 0;
|
||||||
|
this->socket_->setsockopt(IPPROTO_TCP, TCP_NODELAY, &val, sizeof(int));
|
||||||
|
}
|
||||||
|
|
||||||
// Common initialization for both plaintext and noise protocols
|
// Common initialization for both plaintext and noise protocols
|
||||||
APIError init_common_();
|
APIError init_common_();
|
||||||
|
|||||||
Reference in New Issue
Block a user