From e4ad2082bcc22d60244426e6b9fbf0dde51e73bb Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Fri, 6 Feb 2026 15:26:16 +0100 Subject: [PATCH 1/2] [core] Add PROGMEM_STRING_TABLE macro for flash-optimized string lookups (#13659) Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- esphome/components/cover/cover.cpp | 14 +--- .../components/light/light_json_schema.cpp | 37 +++------ esphome/components/logger/logger.cpp | 34 +++----- esphome/components/sensor/sensor.cpp | 20 ++--- esphome/components/sensor/sensor.h | 1 + esphome/components/valve/valve.cpp | 14 +--- .../wifi/wifi_component_esp8266.cpp | 43 ++++------ esphome/core/progmem.h | 83 +++++++++++++++++++ 8 files changed, 137 insertions(+), 109 deletions(-) diff --git a/esphome/components/cover/cover.cpp b/esphome/components/cover/cover.cpp index 37cb908d9f..0589aa2379 100644 --- a/esphome/components/cover/cover.cpp +++ b/esphome/components/cover/cover.cpp @@ -19,17 +19,11 @@ const LogString *cover_command_to_str(float pos) { return LOG_STR("UNKNOWN"); } } +// Cover operation strings indexed by CoverOperation enum (0-2): IDLE, OPENING, CLOSING, plus UNKNOWN +PROGMEM_STRING_TABLE(CoverOperationStrings, "IDLE", "OPENING", "CLOSING", "UNKNOWN"); + const LogString *cover_operation_to_str(CoverOperation op) { - switch (op) { - case COVER_OPERATION_IDLE: - return LOG_STR("IDLE"); - case COVER_OPERATION_OPENING: - return LOG_STR("OPENING"); - case COVER_OPERATION_CLOSING: - return LOG_STR("CLOSING"); - default: - return LOG_STR("UNKNOWN"); - } + return CoverOperationStrings::get_log_str(static_cast(op), CoverOperationStrings::LAST_INDEX); } Cover::Cover() : position{COVER_OPEN} {} diff --git a/esphome/components/light/light_json_schema.cpp b/esphome/components/light/light_json_schema.cpp index 631f59221f..aaa1176f9f 100644 --- a/esphome/components/light/light_json_schema.cpp +++ b/esphome/components/light/light_json_schema.cpp @@ -9,32 +9,19 @@ namespace esphome::light { // See https://www.home-assistant.io/integrations/light.mqtt/#json-schema for documentation on the schema -// Get JSON string for color mode. -// ColorMode enum values are sparse bitmasks (0, 1, 3, 7, 11, 19, 35, 39, 47, 51) which would -// generate a large jump table. Converting to bit index (0-9) allows a compact switch. +// Color mode JSON strings - packed into flash with compile-time generated offsets. +// Indexed by ColorModeBitPolicy bit index (1-9), so index 0 maps to bit 1 ("onoff"). +PROGMEM_STRING_TABLE(ColorModeStrings, "onoff", "brightness", "white", "color_temp", "cwww", "rgb", "rgbw", "rgbct", + "rgbww"); + +// Get JSON string for color mode. Returns nullptr for UNKNOWN (bit 0). +// Returns ProgmemStr so ArduinoJson knows to handle PROGMEM strings on ESP8266. static ProgmemStr get_color_mode_json_str(ColorMode mode) { - switch (ColorModeBitPolicy::to_bit(mode)) { - case 1: - return ESPHOME_F("onoff"); - case 2: - return ESPHOME_F("brightness"); - case 3: - return ESPHOME_F("white"); - case 4: - return ESPHOME_F("color_temp"); - case 5: - return ESPHOME_F("cwww"); - case 6: - return ESPHOME_F("rgb"); - case 7: - return ESPHOME_F("rgbw"); - case 8: - return ESPHOME_F("rgbct"); - case 9: - return ESPHOME_F("rgbww"); - default: - return nullptr; - } + unsigned bit = ColorModeBitPolicy::to_bit(mode); + if (bit == 0) + return nullptr; + // bit is 1-9 for valid modes, so bit-1 is always valid (0-8). LAST_INDEX fallback never used. + return ColorModeStrings::get_progmem_str(bit - 1, ColorModeStrings::LAST_INDEX); } void LightJSONSchema::dump_json(LightState &state, JsonObject root) { diff --git a/esphome/components/logger/logger.cpp b/esphome/components/logger/logger.cpp index 54b5670016..4cbd4f1bf1 100644 --- a/esphome/components/logger/logger.cpp +++ b/esphome/components/logger/logger.cpp @@ -4,6 +4,7 @@ #include "esphome/core/application.h" #include "esphome/core/hal.h" #include "esphome/core/log.h" +#include "esphome/core/progmem.h" namespace esphome::logger { @@ -241,34 +242,20 @@ UARTSelection Logger::get_uart() const { return this->uart_; } float Logger::get_setup_priority() const { return setup_priority::BUS + 500.0f; } -#ifdef USE_STORE_LOG_STR_IN_FLASH -// ESP8266: PSTR() cannot be used in array initializers, so we need to declare -// each string separately as a global constant first -static const char LOG_LEVEL_NONE[] PROGMEM = "NONE"; -static const char LOG_LEVEL_ERROR[] PROGMEM = "ERROR"; -static const char LOG_LEVEL_WARN[] PROGMEM = "WARN"; -static const char LOG_LEVEL_INFO[] PROGMEM = "INFO"; -static const char LOG_LEVEL_CONFIG[] PROGMEM = "CONFIG"; -static const char LOG_LEVEL_DEBUG[] PROGMEM = "DEBUG"; -static const char LOG_LEVEL_VERBOSE[] PROGMEM = "VERBOSE"; -static const char LOG_LEVEL_VERY_VERBOSE[] PROGMEM = "VERY_VERBOSE"; +// Log level strings - packed into flash on ESP8266, indexed by log level (0-7) +PROGMEM_STRING_TABLE(LogLevelStrings, "NONE", "ERROR", "WARN", "INFO", "CONFIG", "DEBUG", "VERBOSE", "VERY_VERBOSE"); -static const LogString *const LOG_LEVELS[] = { - reinterpret_cast(LOG_LEVEL_NONE), reinterpret_cast(LOG_LEVEL_ERROR), - reinterpret_cast(LOG_LEVEL_WARN), reinterpret_cast(LOG_LEVEL_INFO), - reinterpret_cast(LOG_LEVEL_CONFIG), reinterpret_cast(LOG_LEVEL_DEBUG), - reinterpret_cast(LOG_LEVEL_VERBOSE), reinterpret_cast(LOG_LEVEL_VERY_VERBOSE), -}; -#else -static const char *const LOG_LEVELS[] = {"NONE", "ERROR", "WARN", "INFO", "CONFIG", "DEBUG", "VERBOSE", "VERY_VERBOSE"}; -#endif +static const LogString *get_log_level_str(uint8_t level) { + return LogLevelStrings::get_log_str(level, LogLevelStrings::LAST_INDEX); +} void Logger::dump_config() { ESP_LOGCONFIG(TAG, "Logger:\n" " Max Level: %s\n" " Initial Level: %s", - LOG_STR_ARG(LOG_LEVELS[ESPHOME_LOG_LEVEL]), LOG_STR_ARG(LOG_LEVELS[this->current_level_])); + LOG_STR_ARG(get_log_level_str(ESPHOME_LOG_LEVEL)), + LOG_STR_ARG(get_log_level_str(this->current_level_))); #ifndef USE_HOST ESP_LOGCONFIG(TAG, " Log Baud Rate: %" PRIu32 "\n" @@ -287,7 +274,7 @@ void Logger::dump_config() { #ifdef USE_LOGGER_RUNTIME_TAG_LEVELS for (auto &it : this->log_levels_) { - ESP_LOGCONFIG(TAG, " Level for '%s': %s", it.first, LOG_STR_ARG(LOG_LEVELS[it.second])); + ESP_LOGCONFIG(TAG, " Level for '%s': %s", it.first, LOG_STR_ARG(get_log_level_str(it.second))); } #endif } @@ -295,7 +282,8 @@ void Logger::dump_config() { void Logger::set_log_level(uint8_t level) { if (level > ESPHOME_LOG_LEVEL) { level = ESPHOME_LOG_LEVEL; - ESP_LOGW(TAG, "Cannot set log level higher than pre-compiled %s", LOG_STR_ARG(LOG_LEVELS[ESPHOME_LOG_LEVEL])); + ESP_LOGW(TAG, "Cannot set log level higher than pre-compiled %s", + LOG_STR_ARG(get_log_level_str(ESPHOME_LOG_LEVEL))); } this->current_level_ = level; #ifdef USE_LOGGER_LEVEL_LISTENERS diff --git a/esphome/components/sensor/sensor.cpp b/esphome/components/sensor/sensor.cpp index 3f2be02af2..ae2ee3e3d1 100644 --- a/esphome/components/sensor/sensor.cpp +++ b/esphome/components/sensor/sensor.cpp @@ -2,6 +2,7 @@ #include "esphome/core/defines.h" #include "esphome/core/controller_registry.h" #include "esphome/core/log.h" +#include "esphome/core/progmem.h" namespace esphome::sensor { @@ -30,20 +31,13 @@ void log_sensor(const char *tag, const char *prefix, const char *type, Sensor *o } } +// State class strings indexed by StateClass enum (0-4): NONE, MEASUREMENT, TOTAL_INCREASING, TOTAL, MEASUREMENT_ANGLE +PROGMEM_STRING_TABLE(StateClassStrings, "", "measurement", "total_increasing", "total", "measurement_angle"); +static_assert(StateClassStrings::COUNT == STATE_CLASS_LAST + 1, "StateClassStrings must match StateClass enum"); + const LogString *state_class_to_string(StateClass state_class) { - switch (state_class) { - case STATE_CLASS_MEASUREMENT: - return LOG_STR("measurement"); - case STATE_CLASS_TOTAL_INCREASING: - return LOG_STR("total_increasing"); - case STATE_CLASS_TOTAL: - return LOG_STR("total"); - case STATE_CLASS_MEASUREMENT_ANGLE: - return LOG_STR("measurement_angle"); - case STATE_CLASS_NONE: - default: - return LOG_STR(""); - } + // Fallback to index 0 (empty string for STATE_CLASS_NONE) if out of range + return StateClassStrings::get_log_str(static_cast(state_class), 0); } Sensor::Sensor() : state(NAN), raw_state(NAN) {} diff --git a/esphome/components/sensor/sensor.h b/esphome/components/sensor/sensor.h index d9046020f6..f9a45cb1d0 100644 --- a/esphome/components/sensor/sensor.h +++ b/esphome/components/sensor/sensor.h @@ -32,6 +32,7 @@ enum StateClass : uint8_t { STATE_CLASS_TOTAL = 3, STATE_CLASS_MEASUREMENT_ANGLE = 4 }; +constexpr uint8_t STATE_CLASS_LAST = static_cast(STATE_CLASS_MEASUREMENT_ANGLE); const LogString *state_class_to_string(StateClass state_class); diff --git a/esphome/components/valve/valve.cpp b/esphome/components/valve/valve.cpp index 607f614ef7..493ffd8da2 100644 --- a/esphome/components/valve/valve.cpp +++ b/esphome/components/valve/valve.cpp @@ -23,17 +23,11 @@ const LogString *valve_command_to_str(float pos) { return LOG_STR("UNKNOWN"); } } +// Valve operation strings indexed by ValveOperation enum (0-2): IDLE, OPENING, CLOSING, plus UNKNOWN +PROGMEM_STRING_TABLE(ValveOperationStrings, "IDLE", "OPENING", "CLOSING", "UNKNOWN"); + const LogString *valve_operation_to_str(ValveOperation op) { - switch (op) { - case VALVE_OPERATION_IDLE: - return LOG_STR("IDLE"); - case VALVE_OPERATION_OPENING: - return LOG_STR("OPENING"); - case VALVE_OPERATION_CLOSING: - return LOG_STR("CLOSING"); - default: - return LOG_STR("UNKNOWN"); - } + return ValveOperationStrings::get_log_str(static_cast(op), ValveOperationStrings::LAST_INDEX); } Valve::Valve() : position{VALVE_OPEN} {} diff --git a/esphome/components/wifi/wifi_component_esp8266.cpp b/esphome/components/wifi/wifi_component_esp8266.cpp index c6bd40037d..0765fdc03b 100644 --- a/esphome/components/wifi/wifi_component_esp8266.cpp +++ b/esphome/components/wifi/wifi_component_esp8266.cpp @@ -36,6 +36,7 @@ extern "C" { #include "esphome/core/hal.h" #include "esphome/core/helpers.h" #include "esphome/core/log.h" +#include "esphome/core/progmem.h" #include "esphome/core/util.h" namespace esphome::wifi { @@ -398,37 +399,23 @@ class WiFiMockClass : public ESP8266WiFiGenericClass { static void _event_callback(void *event) { ESP8266WiFiGenericClass::_eventCallback(event); } // NOLINT }; +// Auth mode strings indexed by AUTH_* constants (0-4), with UNKNOWN at last index +// Static asserts verify the SDK constants are contiguous as expected +static_assert(AUTH_OPEN == 0 && AUTH_WEP == 1 && AUTH_WPA_PSK == 2 && AUTH_WPA2_PSK == 3 && AUTH_WPA_WPA2_PSK == 4, + "AUTH_* constants are not contiguous"); +PROGMEM_STRING_TABLE(AuthModeStrings, "OPEN", "WEP", "WPA PSK", "WPA2 PSK", "WPA/WPA2 PSK", "UNKNOWN"); + const LogString *get_auth_mode_str(uint8_t mode) { - switch (mode) { - case AUTH_OPEN: - return LOG_STR("OPEN"); - case AUTH_WEP: - return LOG_STR("WEP"); - case AUTH_WPA_PSK: - return LOG_STR("WPA PSK"); - case AUTH_WPA2_PSK: - return LOG_STR("WPA2 PSK"); - case AUTH_WPA_WPA2_PSK: - return LOG_STR("WPA/WPA2 PSK"); - default: - return LOG_STR("UNKNOWN"); - } -} -const LogString *get_op_mode_str(uint8_t mode) { - switch (mode) { - case WIFI_OFF: - return LOG_STR("OFF"); - case WIFI_STA: - return LOG_STR("STA"); - case WIFI_AP: - return LOG_STR("AP"); - case WIFI_AP_STA: - return LOG_STR("AP+STA"); - default: - return LOG_STR("UNKNOWN"); - } + return AuthModeStrings::get_log_str(mode, AuthModeStrings::LAST_INDEX); } +// WiFi op mode strings indexed by WIFI_* constants (0-3), with UNKNOWN at last index +static_assert(WIFI_OFF == 0 && WIFI_STA == 1 && WIFI_AP == 2 && WIFI_AP_STA == 3, + "WIFI_* op mode constants are not contiguous"); +PROGMEM_STRING_TABLE(OpModeStrings, "OFF", "STA", "AP", "AP+STA", "UNKNOWN"); + +const LogString *get_op_mode_str(uint8_t mode) { return OpModeStrings::get_log_str(mode, OpModeStrings::LAST_INDEX); } + const LogString *get_disconnect_reason_str(uint8_t reason) { /* If this were one big switch statement, GCC would generate a lookup table for it. However, the values of the * REASON_* constants aren't continuous, and GCC will fill in the gap with the default value -- wasting 4 bytes of RAM diff --git a/esphome/core/progmem.h b/esphome/core/progmem.h index 4b897fb2de..6c6a5252cf 100644 --- a/esphome/core/progmem.h +++ b/esphome/core/progmem.h @@ -1,5 +1,11 @@ #pragma once +#include +#include +#include + +#include "esphome/core/hal.h" // For PROGMEM definition + // Platform-agnostic macros for PROGMEM string handling // On ESP8266/Arduino: Use Arduino's F() macro for PROGMEM strings // On other platforms: Use plain strings (no PROGMEM) @@ -32,3 +38,80 @@ using ProgmemStr = const __FlashStringHelper *; // Type for pointers to strings (no PROGMEM on non-ESP8266 platforms) using ProgmemStr = const char *; #endif + +namespace esphome { + +/// Helper for C++20 string literal template arguments +template struct FixedString { + char data[N]{}; + constexpr FixedString(const char (&str)[N]) { + for (size_t i = 0; i < N; ++i) + data[i] = str[i]; + } + constexpr size_t size() const { return N - 1; } // exclude null terminator +}; + +/// Compile-time string table that packs strings into a single blob with offset lookup. +/// Use PROGMEM_STRING_TABLE macro to instantiate with proper flash placement on ESP8266. +/// +/// Example: +/// PROGMEM_STRING_TABLE(MyStrings, "foo", "bar", "baz"); +/// ProgmemStr str = MyStrings::get_progmem_str(idx, MyStrings::LAST_INDEX); // For ArduinoJson +/// const LogString *log_str = MyStrings::get_log_str(idx, MyStrings::LAST_INDEX); // For logging +/// +template struct ProgmemStringTable { + static constexpr size_t COUNT = sizeof...(Strs); + static constexpr size_t BLOB_SIZE = (0 + ... + (Strs.size() + 1)); + + /// Generate packed string blob at compile time + static constexpr auto make_blob() { + std::array result{}; + size_t pos = 0; + auto copy = [&](const auto &str) { + for (size_t i = 0; i <= str.size(); ++i) + result[pos++] = str.data[i]; + }; + (copy(Strs), ...); + return result; + } + + /// Generate offset table at compile time (uint8_t limits blob to 255 bytes) + static constexpr auto make_offsets() { + static_assert(COUNT > 0, "PROGMEM_STRING_TABLE must contain at least one string"); + static_assert(COUNT <= 255, "PROGMEM_STRING_TABLE supports at most 255 strings with uint8_t indices"); + static_assert(BLOB_SIZE <= 255, "PROGMEM_STRING_TABLE blob exceeds 255 bytes; use fewer/shorter strings"); + std::array result{}; + size_t pos = 0, idx = 0; + ((result[idx++] = static_cast(pos), pos += Strs.size() + 1), ...); + return result; + } +}; + +// Forward declaration for LogString (defined in log.h) +struct LogString; + +/// Instantiate a ProgmemStringTable with PROGMEM storage. +/// Creates: Name::get_progmem_str(idx, fallback), Name::get_log_str(idx, fallback) +/// If idx >= COUNT, returns string at fallback. Use LAST_INDEX for common patterns. +#define PROGMEM_STRING_TABLE(Name, ...) \ + struct Name { \ + using Table = ::esphome::ProgmemStringTable<__VA_ARGS__>; \ + static constexpr size_t COUNT = Table::COUNT; \ + static constexpr uint8_t LAST_INDEX = COUNT - 1; \ + static constexpr size_t BLOB_SIZE = Table::BLOB_SIZE; \ + static constexpr auto BLOB PROGMEM = Table::make_blob(); \ + static constexpr auto OFFSETS PROGMEM = Table::make_offsets(); \ + static const char *get_(uint8_t idx, uint8_t fallback) { \ + if (idx >= COUNT) \ + idx = fallback; \ + return &BLOB[::esphome::progmem_read_byte(&OFFSETS[idx])]; \ + } \ + static ::ProgmemStr get_progmem_str(uint8_t idx, uint8_t fallback) { \ + return reinterpret_cast<::ProgmemStr>(get_(idx, fallback)); \ + } \ + static const ::esphome::LogString *get_log_str(uint8_t idx, uint8_t fallback) { \ + return reinterpret_cast(get_(idx, fallback)); \ + } \ + } + +} // namespace esphome From c3622ef7fb18aed84c1b59357a37945955bb7086 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Fri, 6 Feb 2026 15:52:41 +0100 Subject: [PATCH 2/2] [http_request] Fix chunked transfer encoding on Arduino platforms (#13790) --- .../http_request/http_request_arduino.cpp | 193 ++++++++++++++++-- .../http_request/http_request_arduino.h | 18 ++ .../http_request/ota/ota_http_request.cpp | 6 +- esphome/core/helpers.cpp | 2 +- esphome/core/helpers.h | 5 +- 5 files changed, 198 insertions(+), 26 deletions(-) diff --git a/esphome/components/http_request/http_request_arduino.cpp b/esphome/components/http_request/http_request_arduino.cpp index 2f12b58766..aee1f651bf 100644 --- a/esphome/components/http_request/http_request_arduino.cpp +++ b/esphome/components/http_request/http_request_arduino.cpp @@ -133,20 +133,10 @@ std::shared_ptr HttpRequestArduino::perform(const std::string &ur // HTTPClient::getSize() returns -1 for chunked transfer encoding (no Content-Length). // When cast to size_t, -1 becomes SIZE_MAX (4294967295 on 32-bit). - // The read() method handles this: bytes_read_ can never reach SIZE_MAX, so the - // early return check (bytes_read_ >= content_length) will never trigger. - // - // TODO: Chunked transfer encoding is NOT properly supported on Arduino. - // The implementation in #7884 was incomplete - it only works correctly on ESP-IDF where - // esp_http_client_read() decodes chunks internally. On Arduino, using getStreamPtr() - // returns raw TCP data with chunk framing (e.g., "12a\r\n{json}\r\n0\r\n\r\n") instead - // of decoded content. This wasn't noticed because requests would complete and payloads - // were only examined on IDF. The long transfer times were also masked by the misleading - // "HTTP on Arduino version >= 3.1 is **very** slow" warning above. This causes two issues: - // 1. Response body is corrupted - contains chunk size headers mixed with data - // 2. Cannot detect end of transfer - connection stays open (keep-alive), causing timeout - // The proper fix would be to use getString() for chunked responses, which decodes chunks - // internally, but this buffers the entire response in memory. + // The read() method uses a chunked transfer encoding decoder (read_chunked_) to strip + // chunk framing and deliver only decoded content. When the final 0-size chunk is received, + // is_chunked_ is cleared and content_length is set to the actual decoded size, so + // is_read_complete() returns true and callers exit their read loops correctly. int content_length = container->client_.getSize(); ESP_LOGD(TAG, "Content-Length: %d", content_length); container->content_length = (size_t) content_length; @@ -174,6 +164,10 @@ std::shared_ptr HttpRequestArduino::perform(const std::string &ur // > 0: bytes read // 0: no data yet, retry <-- NOTE: 0 means retry, NOT EOF! // < 0: error/connection closed <-- connection closed returns -1, not 0 +// +// For chunked transfer encoding, read_chunked_() decodes chunk framing and delivers +// only the payload data. When the final 0-size chunk is received, it clears is_chunked_ +// and sets content_length = bytes_read_ so is_read_complete() returns true. int HttpContainerArduino::read(uint8_t *buf, size_t max_len) { const uint32_t start = millis(); watchdog::WatchdogManager wdm(this->parent_->get_watchdog_timeout()); @@ -184,24 +178,42 @@ int HttpContainerArduino::read(uint8_t *buf, size_t max_len) { return HTTP_ERROR_CONNECTION_CLOSED; } + if (this->is_chunked_) { + int result = this->read_chunked_(buf, max_len, stream_ptr); + this->duration_ms += (millis() - start); + if (result > 0) { + return result; + } + // result <= 0: check for completion or errors + if (this->is_read_complete()) { + return 0; // Chunked transfer complete (final 0-size chunk received) + } + if (result < 0) { + return result; // Stream error during chunk decoding + } + // read_chunked_ returned 0: no data was available (available() was 0). + // This happens when the TCP buffer is empty - either more data is in flight, + // or the connection dropped. Arduino's connected() returns false only when + // both the remote has closed AND the receive buffer is empty, so any buffered + // data is fully drained before we report the drop. + if (!stream_ptr->connected()) { + return HTTP_ERROR_CONNECTION_CLOSED; + } + return 0; // No data yet, caller should retry + } + + // Non-chunked path int available_data = stream_ptr->available(); - // For chunked transfer encoding, HTTPClient::getSize() returns -1, which becomes SIZE_MAX when - // cast to size_t. SIZE_MAX - bytes_read_ is still huge, so it won't limit the read. size_t remaining = (this->content_length > 0) ? (this->content_length - this->bytes_read_) : max_len; int bufsize = std::min(max_len, std::min(remaining, (size_t) available_data)); if (bufsize == 0) { this->duration_ms += (millis() - start); - // Check if we've read all expected content (non-chunked only) - // For chunked encoding (content_length == SIZE_MAX), is_read_complete() returns false if (this->is_read_complete()) { return 0; // All content read successfully } - // No data available - check if connection is still open - // For chunked encoding, !connected() after reading means EOF (all chunks received) - // For known content_length with bytes_read_ < content_length, it means connection dropped if (!stream_ptr->connected()) { - return HTTP_ERROR_CONNECTION_CLOSED; // Connection closed or EOF for chunked + return HTTP_ERROR_CONNECTION_CLOSED; } return 0; // No data yet, caller should retry } @@ -215,6 +227,143 @@ int HttpContainerArduino::read(uint8_t *buf, size_t max_len) { return read_len; } +void HttpContainerArduino::chunk_header_complete_() { + if (this->chunk_remaining_ == 0) { + this->chunk_state_ = ChunkedState::CHUNK_TRAILER; + this->chunk_remaining_ = 1; // repurpose as at-start-of-line flag + } else { + this->chunk_state_ = ChunkedState::CHUNK_DATA; + } +} + +// Chunked transfer encoding decoder +// +// On Arduino, getStreamPtr() returns raw TCP data. For chunked responses, this includes +// chunk framing (size headers, CRLF delimiters) mixed with payload data. This decoder +// strips the framing and delivers only decoded content to the caller. +// +// Chunk format (RFC 9112 Section 7.1): +// [;extension]\r\n +// \r\n +// ... +// 0\r\n +// [trailer-field\r\n]* +// \r\n +// +// Non-blocking: only processes bytes already in the TCP receive buffer. +// State (chunk_state_, chunk_remaining_) is preserved between calls, so partial +// chunk headers or split \r\n sequences resume correctly on the next call. +// Framing bytes (hex sizes, \r\n) may be consumed without producing output; +// the caller sees 0 and retries via the normal read timeout logic. +// +// WiFiClient::read() returns -1 on error despite available() > 0 (connection reset +// between check and read). On any stream error (c < 0 or readBytes <= 0), we return +// already-decoded data if any; otherwise HTTP_ERROR_CONNECTION_CLOSED. The error +// will surface again on the next call since the stream stays broken. +// +// Returns: > 0 decoded bytes, 0 no data available, < 0 error +int HttpContainerArduino::read_chunked_(uint8_t *buf, size_t max_len, WiFiClient *stream) { + int total_decoded = 0; + + while (total_decoded < (int) max_len && this->chunk_state_ != ChunkedState::COMPLETE) { + // Non-blocking: only process what's already buffered + if (stream->available() == 0) + break; + + // CHUNK_DATA reads multiple bytes; handle before the single-byte switch + if (this->chunk_state_ == ChunkedState::CHUNK_DATA) { + // Only read what's available, what fits in buf, and what remains in this chunk + size_t to_read = + std::min({max_len - (size_t) total_decoded, this->chunk_remaining_, (size_t) stream->available()}); + if (to_read == 0) + break; + App.feed_wdt(); + int read_len = stream->readBytes(buf + total_decoded, to_read); + if (read_len <= 0) + return total_decoded > 0 ? total_decoded : HTTP_ERROR_CONNECTION_CLOSED; + total_decoded += read_len; + this->chunk_remaining_ -= read_len; + this->bytes_read_ += read_len; + if (this->chunk_remaining_ == 0) + this->chunk_state_ = ChunkedState::CHUNK_DATA_TRAIL; + continue; + } + + // All other states consume a single byte + int c = stream->read(); + if (c < 0) + return total_decoded > 0 ? total_decoded : HTTP_ERROR_CONNECTION_CLOSED; + + switch (this->chunk_state_) { + // Parse hex chunk size, one byte at a time: "[;ext]\r\n" + // Note: if no hex digits are parsed (e.g., bare \r\n), chunk_remaining_ stays 0 + // and is treated as the final chunk. This is intentionally lenient — on embedded + // devices, rejecting malformed framing is less useful than terminating cleanly. + // Overflow of chunk_remaining_ from extremely long hex strings (>8 digits on + // 32-bit) is not checked; >4GB chunks are unrealistic on embedded targets and + // would simply cause fewer bytes to be read from that chunk. + case ChunkedState::CHUNK_HEADER: + if (c == '\n') { + // \n terminates the size line; chunk_remaining_ == 0 means last chunk + this->chunk_header_complete_(); + } else { + uint8_t hex = parse_hex_char(c); + if (hex != INVALID_HEX_CHAR) { + this->chunk_remaining_ = (this->chunk_remaining_ << 4) | hex; + } else if (c != '\r') { + this->chunk_state_ = ChunkedState::CHUNK_HEADER_EXT; // ';' starts extension, skip to \n + } + } + break; + + // Skip chunk extension bytes until \n (e.g., ";name=value\r\n") + case ChunkedState::CHUNK_HEADER_EXT: + if (c == '\n') { + this->chunk_header_complete_(); + } + break; + + // Consume \r\n trailing each chunk's data + case ChunkedState::CHUNK_DATA_TRAIL: + if (c == '\n') { + this->chunk_state_ = ChunkedState::CHUNK_HEADER; + this->chunk_remaining_ = 0; // reset for next chunk's hex accumulation + } + // else: \r is consumed silently, next iteration gets \n + break; + + // Consume optional trailer headers and terminating empty line after final chunk. + // Per RFC 9112 Section 7.1: "0\r\n" is followed by optional "field\r\n" lines + // and a final "\r\n". chunk_remaining_ is repurposed as a flag: 1 = at start + // of line (may be the empty terminator), 0 = mid-line (reading a trailer field). + case ChunkedState::CHUNK_TRAILER: + if (c == '\n') { + if (this->chunk_remaining_ != 0) { + this->chunk_state_ = ChunkedState::COMPLETE; // Empty line terminates trailers + } else { + this->chunk_remaining_ = 1; // End of trailer field, at start of next line + } + } else if (c != '\r') { + this->chunk_remaining_ = 0; // Non-CRLF char: reading a trailer field + } + // \r doesn't change the flag — it's part of \r\n line endings + break; + + default: + break; + } + + if (this->chunk_state_ == ChunkedState::COMPLETE) { + // Clear chunked flag and set content_length to actual decoded size so + // is_read_complete() returns true and callers exit their read loops + this->is_chunked_ = false; + this->content_length = this->bytes_read_; + } + } + + return total_decoded; +} + void HttpContainerArduino::end() { watchdog::WatchdogManager wdm(this->parent_->get_watchdog_timeout()); this->client_.end(); diff --git a/esphome/components/http_request/http_request_arduino.h b/esphome/components/http_request/http_request_arduino.h index d9b5af9d81..a1084b12d5 100644 --- a/esphome/components/http_request/http_request_arduino.h +++ b/esphome/components/http_request/http_request_arduino.h @@ -18,6 +18,17 @@ namespace esphome::http_request { class HttpRequestArduino; + +/// State machine for decoding chunked transfer encoding on Arduino +enum class ChunkedState : uint8_t { + CHUNK_HEADER, ///< Reading hex digits of chunk size + CHUNK_HEADER_EXT, ///< Skipping chunk extensions until \n + CHUNK_DATA, ///< Reading chunk data bytes + CHUNK_DATA_TRAIL, ///< Skipping \r\n after chunk data + CHUNK_TRAILER, ///< Consuming trailer headers after final 0-size chunk + COMPLETE, ///< Finished: final chunk and trailers consumed +}; + class HttpContainerArduino : public HttpContainer { public: int read(uint8_t *buf, size_t max_len) override; @@ -26,6 +37,13 @@ class HttpContainerArduino : public HttpContainer { protected: friend class HttpRequestArduino; HTTPClient client_{}; + + /// Decode chunked transfer encoding from the raw stream + int read_chunked_(uint8_t *buf, size_t max_len, WiFiClient *stream); + /// Transition from chunk header to data or trailer based on parsed size + void chunk_header_complete_(); + ChunkedState chunk_state_{ChunkedState::CHUNK_HEADER}; + size_t chunk_remaining_{0}; ///< Bytes remaining in current chunk }; class HttpRequestArduino : public HttpRequestComponent { diff --git a/esphome/components/http_request/ota/ota_http_request.cpp b/esphome/components/http_request/ota/ota_http_request.cpp index 8f4ecfab2d..882def4d7f 100644 --- a/esphome/components/http_request/ota/ota_http_request.cpp +++ b/esphome/components/http_request/ota/ota_http_request.cpp @@ -133,8 +133,10 @@ uint8_t OtaHttpRequestComponent::do_ota_() { auto result = http_read_loop_result(bufsize_or_error, last_data_time, read_timeout, container->is_read_complete()); if (result == HttpReadLoopResult::RETRY) continue; - // Note: COMPLETE is currently unreachable since the loop condition checks bytes_read < content_length, - // but this is defensive code in case chunked transfer encoding support is added for OTA in the future. + // For non-chunked responses, COMPLETE is unreachable (loop condition checks bytes_read < content_length). + // For chunked responses, the decoder sets content_length = bytes_read when the final chunk arrives, + // which causes the loop condition to terminate. But COMPLETE can still be returned if the decoder + // finishes mid-read, so this is needed for correctness. if (result == HttpReadLoopResult::COMPLETE) break; if (result != HttpReadLoopResult::DATA) { diff --git a/esphome/core/helpers.cpp b/esphome/core/helpers.cpp index 1a5d22f8d8..c2f7f67d9a 100644 --- a/esphome/core/helpers.cpp +++ b/esphome/core/helpers.cpp @@ -295,7 +295,7 @@ size_t parse_hex(const char *str, size_t length, uint8_t *data, size_t count) { size_t chars = std::min(length, 2 * count); for (size_t i = 2 * count - chars; i < 2 * count; i++, str++) { uint8_t val = parse_hex_char(*str); - if (val > 15) + if (val == INVALID_HEX_CHAR) return 0; data[i >> 1] = (i & 1) ? data[i >> 1] | val : val << 4; } diff --git a/esphome/core/helpers.h b/esphome/core/helpers.h index 9c7060cd1d..f7de34b6d5 100644 --- a/esphome/core/helpers.h +++ b/esphome/core/helpers.h @@ -874,6 +874,9 @@ template::value, int> = 0> optional< } /// Parse a hex character to its nibble value (0-15), returns 255 on invalid input +/// Returned by parse_hex_char() for non-hex characters. +static constexpr uint8_t INVALID_HEX_CHAR = 255; + constexpr uint8_t parse_hex_char(char c) { if (c >= '0' && c <= '9') return c - '0'; @@ -881,7 +884,7 @@ constexpr uint8_t parse_hex_char(char c) { return c - 'A' + 10; if (c >= 'a' && c <= 'f') return c - 'a' + 10; - return 255; + return INVALID_HEX_CHAR; } /// Convert a nibble (0-15) to hex char with specified base ('a' for lowercase, 'A' for uppercase)