From 341b22c7e45dbce5d9287d970c0d1db18e5baa3a Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sat, 21 Feb 2026 17:45:13 -0600 Subject: [PATCH 1/9] [socket] Include component details in platform lwIP log messages get_socket_counts() now returns component detail strings alongside counts so platforms can include them in their log messages. ESP32 INFO log now shows which components consume each socket type. Co-Authored-By: Claude Opus 4.6 --- esphome/components/esp32/__init__.py | 15 +++++++++-- esphome/components/libretiny/__init__.py | 2 +- esphome/components/socket/__init__.py | 33 +++++++++++++----------- 3 files changed, 32 insertions(+), 18 deletions(-) diff --git a/esphome/components/esp32/__init__.py b/esphome/components/esp32/__init__.py index 6bfa8b9053..54cba78ae2 100644 --- a/esphome/components/esp32/__init__.py +++ b/esphome/components/esp32/__init__.py @@ -1265,7 +1265,14 @@ def _configure_lwip_max_sockets(conf: dict) -> None: # CONFIG_LWIP_MAX_SOCKETS is a single VFS socket pool shared by all socket # types (TCP clients, TCP listeners, and UDP). Include all three counts. - tcp_sockets, udp_sockets, tcp_listen = get_socket_counts() + ( + tcp_sockets, + udp_sockets, + tcp_listen, + tcp_details, + udp_details, + tcp_listen_details, + ) = get_socket_counts() total_sockets = tcp_sockets + udp_sockets + tcp_listen # User specified their own value - respect it but warn if insufficient @@ -1302,11 +1309,15 @@ def _configure_lwip_max_sockets(conf: dict) -> None: log_level = logging.INFO if max_sockets > DEFAULT_MAX_SOCKETS else logging.DEBUG _LOGGER.log( log_level, - "Setting CONFIG_LWIP_MAX_SOCKETS to %d (%d TCP + %d UDP + %d TCP_LISTEN)", + "Setting CONFIG_LWIP_MAX_SOCKETS to %d " + "(TCP=%d [%s], UDP=%d [%s], TCP_LISTEN=%d [%s])", max_sockets, tcp_sockets, + tcp_details, udp_sockets, + udp_details, tcp_listen, + tcp_listen_details, ) add_idf_sdkconfig_option("CONFIG_LWIP_MAX_SOCKETS", max_sockets) diff --git a/esphome/components/libretiny/__init__.py b/esphome/components/libretiny/__init__.py index 0daf9733b8..5f04c816fe 100644 --- a/esphome/components/libretiny/__init__.py +++ b/esphome/components/libretiny/__init__.py @@ -321,7 +321,7 @@ def _configure_lwip(config: dict) -> None: get_socket_counts, ) - raw_tcp, raw_udp, raw_tcp_listen = get_socket_counts() + raw_tcp, raw_udp, raw_tcp_listen, *_ = get_socket_counts() # Apply platform minimums — ensure headroom for ESPHome's needs tcp_sockets = max(MIN_TCP_SOCKETS, raw_tcp) udp_sockets = max(MIN_UDP_SOCKETS, raw_udp) diff --git a/esphome/components/socket/__init__.py b/esphome/components/socket/__init__.py index de5c6d2dd6..4d195634a9 100644 --- a/esphome/components/socket/__init__.py +++ b/esphome/components/socket/__init__.py @@ -68,8 +68,17 @@ def consume_sockets( return _consume_sockets -def get_socket_counts() -> tuple[int, int, int]: - """Return (tcp_count, udp_count, tcp_listen_count) of raw registered socket needs. +def _format_consumers(consumers: dict[str, int]) -> str: + """Format consumer dict as 'name=count, ...' or 'none'.""" + if not consumers: + return "none" + return ", ".join(f"{name}={count}" for name, count in sorted(consumers.items())) + + +def get_socket_counts() -> tuple[int, int, int, str, str, str]: + """Return socket counts and component details for platform configuration. + + Returns (tcp, udp, tcp_listen, tcp_details, udp_details, tcp_listen_details). Platforms call this during code generation to configure lwIP socket limits. All components will have registered their needs by then. @@ -83,25 +92,19 @@ def get_socket_counts() -> tuple[int, int, int]: udp = sum(udp_consumers.values()) tcp_listen = sum(tcp_listen_consumers.values()) - tcp_list = ", ".join( - f"{name}={count}" for name, count in sorted(tcp_consumers.items()) - ) - udp_list = ", ".join( - f"{name}={count}" for name, count in sorted(udp_consumers.items()) - ) - tcp_listen_list = ", ".join( - f"{name}={count}" for name, count in sorted(tcp_listen_consumers.items()) - ) + tcp_details = _format_consumers(tcp_consumers) + udp_details = _format_consumers(udp_consumers) + tcp_listen_details = _format_consumers(tcp_listen_consumers) _LOGGER.debug( "Socket counts: TCP=%d (%s), UDP=%d (%s), TCP_LISTEN=%d (%s)", tcp, - tcp_list or "none", + tcp_details, udp, - udp_list or "none", + udp_details, tcp_listen, - tcp_listen_list or "none", + tcp_listen_details, ) - return tcp, udp, tcp_listen + return tcp, udp, tcp_listen, tcp_details, udp_details, tcp_listen_details def require_wake_loop_threadsafe() -> None: From e3154e48bdcaa3be951a97a0f3f856e5fcd20922 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sat, 21 Feb 2026 17:47:06 -0600 Subject: [PATCH 2/9] [libretiny] Log socket component details during lwIP configuration Co-Authored-By: Claude Opus 4.6 --- esphome/components/libretiny/__init__.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/esphome/components/libretiny/__init__.py b/esphome/components/libretiny/__init__.py index 5f04c816fe..e36c261877 100644 --- a/esphome/components/libretiny/__init__.py +++ b/esphome/components/libretiny/__init__.py @@ -321,7 +321,9 @@ def _configure_lwip(config: dict) -> None: get_socket_counts, ) - raw_tcp, raw_udp, raw_tcp_listen, *_ = get_socket_counts() + raw_tcp, raw_udp, raw_tcp_listen, tcp_details, udp_details, tcp_listen_details = ( + get_socket_counts() + ) # Apply platform minimums — ensure headroom for ESPHome's needs tcp_sockets = max(MIN_TCP_SOCKETS, raw_tcp) udp_sockets = max(MIN_UDP_SOCKETS, raw_udp) @@ -396,6 +398,15 @@ def _configure_lwip(config: dict) -> None: if CORE.is_bk72xx: lwip_opts.append("PBUF_POOL_SIZE=10") + _LOGGER.info( + "Configuring lwIP: TCP=%d [%s], UDP=%d [%s], TCP_LISTEN=%d [%s]", + tcp_sockets, + tcp_details, + udp_sockets, + udp_details, + listening_tcp, + tcp_listen_details, + ) cg.add_platformio_option("custom_options.lwip", lwip_opts) From fadf7a2ba3cfe3157d7bf91c7653a2c6f48bd75b Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sat, 21 Feb 2026 17:48:35 -0600 Subject: [PATCH 3/9] [socket] Lower minimum socket counts (TCP=8, UDP=6) Previous minimums (10/8) were overly conservative. Most configs register their actual needs via consume_sockets(), so the minimums only need to cover unregistered components with modest headroom. Co-Authored-By: Claude Opus 4.6 --- esphome/components/socket/__init__.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/esphome/components/socket/__init__.py b/esphome/components/socket/__init__.py index 4d195634a9..b8f971fd05 100644 --- a/esphome/components/socket/__init__.py +++ b/esphome/components/socket/__init__.py @@ -23,10 +23,10 @@ KEY_SOCKET_CONSUMERS_TCP_LISTEN = "socket_consumers_tcp_listen" # Recommended minimum socket counts to ensure headroom. # Platforms should apply these (or their own) on top of get_socket_counts(). -# TCP: Typical setup: api(3) + web_server(5) = 8 registered, +2 headroom for ota-transfer/other = 10 total. -# UDP: dhcp(1) + dns(1) + mdns(2) + wake_loop(1) = 5 base, +3 headroom. -MIN_TCP_SOCKETS = 10 -MIN_UDP_SOCKETS = 8 +# TCP: api(3) = 3 base, +5 headroom for ota-transfer/web_server/other. +# UDP: dhcp(1) + dns(1) + mdns(2) + wake_loop(1) = 5 base, +1 headroom. +MIN_TCP_SOCKETS = 8 +MIN_UDP_SOCKETS = 6 # Wake loop threadsafe support tracking KEY_WAKE_LOOP_THREADSAFE_REQUIRED = "wake_loop_threadsafe_required" From ca82c7c7c6f3cd5c5fdb06637123490569bb5f65 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sat, 21 Feb 2026 17:49:45 -0600 Subject: [PATCH 4/9] [socket] Show (min) indicator when platform minimums are applied Helps users understand why socket counts may be higher than what their components registered. Co-Authored-By: Claude Opus 4.6 --- esphome/components/esp32/__init__.py | 4 +++- esphome/components/libretiny/__init__.py | 8 +++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/esphome/components/esp32/__init__.py b/esphome/components/esp32/__init__.py index 54cba78ae2..f15acfe4a9 100644 --- a/esphome/components/esp32/__init__.py +++ b/esphome/components/esp32/__init__.py @@ -1307,11 +1307,13 @@ def _configure_lwip_max_sockets(conf: dict) -> None: max_sockets = max(DEFAULT_MAX_SOCKETS, total_sockets) log_level = logging.INFO if max_sockets > DEFAULT_MAX_SOCKETS else logging.DEBUG + sock_min = " (min)" if max_sockets > total_sockets else "" _LOGGER.log( log_level, - "Setting CONFIG_LWIP_MAX_SOCKETS to %d " + "Setting CONFIG_LWIP_MAX_SOCKETS to %d%s " "(TCP=%d [%s], UDP=%d [%s], TCP_LISTEN=%d [%s])", max_sockets, + sock_min, tcp_sockets, tcp_details, udp_sockets, diff --git a/esphome/components/libretiny/__init__.py b/esphome/components/libretiny/__init__.py index e36c261877..6b6b1abe1c 100644 --- a/esphome/components/libretiny/__init__.py +++ b/esphome/components/libretiny/__init__.py @@ -398,13 +398,19 @@ def _configure_lwip(config: dict) -> None: if CORE.is_bk72xx: lwip_opts.append("PBUF_POOL_SIZE=10") + tcp_min = " (min)" if tcp_sockets > raw_tcp else "" + udp_min = " (min)" if udp_sockets > raw_udp else "" + listen_min = " (min)" if listening_tcp > raw_tcp_listen else "" _LOGGER.info( - "Configuring lwIP: TCP=%d [%s], UDP=%d [%s], TCP_LISTEN=%d [%s]", + "Configuring lwIP: TCP=%d%s [%s], UDP=%d%s [%s], TCP_LISTEN=%d%s [%s]", tcp_sockets, + tcp_min, tcp_details, udp_sockets, + udp_min, udp_details, listening_tcp, + listen_min, tcp_listen_details, ) cg.add_platformio_option("custom_options.lwip", lwip_opts) From 360a7ba9c79f8d55c0583bead4fea0075ff44888 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sat, 21 Feb 2026 19:29:39 -0600 Subject: [PATCH 5/9] =?UTF-8?q?[libretiny]=20Fix=20TCP=5FWND=20to=20show?= =?UTF-8?q?=203/10=C3=97MSS=20for=20BK=20reduced/default=20plans?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BK SDK sets TCP_SND_BUF=10×MSS on both plans but TCP_WND varies: 3×MSS (reduced plan) vs 10×MSS (default plan). Also update stale minimum counts in docstring (8 TCP / 6 UDP). Co-Authored-By: Claude Opus 4.6 --- esphome/components/libretiny/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/esphome/components/libretiny/__init__.py b/esphome/components/libretiny/__init__.py index 6b6b1abe1c..f6679e2456 100644 --- a/esphome/components/libretiny/__init__.py +++ b/esphome/components/libretiny/__init__.py @@ -290,7 +290,7 @@ def _configure_lwip(config: dict) -> None: Setting ESP8266 ESP32 BK SDK RTL SDK LN SDK New ──────────────────────────────────────────────────────────────────────────── TCP_SND_BUF 2×MSS 4×MSS 10×MSS 5×MSS 7×MSS 4×MSS - TCP_WND 4×MSS 4×MSS 10×MSS 2×MSS 3×MSS 4×MSS + TCP_WND 4×MSS 4×MSS 3/10×MSS 2×MSS 3×MSS 4×MSS MEM_LIBC_MALLOC 1 1 0 0 1 1 MEMP_MEM_MALLOC 1 1 0 0 0 1 MEM_SIZE N/A* N/A* 16/32KB 5KB N/A* N/A* BK @@ -313,7 +313,7 @@ def _configure_lwip(config: dict) -> None: **** RTL/LN LT overlay overrides to flat 7. ***** Not defined in RTL SDK — lwIP opt.h defaults shown. "dynamic" = auto-calculated from component socket registrations via - socket.get_socket_counts() with minimums of 10 TCP / 8 UDP. + socket.get_socket_counts() with minimums of 8 TCP / 6 UDP. """ from esphome.components.socket import ( MIN_TCP_SOCKETS, From 30a2af0d54c9a3db71510b960a1dcd7e117cd406 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sat, 21 Feb 2026 19:39:14 -0600 Subject: [PATCH 6/9] [socket] Add MIN_TCP_LISTEN_SOCKETS constant for consistency - Clarify MIN_TCP_SOCKETS comment: covers minimal configs (api-only); when web_server is present its 5 sockets push past the minimum. - Add MIN_TCP_LISTEN_SOCKETS = 2 alongside MIN_TCP/MIN_UDP for consistency instead of hardcoding the value in libretiny. Co-Authored-By: Claude Opus 4.6 --- esphome/components/libretiny/__init__.py | 5 +++-- esphome/components/socket/__init__.py | 8 +++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/esphome/components/libretiny/__init__.py b/esphome/components/libretiny/__init__.py index f6679e2456..1a424b653f 100644 --- a/esphome/components/libretiny/__init__.py +++ b/esphome/components/libretiny/__init__.py @@ -316,6 +316,7 @@ def _configure_lwip(config: dict) -> None: socket.get_socket_counts() with minimums of 8 TCP / 6 UDP. """ from esphome.components.socket import ( + MIN_TCP_LISTEN_SOCKETS, MIN_TCP_SOCKETS, MIN_UDP_SOCKETS, get_socket_counts, @@ -328,8 +329,8 @@ def _configure_lwip(config: dict) -> None: tcp_sockets = max(MIN_TCP_SOCKETS, raw_tcp) udp_sockets = max(MIN_UDP_SOCKETS, raw_udp) # Listening sockets — registered by components (api, ota, web_server_base, etc.) - # Not all components register yet, so ensure a minimum of 2 (api + ota baseline). - listening_tcp = max(raw_tcp_listen, 2) + # Not all components register yet, so ensure a minimum for baseline operation. + listening_tcp = max(MIN_TCP_LISTEN_SOCKETS, raw_tcp_listen) # TCP_SND_BUF: ESPAsyncWebServer allocates malloc(tcp_sndbuf()) per # response chunk. At 10×MSS=14.6KB (BK default) this causes OOM (#14095). diff --git a/esphome/components/socket/__init__.py b/esphome/components/socket/__init__.py index b8f971fd05..572e7993b9 100644 --- a/esphome/components/socket/__init__.py +++ b/esphome/components/socket/__init__.py @@ -21,12 +21,14 @@ KEY_SOCKET_CONSUMERS_TCP = "socket_consumers_tcp" KEY_SOCKET_CONSUMERS_UDP = "socket_consumers_udp" KEY_SOCKET_CONSUMERS_TCP_LISTEN = "socket_consumers_tcp_listen" -# Recommended minimum socket counts to ensure headroom. +# Recommended minimum socket counts. # Platforms should apply these (or their own) on top of get_socket_counts(). -# TCP: api(3) = 3 base, +5 headroom for ota-transfer/web_server/other. -# UDP: dhcp(1) + dns(1) + mdns(2) + wake_loop(1) = 5 base, +1 headroom. +# These cover minimal configs (e.g. api-only without web_server). +# When web_server is present, its 5 registered sockets push past the TCP minimum. MIN_TCP_SOCKETS = 8 MIN_UDP_SOCKETS = 6 +# Minimum listening sockets — at least api + ota baseline. +MIN_TCP_LISTEN_SOCKETS = 2 # Wake loop threadsafe support tracking KEY_WAKE_LOOP_THREADSAFE_REQUIRED = "wake_loop_threadsafe_required" From a4682615234ac61ff7825e2a922807b11e6772fa Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sat, 21 Feb 2026 19:41:26 -0600 Subject: [PATCH 7/9] [scheduler] De-template and consolidate scheduler helper functions (#14164) --- esphome/core/scheduler.cpp | 14 +++++++++---- esphome/core/scheduler.h | 41 +++++++------------------------------- 2 files changed, 17 insertions(+), 38 deletions(-) diff --git a/esphome/core/scheduler.cpp b/esphome/core/scheduler.cpp index 36b65f6ff7..e4e0751e10 100644 --- a/esphome/core/scheduler.cpp +++ b/esphome/core/scheduler.cpp @@ -119,10 +119,16 @@ uint32_t Scheduler::calculate_interval_offset_(uint32_t delay) { // Remove before 2026.8.0 along with all retry code bool Scheduler::is_retry_cancelled_locked_(Component *component, NameType name_type, const char *static_name, uint32_t hash_or_id) { - return has_cancelled_timeout_in_container_locked_(this->items_, component, name_type, static_name, hash_or_id, - /* match_retry= */ true) || - has_cancelled_timeout_in_container_locked_(this->to_add_, component, name_type, static_name, hash_or_id, - /* match_retry= */ true); + for (auto *container : {&this->items_, &this->to_add_}) { + for (auto &item : *container) { + if (item && this->is_item_removed_locked_(item.get()) && + this->matches_item_locked_(item, component, name_type, static_name, hash_or_id, SchedulerItem::TIMEOUT, + /* match_retry= */ true, /* skip_removed= */ false)) { + return true; + } + } + } + return false; } // Common implementation for both timeout and interval diff --git a/esphome/core/scheduler.h b/esphome/core/scheduler.h index 384d76b6b0..16b0ded312 100644 --- a/esphome/core/scheduler.h +++ b/esphome/core/scheduler.h @@ -308,8 +308,8 @@ class Scheduler { SchedulerItem::Type type, bool match_retry, bool skip_removed = true) const { // THREAD SAFETY: Check for nullptr first to prevent LoadProhibited crashes. On multi-threaded // platforms, items can be moved out of defer_queue_ during processing, leaving nullptr entries. - // PR #11305 added nullptr checks in callers (mark_matching_items_removed_locked_() and - // has_cancelled_timeout_in_container_locked_()), but this check provides defense-in-depth: helper + // PR #11305 added nullptr checks in callers (mark_matching_items_removed_locked_()), but this check + // provides defense-in-depth: helper // functions should be safe regardless of caller behavior. // Fixes: https://github.com/esphome/esphome/issues/11940 if (!item) @@ -403,8 +403,7 @@ class Scheduler { // SAFETY: Moving out the unique_ptr leaves a nullptr in the vector at defer_queue_front_. // This is intentional and safe because: // 1. The vector is only cleaned up by cleanup_defer_queue_locked_() at the end of this function - // 2. Any code iterating defer_queue_ MUST check for nullptr items (see mark_matching_items_removed_locked_ - // and has_cancelled_timeout_in_container_locked_ in scheduler.h) + // 2. Any code iterating defer_queue_ MUST check for nullptr items (see mark_matching_items_removed_locked_) // 3. The lock protects concurrent access, but the nullptr remains until cleanup item = std::move(this->defer_queue_[this->defer_queue_front_]); this->defer_queue_front_++; @@ -497,19 +496,16 @@ class Scheduler { // name_type determines matching: STATIC_STRING uses static_name, others use hash_or_id // Returns the number of items marked for removal // IMPORTANT: Must be called with scheduler lock held - template - size_t mark_matching_items_removed_locked_(Container &container, Component *component, NameType name_type, - const char *static_name, uint32_t hash_or_id, SchedulerItem::Type type, - bool match_retry) { + size_t mark_matching_items_removed_locked_(std::vector> &container, + Component *component, NameType name_type, const char *static_name, + uint32_t hash_or_id, SchedulerItem::Type type, bool match_retry) { size_t count = 0; for (auto &item : container) { // Skip nullptr items (can happen in defer_queue_ when items are being processed) // The defer_queue_ uses index-based processing: items are std::moved out but left in the // vector as nullptr until cleanup. Even though this function is called with lock held, // the vector can still contain nullptr items from the processing loop. This check prevents crashes. - if (!item) - continue; - if (this->matches_item_locked_(item, component, name_type, static_name, hash_or_id, type, match_retry)) { + if (item && this->matches_item_locked_(item, component, name_type, static_name, hash_or_id, type, match_retry)) { this->set_item_removed_(item.get(), true); count++; } @@ -517,29 +513,6 @@ class Scheduler { return count; } - // Template helper to check if any item in a container matches our criteria - // name_type determines matching: STATIC_STRING uses static_name, others use hash_or_id - // IMPORTANT: Must be called with scheduler lock held - template - bool has_cancelled_timeout_in_container_locked_(const Container &container, Component *component, NameType name_type, - const char *static_name, uint32_t hash_or_id, - bool match_retry) const { - for (const auto &item : container) { - // Skip nullptr items (can happen in defer_queue_ when items are being processed) - // The defer_queue_ uses index-based processing: items are std::moved out but left in the - // vector as nullptr until cleanup. If this function is called during defer queue processing, - // it will iterate over these nullptr items. This check prevents crashes. - if (!item) - continue; - if (this->is_item_removed_locked_(item.get()) && - this->matches_item_locked_(item, component, name_type, static_name, hash_or_id, SchedulerItem::TIMEOUT, - match_retry, /* skip_removed= */ false)) { - return true; - } - } - return false; - } - Mutex lock_; std::vector> items_; std::vector> to_add_; From d5c9c56fdfcdd0112e1913f05f84e297908460a7 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sat, 21 Feb 2026 19:41:43 -0600 Subject: [PATCH 8/9] [platformio] Add exponential backoff and session reset to download retries (#14191) --- esphome/platformio_api.py | 41 ++++- tests/unit_tests/test_platformio_api.py | 196 +++++++++++++++++++++++- 2 files changed, 231 insertions(+), 6 deletions(-) diff --git a/esphome/platformio_api.py b/esphome/platformio_api.py index d42f89d029..5d4065207f 100644 --- a/esphome/platformio_api.py +++ b/esphome/platformio_api.py @@ -5,6 +5,7 @@ import os from pathlib import Path import re import subprocess +import time from typing import Any from esphome.const import CONF_COMPILE_PROCESS_LIMIT, CONF_ESPHOME, KEY_CORE @@ -44,31 +45,61 @@ def patch_structhash(): def patch_file_downloader(): - """Patch PlatformIO's FileDownloader to retry on PackageException errors.""" + """Patch PlatformIO's FileDownloader to retry on PackageException errors. + + PlatformIO's FileDownloader uses HTTPSession which lacks built-in retry + for 502/503 errors. We add retries with exponential backoff and close the + session between attempts to force a fresh TCP connection, which may route + to a different CDN edge node. + """ from platformio.package.download import FileDownloader from platformio.package.exception import PackageException + if getattr(FileDownloader.__init__, "_esphome_patched", False): + return + original_init = FileDownloader.__init__ def patched_init(self, *args: Any, **kwargs: Any) -> None: - max_retries = 3 + max_retries = 5 for attempt in range(max_retries): try: - return original_init(self, *args, **kwargs) + original_init(self, *args, **kwargs) + return except PackageException as e: if attempt < max_retries - 1: + # Exponential backoff: 2, 4, 8, 16 seconds + delay = 2 ** (attempt + 1) _LOGGER.warning( - "Package download failed: %s. Retrying... (attempt %d/%d)", + "Package download failed: %s. " + "Retrying in %d seconds... (attempt %d/%d)", str(e), + delay, attempt + 1, max_retries, ) + # Close the response and session to free resources + # and force a new TCP connection on retry, which may + # route to a different CDN edge node + # pylint: disable=protected-access,broad-except + try: + if ( + hasattr(self, "_http_response") + and self._http_response is not None + ): + self._http_response.close() + if hasattr(self, "_http_session"): + self._http_session.close() + except Exception: + pass + # pylint: enable=protected-access,broad-except + time.sleep(delay) else: # Final attempt - re-raise raise - return None + patched_init._esphome_patched = True # type: ignore[attr-defined] # pylint: disable=protected-access FileDownloader.__init__ = patched_init diff --git a/tests/unit_tests/test_platformio_api.py b/tests/unit_tests/test_platformio_api.py index 4d7b635e59..1686144277 100644 --- a/tests/unit_tests/test_platformio_api.py +++ b/tests/unit_tests/test_platformio_api.py @@ -6,7 +6,7 @@ import os from pathlib import Path import shutil from types import SimpleNamespace -from unittest.mock import MagicMock, Mock, patch +from unittest.mock import MagicMock, Mock, call, patch import pytest @@ -673,6 +673,200 @@ def test_process_stacktrace_bad_alloc( assert state is False +def test_patch_file_downloader_succeeds_first_try() -> None: + """Test patch_file_downloader succeeds on first attempt.""" + mock_exception_cls = type("PackageException", (Exception,), {}) + original_init = MagicMock() + + with patch.dict( + "sys.modules", + { + "platformio": MagicMock(), + "platformio.package": MagicMock(), + "platformio.package.download": SimpleNamespace( + FileDownloader=type("FileDownloader", (), {"__init__": original_init}) + ), + "platformio.package.exception": SimpleNamespace( + PackageException=mock_exception_cls + ), + }, + ): + platformio_api.patch_file_downloader() + + from platformio.package.download import FileDownloader + + instance = object.__new__(FileDownloader) + FileDownloader.__init__(instance, "http://example.com/file.zip") + + original_init.assert_called_once() + + +def test_patch_file_downloader_retries_on_failure() -> None: + """Test patch_file_downloader retries with backoff on PackageException.""" + mock_exception_cls = type("PackageException", (Exception,), {}) + call_count = 0 + + def failing_init(self, *args, **kwargs): + nonlocal call_count + call_count += 1 + if call_count < 3: + raise mock_exception_cls(f"502 error attempt {call_count}") + + with ( + patch.dict( + "sys.modules", + { + "platformio": MagicMock(), + "platformio.package": MagicMock(), + "platformio.package.download": SimpleNamespace( + FileDownloader=type( + "FileDownloader", (), {"__init__": failing_init} + ) + ), + "platformio.package.exception": SimpleNamespace( + PackageException=mock_exception_cls + ), + }, + ), + patch("time.sleep") as mock_sleep, + ): + platformio_api.patch_file_downloader() + + from platformio.package.download import FileDownloader + + instance = object.__new__(FileDownloader) + FileDownloader.__init__(instance, "http://example.com/file.zip") + + # Should have been called 3 times (2 failures + 1 success) + assert call_count == 3 + + # Should have slept with exponential backoff: 2s, 4s + assert mock_sleep.call_count == 2 + mock_sleep.assert_any_call(2) + mock_sleep.assert_any_call(4) + + +def test_patch_file_downloader_raises_after_max_retries() -> None: + """Test patch_file_downloader raises after exhausting all retries.""" + mock_exception_cls = type("PackageException", (Exception,), {}) + + def always_failing_init(self, *args, **kwargs): + raise mock_exception_cls("502 error") + + with ( + patch.dict( + "sys.modules", + { + "platformio": MagicMock(), + "platformio.package": MagicMock(), + "platformio.package.download": SimpleNamespace( + FileDownloader=type( + "FileDownloader", (), {"__init__": always_failing_init} + ) + ), + "platformio.package.exception": SimpleNamespace( + PackageException=mock_exception_cls + ), + }, + ), + patch("time.sleep") as mock_sleep, + ): + platformio_api.patch_file_downloader() + + from platformio.package.download import FileDownloader + + instance = object.__new__(FileDownloader) + with pytest.raises(mock_exception_cls, match="502 error"): + FileDownloader.__init__(instance, "http://example.com/file.zip") + + # Should have slept 4 times (before attempts 2-5), not on final attempt + assert mock_sleep.call_count == 4 + mock_sleep.assert_has_calls([call(2), call(4), call(8), call(16)]) + + +def test_patch_file_downloader_closes_session_and_response_between_retries() -> None: + """Test patch_file_downloader closes HTTP session and response between retries.""" + mock_exception_cls = type("PackageException", (Exception,), {}) + mock_session = MagicMock() + mock_response = MagicMock() + call_count = 0 + + def failing_init_with_session(self, *args, **kwargs): + nonlocal call_count + call_count += 1 + self._http_session = mock_session + self._http_response = mock_response + if call_count < 2: + raise mock_exception_cls("502 error") + + with ( + patch.dict( + "sys.modules", + { + "platformio": MagicMock(), + "platformio.package": MagicMock(), + "platformio.package.download": SimpleNamespace( + FileDownloader=type( + "FileDownloader", + (), + {"__init__": failing_init_with_session}, + ) + ), + "platformio.package.exception": SimpleNamespace( + PackageException=mock_exception_cls + ), + }, + ), + patch("time.sleep"), + ): + platformio_api.patch_file_downloader() + + from platformio.package.download import FileDownloader + + instance = object.__new__(FileDownloader) + FileDownloader.__init__(instance, "http://example.com/file.zip") + + # Both response and session should have been closed between retries + mock_response.close.assert_called_once() + mock_session.close.assert_called_once() + + +def test_patch_file_downloader_idempotent() -> None: + """Test patch_file_downloader does not stack wrappers when called multiple times.""" + mock_exception_cls = type("PackageException", (Exception,), {}) + call_count = 0 + + def counting_init(self, *args, **kwargs): + nonlocal call_count + call_count += 1 + + with patch.dict( + "sys.modules", + { + "platformio": MagicMock(), + "platformio.package": MagicMock(), + "platformio.package.download": SimpleNamespace( + FileDownloader=type("FileDownloader", (), {"__init__": counting_init}) + ), + "platformio.package.exception": SimpleNamespace( + PackageException=mock_exception_cls + ), + }, + ): + # Patch multiple times + platformio_api.patch_file_downloader() + platformio_api.patch_file_downloader() + platformio_api.patch_file_downloader() + + from platformio.package.download import FileDownloader + + instance = object.__new__(FileDownloader) + FileDownloader.__init__(instance, "http://example.com/file.zip") + + # Should only be called once, not 3 times from stacked wrappers + assert call_count == 1 + + def test_platformio_log_filter_allows_non_platformio_messages() -> None: """Test that non-platformio logger messages are allowed through.""" log_filter = platformio_api.PlatformioLogFilter() From d5efbfde5c44cb4ee173d70f444f0ae1a9859b55 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sat, 21 Feb 2026 20:03:08 -0600 Subject: [PATCH 9/9] [socket] Return SocketCounts dataclass from get_socket_counts() Replace the unwieldy 6-tuple with a frozen dataclass for clarity and named attribute access at call sites. Co-Authored-By: Claude Opus 4.6 --- esphome/components/esp32/__init__.py | 29 +++++++++--------------- esphome/components/libretiny/__init__.py | 22 ++++++++---------- esphome/components/socket/__init__.py | 21 +++++++++++++---- 3 files changed, 38 insertions(+), 34 deletions(-) diff --git a/esphome/components/esp32/__init__.py b/esphome/components/esp32/__init__.py index f15acfe4a9..4c211b2f2a 100644 --- a/esphome/components/esp32/__init__.py +++ b/esphome/components/esp32/__init__.py @@ -1265,15 +1265,8 @@ def _configure_lwip_max_sockets(conf: dict) -> None: # CONFIG_LWIP_MAX_SOCKETS is a single VFS socket pool shared by all socket # types (TCP clients, TCP listeners, and UDP). Include all three counts. - ( - tcp_sockets, - udp_sockets, - tcp_listen, - tcp_details, - udp_details, - tcp_listen_details, - ) = get_socket_counts() - total_sockets = tcp_sockets + udp_sockets + tcp_listen + sc = get_socket_counts() + total_sockets = sc.tcp + sc.udp + sc.tcp_listen # User specified their own value - respect it but warn if insufficient if user_max_sockets is not None: @@ -1294,9 +1287,9 @@ def _configure_lwip_max_sockets(conf: dict) -> None: "at least %d.", user_sockets_int, total_sockets, - tcp_sockets, - udp_sockets, - tcp_listen, + sc.tcp, + sc.udp, + sc.tcp_listen, total_sockets, ) # User's value already added via sdkconfig_options processing @@ -1314,12 +1307,12 @@ def _configure_lwip_max_sockets(conf: dict) -> None: "(TCP=%d [%s], UDP=%d [%s], TCP_LISTEN=%d [%s])", max_sockets, sock_min, - tcp_sockets, - tcp_details, - udp_sockets, - udp_details, - tcp_listen, - tcp_listen_details, + sc.tcp, + sc.tcp_details, + sc.udp, + sc.udp_details, + sc.tcp_listen, + sc.tcp_listen_details, ) add_idf_sdkconfig_option("CONFIG_LWIP_MAX_SOCKETS", max_sockets) diff --git a/esphome/components/libretiny/__init__.py b/esphome/components/libretiny/__init__.py index 1a424b653f..2291114d9a 100644 --- a/esphome/components/libretiny/__init__.py +++ b/esphome/components/libretiny/__init__.py @@ -322,15 +322,13 @@ def _configure_lwip(config: dict) -> None: get_socket_counts, ) - raw_tcp, raw_udp, raw_tcp_listen, tcp_details, udp_details, tcp_listen_details = ( - get_socket_counts() - ) + sc = get_socket_counts() # Apply platform minimums — ensure headroom for ESPHome's needs - tcp_sockets = max(MIN_TCP_SOCKETS, raw_tcp) - udp_sockets = max(MIN_UDP_SOCKETS, raw_udp) + tcp_sockets = max(MIN_TCP_SOCKETS, sc.tcp) + udp_sockets = max(MIN_UDP_SOCKETS, sc.udp) # Listening sockets — registered by components (api, ota, web_server_base, etc.) # Not all components register yet, so ensure a minimum for baseline operation. - listening_tcp = max(MIN_TCP_LISTEN_SOCKETS, raw_tcp_listen) + listening_tcp = max(MIN_TCP_LISTEN_SOCKETS, sc.tcp_listen) # TCP_SND_BUF: ESPAsyncWebServer allocates malloc(tcp_sndbuf()) per # response chunk. At 10×MSS=14.6KB (BK default) this causes OOM (#14095). @@ -399,20 +397,20 @@ def _configure_lwip(config: dict) -> None: if CORE.is_bk72xx: lwip_opts.append("PBUF_POOL_SIZE=10") - tcp_min = " (min)" if tcp_sockets > raw_tcp else "" - udp_min = " (min)" if udp_sockets > raw_udp else "" - listen_min = " (min)" if listening_tcp > raw_tcp_listen else "" + tcp_min = " (min)" if tcp_sockets > sc.tcp else "" + udp_min = " (min)" if udp_sockets > sc.udp else "" + listen_min = " (min)" if listening_tcp > sc.tcp_listen else "" _LOGGER.info( "Configuring lwIP: TCP=%d%s [%s], UDP=%d%s [%s], TCP_LISTEN=%d%s [%s]", tcp_sockets, tcp_min, - tcp_details, + sc.tcp_details, udp_sockets, udp_min, - udp_details, + sc.udp_details, listening_tcp, listen_min, - tcp_listen_details, + sc.tcp_listen_details, ) cg.add_platformio_option("custom_options.lwip", lwip_opts) diff --git a/esphome/components/socket/__init__.py b/esphome/components/socket/__init__.py index 572e7993b9..d82f0c7aba 100644 --- a/esphome/components/socket/__init__.py +++ b/esphome/components/socket/__init__.py @@ -1,4 +1,5 @@ from collections.abc import Callable, MutableMapping +from dataclasses import dataclass from enum import StrEnum import logging @@ -77,10 +78,20 @@ def _format_consumers(consumers: dict[str, int]) -> str: return ", ".join(f"{name}={count}" for name, count in sorted(consumers.items())) -def get_socket_counts() -> tuple[int, int, int, str, str, str]: - """Return socket counts and component details for platform configuration. +@dataclass(frozen=True) +class SocketCounts: + """Socket counts and component details for platform configuration.""" - Returns (tcp, udp, tcp_listen, tcp_details, udp_details, tcp_listen_details). + tcp: int + udp: int + tcp_listen: int + tcp_details: str + udp_details: str + tcp_listen_details: str + + +def get_socket_counts() -> SocketCounts: + """Return socket counts and component details for platform configuration. Platforms call this during code generation to configure lwIP socket limits. All components will have registered their needs by then. @@ -106,7 +117,9 @@ def get_socket_counts() -> tuple[int, int, int, str, str, str]: tcp_listen, tcp_listen_details, ) - return tcp, udp, tcp_listen, tcp_details, udp_details, tcp_listen_details + return SocketCounts( + tcp, udp, tcp_listen, tcp_details, udp_details, tcp_listen_details + ) def require_wake_loop_threadsafe() -> None: