[platformio] Add exponential backoff and session reset to download retries (#14191)

This commit is contained in:
J. Nick Koston
2026-02-21 19:41:43 -06:00
committed by GitHub
parent a468261523
commit d5c9c56fdf
2 changed files with 231 additions and 6 deletions

View File

@@ -5,6 +5,7 @@ import os
from pathlib import Path
import re
import subprocess
import time
from typing import Any
from esphome.const import CONF_COMPILE_PROCESS_LIMIT, CONF_ESPHOME, KEY_CORE
@@ -44,31 +45,61 @@ def patch_structhash():
def patch_file_downloader():
"""Patch PlatformIO's FileDownloader to retry on PackageException errors."""
"""Patch PlatformIO's FileDownloader to retry on PackageException errors.
PlatformIO's FileDownloader uses HTTPSession which lacks built-in retry
for 502/503 errors. We add retries with exponential backoff and close the
session between attempts to force a fresh TCP connection, which may route
to a different CDN edge node.
"""
from platformio.package.download import FileDownloader
from platformio.package.exception import PackageException
if getattr(FileDownloader.__init__, "_esphome_patched", False):
return
original_init = FileDownloader.__init__
def patched_init(self, *args: Any, **kwargs: Any) -> None:
max_retries = 3
max_retries = 5
for attempt in range(max_retries):
try:
return original_init(self, *args, **kwargs)
original_init(self, *args, **kwargs)
return
except PackageException as e:
if attempt < max_retries - 1:
# Exponential backoff: 2, 4, 8, 16 seconds
delay = 2 ** (attempt + 1)
_LOGGER.warning(
"Package download failed: %s. Retrying... (attempt %d/%d)",
"Package download failed: %s. "
"Retrying in %d seconds... (attempt %d/%d)",
str(e),
delay,
attempt + 1,
max_retries,
)
# Close the response and session to free resources
# and force a new TCP connection on retry, which may
# route to a different CDN edge node
# pylint: disable=protected-access,broad-except
try:
if (
hasattr(self, "_http_response")
and self._http_response is not None
):
self._http_response.close()
if hasattr(self, "_http_session"):
self._http_session.close()
except Exception:
pass
# pylint: enable=protected-access,broad-except
time.sleep(delay)
else:
# Final attempt - re-raise
raise
return None
patched_init._esphome_patched = True # type: ignore[attr-defined] # pylint: disable=protected-access
FileDownloader.__init__ = patched_init

View File

@@ -6,7 +6,7 @@ import os
from pathlib import Path
import shutil
from types import SimpleNamespace
from unittest.mock import MagicMock, Mock, patch
from unittest.mock import MagicMock, Mock, call, patch
import pytest
@@ -673,6 +673,200 @@ def test_process_stacktrace_bad_alloc(
assert state is False
def test_patch_file_downloader_succeeds_first_try() -> None:
"""Test patch_file_downloader succeeds on first attempt."""
mock_exception_cls = type("PackageException", (Exception,), {})
original_init = MagicMock()
with patch.dict(
"sys.modules",
{
"platformio": MagicMock(),
"platformio.package": MagicMock(),
"platformio.package.download": SimpleNamespace(
FileDownloader=type("FileDownloader", (), {"__init__": original_init})
),
"platformio.package.exception": SimpleNamespace(
PackageException=mock_exception_cls
),
},
):
platformio_api.patch_file_downloader()
from platformio.package.download import FileDownloader
instance = object.__new__(FileDownloader)
FileDownloader.__init__(instance, "http://example.com/file.zip")
original_init.assert_called_once()
def test_patch_file_downloader_retries_on_failure() -> None:
"""Test patch_file_downloader retries with backoff on PackageException."""
mock_exception_cls = type("PackageException", (Exception,), {})
call_count = 0
def failing_init(self, *args, **kwargs):
nonlocal call_count
call_count += 1
if call_count < 3:
raise mock_exception_cls(f"502 error attempt {call_count}")
with (
patch.dict(
"sys.modules",
{
"platformio": MagicMock(),
"platformio.package": MagicMock(),
"platformio.package.download": SimpleNamespace(
FileDownloader=type(
"FileDownloader", (), {"__init__": failing_init}
)
),
"platformio.package.exception": SimpleNamespace(
PackageException=mock_exception_cls
),
},
),
patch("time.sleep") as mock_sleep,
):
platformio_api.patch_file_downloader()
from platformio.package.download import FileDownloader
instance = object.__new__(FileDownloader)
FileDownloader.__init__(instance, "http://example.com/file.zip")
# Should have been called 3 times (2 failures + 1 success)
assert call_count == 3
# Should have slept with exponential backoff: 2s, 4s
assert mock_sleep.call_count == 2
mock_sleep.assert_any_call(2)
mock_sleep.assert_any_call(4)
def test_patch_file_downloader_raises_after_max_retries() -> None:
"""Test patch_file_downloader raises after exhausting all retries."""
mock_exception_cls = type("PackageException", (Exception,), {})
def always_failing_init(self, *args, **kwargs):
raise mock_exception_cls("502 error")
with (
patch.dict(
"sys.modules",
{
"platformio": MagicMock(),
"platformio.package": MagicMock(),
"platformio.package.download": SimpleNamespace(
FileDownloader=type(
"FileDownloader", (), {"__init__": always_failing_init}
)
),
"platformio.package.exception": SimpleNamespace(
PackageException=mock_exception_cls
),
},
),
patch("time.sleep") as mock_sleep,
):
platformio_api.patch_file_downloader()
from platformio.package.download import FileDownloader
instance = object.__new__(FileDownloader)
with pytest.raises(mock_exception_cls, match="502 error"):
FileDownloader.__init__(instance, "http://example.com/file.zip")
# Should have slept 4 times (before attempts 2-5), not on final attempt
assert mock_sleep.call_count == 4
mock_sleep.assert_has_calls([call(2), call(4), call(8), call(16)])
def test_patch_file_downloader_closes_session_and_response_between_retries() -> None:
"""Test patch_file_downloader closes HTTP session and response between retries."""
mock_exception_cls = type("PackageException", (Exception,), {})
mock_session = MagicMock()
mock_response = MagicMock()
call_count = 0
def failing_init_with_session(self, *args, **kwargs):
nonlocal call_count
call_count += 1
self._http_session = mock_session
self._http_response = mock_response
if call_count < 2:
raise mock_exception_cls("502 error")
with (
patch.dict(
"sys.modules",
{
"platformio": MagicMock(),
"platformio.package": MagicMock(),
"platformio.package.download": SimpleNamespace(
FileDownloader=type(
"FileDownloader",
(),
{"__init__": failing_init_with_session},
)
),
"platformio.package.exception": SimpleNamespace(
PackageException=mock_exception_cls
),
},
),
patch("time.sleep"),
):
platformio_api.patch_file_downloader()
from platformio.package.download import FileDownloader
instance = object.__new__(FileDownloader)
FileDownloader.__init__(instance, "http://example.com/file.zip")
# Both response and session should have been closed between retries
mock_response.close.assert_called_once()
mock_session.close.assert_called_once()
def test_patch_file_downloader_idempotent() -> None:
"""Test patch_file_downloader does not stack wrappers when called multiple times."""
mock_exception_cls = type("PackageException", (Exception,), {})
call_count = 0
def counting_init(self, *args, **kwargs):
nonlocal call_count
call_count += 1
with patch.dict(
"sys.modules",
{
"platformio": MagicMock(),
"platformio.package": MagicMock(),
"platformio.package.download": SimpleNamespace(
FileDownloader=type("FileDownloader", (), {"__init__": counting_init})
),
"platformio.package.exception": SimpleNamespace(
PackageException=mock_exception_cls
),
},
):
# Patch multiple times
platformio_api.patch_file_downloader()
platformio_api.patch_file_downloader()
platformio_api.patch_file_downloader()
from platformio.package.download import FileDownloader
instance = object.__new__(FileDownloader)
FileDownloader.__init__(instance, "http://example.com/file.zip")
# Should only be called once, not 3 times from stacked wrappers
assert call_count == 1
def test_platformio_log_filter_allows_non_platformio_messages() -> None:
"""Test that non-platformio logger messages are allowed through."""
log_filter = platformio_api.PlatformioLogFilter()