From 398923615420643dbda372bb253acddfcaa3bd4d Mon Sep 17 00:00:00 2001
From: "J. Nick Koston"
Date: Tue, 17 Feb 2026 18:54:25 -0600
Subject: [PATCH 1/8] [api] Split ProtoVarInt::parse into 32-bit and 64-bit phases

On 32-bit platforms (ESP32 Xtensa), 64-bit shifts in varint parsing
compile to __ashldi3 library calls. Since the vast majority of protobuf
varint fields (message types, sizes, enum values, sensor readings) fit
in 4 bytes, the 64-bit arithmetic is unnecessary overhead on the common
path.

Split parse() into two phases:
- Bytes 0-3: uint32_t loop with native 32-bit shifts (0, 7, 14, 21)
- Bytes 4-9: noinline parse_wide_() with uint64_t, only for BLE
  addresses and other 64-bit fields

The code generator auto-detects which proto messages use
int64/uint64/sint64 fields and emits USE_API_VARINT64 conditionally.
On non-BLE configs, parse_wide_() and the 64-bit accessors (as_uint64,
as_int64, as_sint64) are compiled out entirely.

Saves ~40 bytes of flash on non-BLE configs. Benchmark shows 25-50%
faster parsing for 1-4 byte varints (the common case).
--- esphome/components/api/api_pb2.h | 3 ++ esphome/components/api/proto.cpp | 17 +++++++ esphome/components/api/proto.h | 73 ++++++++++++++--------------- esphome/core/defines.h | 1 + script/api_protobuf/api_protobuf.py | 50 ++++++++++++++++++-- 5 files changed, 100 insertions(+), 44 deletions(-) diff --git a/esphome/components/api/api_pb2.h b/esphome/components/api/api_pb2.h index d001f869c5..8424f3b629 100644 --- a/esphome/components/api/api_pb2.h +++ b/esphome/components/api/api_pb2.h @@ -3,6 +3,9 @@ #pragma once #include "esphome/core/defines.h" +#ifdef USE_BLUETOOTH_PROXY +#define USE_API_VARINT64 +#endif #include "esphome/core/string_ref.h" #include "proto.h" diff --git a/esphome/components/api/proto.cpp b/esphome/components/api/proto.cpp index 2a0ddf91db..74aca55103 100644 --- a/esphome/components/api/proto.cpp +++ b/esphome/components/api/proto.cpp @@ -7,6 +7,23 @@ namespace esphome::api { static const char *const TAG = "api.proto"; +#ifdef USE_API_VARINT64 +optional ProtoVarInt::parse_wide_(const uint8_t *buffer, uint32_t len, uint32_t *consumed, + uint32_t result32) { + uint64_t result64 = result32; + uint32_t limit = std::min(len, uint32_t(10)); + for (uint32_t i = 4; i < limit; i++) { + uint8_t val = buffer[i]; + result64 |= uint64_t(val & 0x7F) << (i * 7); + if ((val & 0x80) == 0) { + *consumed = i + 1; + return ProtoVarInt(result64); + } + } + return {}; +} +#endif + uint32_t ProtoDecodableMessage::count_repeated_field(const uint8_t *buffer, size_t length, uint32_t target_field_id) { uint32_t count = 0; const uint8_t *ptr = buffer; diff --git a/esphome/components/api/proto.h b/esphome/components/api/proto.h index 41ea0043f9..2dc92e46a7 100644 --- a/esphome/components/api/proto.h +++ b/esphome/components/api/proto.h @@ -94,65 +94,60 @@ class ProtoVarInt { explicit ProtoVarInt(uint64_t value) : value_(value) {} static optional parse(const uint8_t *buffer, uint32_t len, uint32_t *consumed) { - if (len == 0) { - if (consumed != nullptr) - *consumed = 0; 
- return {}; - } - - // Most common case: single-byte varint (values 0-127) - if ((buffer[0] & 0x80) == 0) { - if (consumed != nullptr) - *consumed = 1; - return ProtoVarInt(buffer[0]); - } - - // General case for multi-byte varints - // Since we know buffer[0]'s high bit is set, initialize with its value - uint64_t result = buffer[0] & 0x7F; - uint8_t bitpos = 7; - - // A 64-bit varint is at most 10 bytes (ceil(64/7)). Reject overlong encodings - // to avoid undefined behavior from shifting uint64_t by >= 64 bits. - uint32_t max_len = std::min(len, uint32_t(10)); - - // Start from the second byte since we've already processed the first - for (uint32_t i = 1; i < max_len; i++) { +#ifdef ESPHOME_DEBUG_API + assert(consumed != nullptr); +#endif + // 32-bit phase: bytes 0-3 (shifts 0, 7, 14, 21 — all native on 32-bit platforms) + uint32_t result32 = 0; + uint32_t limit = std::min(len, uint32_t(4)); + for (uint32_t i = 0; i < limit; i++) { uint8_t val = buffer[i]; - result |= uint64_t(val & 0x7F) << uint64_t(bitpos); - bitpos += 7; + result32 |= uint32_t(val & 0x7F) << (i * 7); if ((val & 0x80) == 0) { - if (consumed != nullptr) - *consumed = i + 1; - return ProtoVarInt(result); + *consumed = i + 1; + return ProtoVarInt(result32); } } - - if (consumed != nullptr) - *consumed = 0; - return {}; // Incomplete or invalid varint + // 64-bit phase for values > 28 bits (BLE addresses etc.) +#ifdef USE_API_VARINT64 + return parse_wide_(buffer, len, consumed, result32); +#else + return {}; +#endif } +#ifdef USE_API_VARINT64 + protected: + /// Continue parsing varint bytes 4-9 with 64-bit arithmetic. + /// Separated to keep 64-bit shift code (__ashldi3 on 32-bit platforms) out of the common path. 
+ static optional parse_wide_(const uint8_t *buffer, uint32_t len, uint32_t *consumed, uint32_t result32) + __attribute__((noinline)); + + public: +#endif + constexpr uint16_t as_uint16() const { return this->value_; } constexpr uint32_t as_uint32() const { return this->value_; } - constexpr uint64_t as_uint64() const { return this->value_; } constexpr bool as_bool() const { return this->value_; } constexpr int32_t as_int32() const { // Not ZigZag encoded - return static_cast(this->as_int64()); - } - constexpr int64_t as_int64() const { - // Not ZigZag encoded - return static_cast(this->value_); + return static_cast(this->value_); } constexpr int32_t as_sint32() const { // with ZigZag encoding return decode_zigzag32(static_cast(this->value_)); } +#ifdef USE_API_VARINT64 + constexpr uint64_t as_uint64() const { return this->value_; } + constexpr int64_t as_int64() const { + // Not ZigZag encoded + return static_cast(this->value_); + } constexpr int64_t as_sint64() const { // with ZigZag encoding return decode_zigzag64(this->value_); } +#endif /** * Encode the varint value to a pre-allocated buffer without bounds checking. 
* diff --git a/esphome/core/defines.h b/esphome/core/defines.h index ee865a7e65..80f4d228ec 100644 --- a/esphome/core/defines.h +++ b/esphome/core/defines.h @@ -136,6 +136,7 @@ #define USE_API_HOMEASSISTANT_SERVICES #define USE_API_HOMEASSISTANT_STATES #define USE_API_NOISE +#define USE_API_VARINT64 #define USE_API_PLAINTEXT #define USE_API_USER_DEFINED_ACTIONS #define USE_API_CUSTOM_SERVICES diff --git a/script/api_protobuf/api_protobuf.py b/script/api_protobuf/api_protobuf.py index 4fbee49dae..2324708324 100755 --- a/script/api_protobuf/api_protobuf.py +++ b/script/api_protobuf/api_protobuf.py @@ -1905,6 +1905,34 @@ def build_type_usage_map( ) +def get_varint64_ifdef( + file_desc: descriptor.FileDescriptorProto, + message_ifdef_map: dict[str, str | None], +) -> tuple[bool, str | None]: + """Check if 64-bit varint fields exist and get their common ifdef guard. + + Returns: + (has_varint64, ifdef_guard) - has_varint64 is True if any fields exist, + ifdef_guard is the common guard or None if unconditional. + """ + varint64_types = { + FieldDescriptorProto.TYPE_INT64, + FieldDescriptorProto.TYPE_UINT64, + FieldDescriptorProto.TYPE_SINT64, + } + ifdefs: set[str | None] = { + message_ifdef_map.get(msg.name) + for msg in file_desc.message_type + if not msg.options.deprecated + for field in msg.field + if not field.options.deprecated and field.type in varint64_types + } + if not ifdefs: + return False, None + ifdefs.discard(None) + return True, ifdefs.pop() if len(ifdefs) == 1 else None + + def build_enum_type(desc, enum_ifdef_map) -> tuple[str, str, str]: """Builds the enum type. @@ -2559,11 +2587,28 @@ def main() -> None: file = d.file[0] + # Build dynamic ifdef mappings early so we can emit USE_API_VARINT64 before includes + enum_ifdef_map, message_ifdef_map, message_source_map, used_messages = ( + build_type_usage_map(file) + ) + + # Find the ifdef guard for 64-bit varint fields (int64/uint64/sint64). 
+ # Emitted before proto.h so parse_wide_() and 64-bit accessors are available. + has_varint64, varint64_guard = get_varint64_ifdef(file, message_ifdef_map) + content = FILE_HEADER content += """\ #pragma once #include "esphome/core/defines.h" +""" + if has_varint64: + content += "\n".join( + wrap_with_ifdef(["#define USE_API_VARINT64"], varint64_guard) + ) + content += "\n" + + content += """\ #include "esphome/core/string_ref.h" #include "proto.h" @@ -2694,11 +2739,6 @@ static void dump_bytes_field(DumpBuffer &out, const char *field_name, const uint content += "namespace enums {\n\n" - # Build dynamic ifdef mappings for both enums and messages - enum_ifdef_map, message_ifdef_map, message_source_map, used_messages = ( - build_type_usage_map(file) - ) - # Simple grouping of enums by ifdef current_ifdef = None From 9457e54e5d057155272aa939e4e291cb164f06e9 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Tue, 17 Feb 2026 19:00:55 -0600 Subject: [PATCH 2/8] Rename parse_wide_ to parse_wide per clang-tidy naming convention --- esphome/components/api/proto.cpp | 4 ++-- esphome/components/api/proto.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/esphome/components/api/proto.cpp b/esphome/components/api/proto.cpp index bd72decb72..7f8a235ebe 100644 --- a/esphome/components/api/proto.cpp +++ b/esphome/components/api/proto.cpp @@ -8,8 +8,8 @@ namespace esphome::api { static const char *const TAG = "api.proto"; #ifdef USE_API_VARINT64 -optional ProtoVarInt::parse_wide_(const uint8_t *buffer, uint32_t len, uint32_t *consumed, - uint32_t result32) { +optional ProtoVarInt::parse_wide(const uint8_t *buffer, uint32_t len, uint32_t *consumed, + uint32_t result32) { uint64_t result64 = result32; uint32_t limit = std::min(len, uint32_t(10)); for (uint32_t i = 4; i < limit; i++) { diff --git a/esphome/components/api/proto.h b/esphome/components/api/proto.h index b386dfaff6..d8ac7061e1 100644 --- a/esphome/components/api/proto.h +++ 
b/esphome/components/api/proto.h @@ -121,7 +121,7 @@ class ProtoVarInt { } // 64-bit phase for values > 28 bits (BLE addresses etc.) #ifdef USE_API_VARINT64 - return parse_wide_(buffer, len, consumed, result32); + return parse_wide(buffer, len, consumed, result32); #else return {}; #endif @@ -131,7 +131,7 @@ class ProtoVarInt { protected: /// Continue parsing varint bytes 4-9 with 64-bit arithmetic. /// Separated to keep 64-bit shift code (__ashldi3 on 32-bit platforms) out of the common path. - static optional parse_wide_(const uint8_t *buffer, uint32_t len, uint32_t *consumed, uint32_t result32) + static optional parse_wide(const uint8_t *buffer, uint32_t len, uint32_t *consumed, uint32_t result32) __attribute__((noinline)); public: From 3e08cb595d33db96df0f3a77e12ed4673512f777 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Wed, 18 Feb 2026 07:58:14 -0600 Subject: [PATCH 3/8] no widen --- esphome/components/api/proto.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/esphome/components/api/proto.h b/esphome/components/api/proto.h index d8ac7061e1..ee2fd8be7f 100644 --- a/esphome/components/api/proto.h +++ b/esphome/components/api/proto.h @@ -161,7 +161,11 @@ class ProtoVarInt { #endif protected: +#ifdef USE_API_VARINT64 uint64_t value_; +#else + uint32_t value_; +#endif }; // Forward declarations for decode_to_message, encode_message and encode_packed_sint32 From 67034c966d8b01b44f9b104b0fccc001e273f190 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Wed, 18 Feb 2026 08:14:37 -0600 Subject: [PATCH 4/8] no widen --- esphome/components/api/proto.h | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/esphome/components/api/proto.h b/esphome/components/api/proto.h index ee2fd8be7f..bb8cad7b70 100644 --- a/esphome/components/api/proto.h +++ b/esphome/components/api/proto.h @@ -119,12 +119,23 @@ class ProtoVarInt { return ProtoVarInt(result32); } } - // 64-bit phase for values > 28 bits (BLE addresses etc.) 
+ // Byte 4: handles uint32 values >= 2^28 that need 5 varint bytes + // Only lower 4 bits contribute to bits 28-31 of the uint32 result + if (len > 4) { + uint8_t val = buffer[4]; + result32 |= uint32_t(val & 0x0F) << 28; + if ((val & 0x80) == 0) { + *consumed = 5; + return ProtoVarInt(result32); + } + // Varint continues past byte 4 — needs 64-bit (BLE addresses etc.) #ifdef USE_API_VARINT64 - return parse_wide(buffer, len, consumed, result32); + return parse_wide(buffer, len, consumed, result32); #else - return {}; + return {}; #endif + } + return {}; } #ifdef USE_API_VARINT64 From 4d2051ad8f7d969dd46267df8ae0dc5292baa3a6 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Wed, 18 Feb 2026 08:16:55 -0600 Subject: [PATCH 5/8] no widen --- esphome/components/api/proto.h | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/esphome/components/api/proto.h b/esphome/components/api/proto.h index bb8cad7b70..ec695b44f0 100644 --- a/esphome/components/api/proto.h +++ b/esphome/components/api/proto.h @@ -108,9 +108,12 @@ class ProtoVarInt { #ifdef ESPHOME_DEBUG_API assert(consumed != nullptr); #endif - // 32-bit phase: bytes 0-3 (shifts 0, 7, 14, 21 — all native on 32-bit platforms) + // 32-bit phase: bytes 0-4 cover all uint32 varint values + // Byte 4 shift (28) may truncate upper 3 bits in uint32, but those are + // always zero for valid uint32 values; parse_wide re-processes byte 4 + // with full 64-bit arithmetic when the varint continues past byte 4. 
uint32_t result32 = 0; - uint32_t limit = std::min(len, uint32_t(4)); + uint32_t limit = std::min(len, uint32_t(5)); for (uint32_t i = 0; i < limit; i++) { uint8_t val = buffer[i]; result32 |= uint32_t(val & 0x7F) << (i * 7); @@ -119,23 +122,12 @@ class ProtoVarInt { return ProtoVarInt(result32); } } - // Byte 4: handles uint32 values >= 2^28 that need 5 varint bytes - // Only lower 4 bits contribute to bits 28-31 of the uint32 result - if (len > 4) { - uint8_t val = buffer[4]; - result32 |= uint32_t(val & 0x0F) << 28; - if ((val & 0x80) == 0) { - *consumed = 5; - return ProtoVarInt(result32); - } - // Varint continues past byte 4 — needs 64-bit (BLE addresses etc.) + // 64-bit phase for values > 32 bits (BLE addresses etc.) #ifdef USE_API_VARINT64 - return parse_wide(buffer, len, consumed, result32); + return parse_wide(buffer, len, consumed, result32); #else - return {}; -#endif - } return {}; +#endif } #ifdef USE_API_VARINT64 From bd78b546c8e14667f44298d35d8ff228b6b31ee0 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Wed, 18 Feb 2026 08:26:04 -0600 Subject: [PATCH 6/8] fix --- esphome/components/api/proto.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/esphome/components/api/proto.h b/esphome/components/api/proto.h index ec695b44f0..bb4abdfc49 100644 --- a/esphome/components/api/proto.h +++ b/esphome/components/api/proto.h @@ -108,12 +108,17 @@ class ProtoVarInt { #ifdef ESPHOME_DEBUG_API assert(consumed != nullptr); #endif - // 32-bit phase: bytes 0-4 cover all uint32 varint values - // Byte 4 shift (28) may truncate upper 3 bits in uint32, but those are - // always zero for valid uint32 values; parse_wide re-processes byte 4 - // with full 64-bit arithmetic when the varint continues past byte 4. + // 32-bit phase: shifts 0, 7, 14, 21 are native on 32-bit platforms. 
+ // Without USE_API_VARINT64: also cover byte 4 (shift 28) — the uint32_t + // shift truncates upper bits but those are always zero for valid uint32 values. + // With USE_API_VARINT64: stop at byte 3 so parse_wide handles byte 4+ + // with full 64-bit arithmetic (avoids truncating values > UINT32_MAX). uint32_t result32 = 0; +#ifdef USE_API_VARINT64 + uint32_t limit = std::min(len, uint32_t(4)); +#else uint32_t limit = std::min(len, uint32_t(5)); +#endif for (uint32_t i = 0; i < limit; i++) { uint8_t val = buffer[i]; result32 |= uint32_t(val & 0x7F) << (i * 7); @@ -122,7 +127,7 @@ class ProtoVarInt { return ProtoVarInt(result32); } } - // 64-bit phase for values > 32 bits (BLE addresses etc.) + // 64-bit phase for remaining bytes (BLE addresses etc.) #ifdef USE_API_VARINT64 return parse_wide(buffer, len, consumed, result32); #else From bc2dbd3cf5096ee81064ba5553ba32388897c59f Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Wed, 18 Feb 2026 08:35:00 -0600 Subject: [PATCH 7/8] Add integration test for 5-byte varint device_id parsing Device IDs are FNV hashes (uint32) that frequently exceed 2^28, requiring 5 varint bytes. This test verifies the firmware correctly decodes these values in incoming SwitchCommandRequest messages and encodes them in state responses. 
--- .../fixtures/varint_five_byte_device_id.yaml | 47 +++++++ .../test_varint_five_byte_device_id.py | 120 ++++++++++++++++++ 2 files changed, 167 insertions(+) create mode 100644 tests/integration/fixtures/varint_five_byte_device_id.yaml create mode 100644 tests/integration/test_varint_five_byte_device_id.py diff --git a/tests/integration/fixtures/varint_five_byte_device_id.yaml b/tests/integration/fixtures/varint_five_byte_device_id.yaml new file mode 100644 index 0000000000..08259869ca --- /dev/null +++ b/tests/integration/fixtures/varint_five_byte_device_id.yaml @@ -0,0 +1,47 @@ +esphome: + name: varint-5byte-test + # Define areas and devices - device_ids will be FNV hashes > 2^28, + # requiring 5-byte varint encoding that exercises the 32-bit parse boundary. + areas: + - id: test_area + name: Test Area + devices: + - id: sub_device_one + name: Sub Device One + area_id: test_area + - id: sub_device_two + name: Sub Device Two + area_id: test_area + +host: +api: +logger: + +# Switches on sub-devices so we can send commands with large device_id varints +switch: + - platform: template + name: Device Switch + device_id: sub_device_one + id: device_switch_one + optimistic: true + turn_on_action: + - logger.log: "Switch one on" + turn_off_action: + - logger.log: "Switch one off" + + - platform: template + name: Device Switch + device_id: sub_device_two + id: device_switch_two + optimistic: true + turn_on_action: + - logger.log: "Switch two on" + turn_off_action: + - logger.log: "Switch two off" + +sensor: + - platform: template + name: Device Sensor + device_id: sub_device_one + lambda: return 42.0; + update_interval: 0.1s diff --git a/tests/integration/test_varint_five_byte_device_id.py b/tests/integration/test_varint_five_byte_device_id.py new file mode 100644 index 0000000000..d34c2f03d6 --- /dev/null +++ b/tests/integration/test_varint_five_byte_device_id.py @@ -0,0 +1,120 @@ +"""Integration test for 5-byte varint parsing of device_id fields. 
+ +Device IDs are FNV hashes (uint32) that frequently exceed 2^28 (268435456), +requiring 5 varint bytes. This test verifies that: +1. The firmware correctly decodes 5-byte varint device_id in incoming commands +2. The firmware correctly encodes large device_id values in state responses +3. Switch commands with large device_id reach the correct entity +""" + +from __future__ import annotations + +import asyncio + +from aioesphomeapi import EntityState, SwitchInfo, SwitchState +import pytest + +from .types import APIClientConnectedFactory, RunCompiledFunction + + +@pytest.mark.asyncio +async def test_varint_five_byte_device_id( + yaml_config: str, + run_compiled: RunCompiledFunction, + api_client_connected: APIClientConnectedFactory, +) -> None: + """Test that device_id values requiring 5-byte varints parse correctly.""" + async with run_compiled(yaml_config), api_client_connected() as client: + device_info = await client.device_info() + devices = device_info.devices + assert len(devices) >= 2, f"Expected at least 2 devices, got {len(devices)}" + + # Verify at least one device_id exceeds the 4-byte varint boundary (2^28) + large_ids = [d for d in devices if d.device_id >= (1 << 28)] + assert len(large_ids) > 0, ( + "Expected at least one device_id >= 2^28 to exercise 5-byte varint path. 
" + f"Got device_ids: {[d.device_id for d in devices]}" + ) + + # Get entities + all_entities, _ = await client.list_entities_services() + switch_entities = [e for e in all_entities if isinstance(e, SwitchInfo)] + + # Find switches named "Device Switch" — one per sub-device + device_switches = [e for e in switch_entities if e.name == "Device Switch"] + assert len(device_switches) == 2, ( + f"Expected 2 'Device Switch' entities, got {len(device_switches)}" + ) + + # Verify switches have different device_ids matching the sub-devices + switch_device_ids = {s.device_id for s in device_switches} + assert len(switch_device_ids) == 2, "Switches should have different device_ids" + + # Subscribe to states and wait for initial states + loop = asyncio.get_running_loop() + states: dict[tuple[int, int], EntityState] = {} + switch_futures: dict[tuple[int, int], asyncio.Future[EntityState]] = {} + initial_done: asyncio.Future[bool] = loop.create_future() + + def on_state(state: EntityState) -> None: + key = (state.device_id, state.key) + states[key] = state + + if len(states) >= 3 and not initial_done.done(): + initial_done.set_result(True) + + if initial_done.done() and key in switch_futures: + fut = switch_futures[key] + if not fut.done() and isinstance(state, SwitchState): + fut.set_result(state) + + client.subscribe_states(on_state) + + try: + await asyncio.wait_for(initial_done, timeout=10.0) + except TimeoutError: + pytest.fail( + f"Timed out waiting for initial states. 
Got {len(states)} states" + ) + + # Verify state responses contain correct large device_id values + for device in devices: + device_states = [ + s for (did, _), s in states.items() if did == device.device_id + ] + assert len(device_states) > 0, ( + f"No states received for device '{device.name}' " + f"(device_id={device.device_id})" + ) + + # Test switch commands with large device_id varints — + # this is the critical path: the client encodes device_id as a varint + # in the SwitchCommandRequest, and the firmware must decode it correctly. + for switch in device_switches: + state_key = (switch.device_id, switch.key) + + # Turn on + switch_futures[state_key] = loop.create_future() + client.switch_command(switch.key, True, device_id=switch.device_id) + try: + await asyncio.wait_for(switch_futures[state_key], timeout=2.0) + except TimeoutError: + pytest.fail( + f"Timed out waiting for switch ON state " + f"(device_id={switch.device_id}, key={switch.key}). " + f"This likely means the firmware failed to decode the " + f"5-byte varint device_id in SwitchCommandRequest." + ) + assert states[state_key].state is True + + # Turn off + switch_futures[state_key] = loop.create_future() + client.switch_command(switch.key, False, device_id=switch.device_id) + try: + await asyncio.wait_for(switch_futures[state_key], timeout=2.0) + except TimeoutError: + pytest.fail( + f"Timed out waiting for switch OFF state " + f"(device_id={switch.device_id}, key={switch.key})" + ) + assert states[state_key].state is False From 99a77e95499206487f3d908939d6752ebfa6bd94 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Wed, 18 Feb 2026 08:44:13 -0600 Subject: [PATCH 8/8] Add single-byte fast path to ProtoVarInt::parse Single-byte varints (0-127) are the most common case in protobuf messages (booleans, small enums, field tags). Skip the loop entirely for these values by checking the first byte before entering the multi-byte parsing loop. 
--- esphome/components/api/proto.h | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/esphome/components/api/proto.h b/esphome/components/api/proto.h index bb4abdfc49..69b0440e38 100644 --- a/esphome/components/api/proto.h +++ b/esphome/components/api/proto.h @@ -108,18 +108,26 @@ class ProtoVarInt { #ifdef ESPHOME_DEBUG_API assert(consumed != nullptr); #endif - // 32-bit phase: shifts 0, 7, 14, 21 are native on 32-bit platforms. - // Without USE_API_VARINT64: also cover byte 4 (shift 28) — the uint32_t - // shift truncates upper bits but those are always zero for valid uint32 values. - // With USE_API_VARINT64: stop at byte 3 so parse_wide handles byte 4+ - // with full 64-bit arithmetic (avoids truncating values > UINT32_MAX). - uint32_t result32 = 0; + if (len == 0) + return {}; + // Fast path: single-byte varints (0-127) are the most common case + // (booleans, small enums, field tags). Avoid loop overhead entirely. + if ((buffer[0] & 0x80) == 0) { + *consumed = 1; + return ProtoVarInt(buffer[0]); + } + // 32-bit phase: process remaining bytes with native 32-bit shifts. + // Without USE_API_VARINT64: cover bytes 1-4 (shifts 7, 14, 21, 28) — the uint32_t + // shift at byte 4 truncates upper bits but those are always zero for valid uint32 values. + // With USE_API_VARINT64: cover bytes 1-3 (shifts 7, 14, 21) so parse_wide handles + // byte 4+ with full 64-bit arithmetic (avoids truncating values > UINT32_MAX). + uint32_t result32 = buffer[0] & 0x7F; #ifdef USE_API_VARINT64 uint32_t limit = std::min(len, uint32_t(4)); #else uint32_t limit = std::min(len, uint32_t(5)); #endif - for (uint32_t i = 0; i < limit; i++) { + for (uint32_t i = 1; i < limit; i++) { uint8_t val = buffer[i]; result32 |= uint32_t(val & 0x7F) << (i * 7); if ((val & 0x80) == 0) {