From 9906724828b10fc1640982e6958d7db6c18a6955 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Fri, 2 Jan 2026 10:56:17 -1000 Subject: [PATCH 1/2] [api] Enable zero-copy bytes for VoiceAssistantAudio and other SOURCE_BOTH messages --- esphome/components/api/api.proto | 4 +-- esphome/components/api/api_pb2.cpp | 10 ++++--- esphome/components/api/api_pb2.h | 27 ++++++++----------- esphome/components/api/api_pb2_dump.cpp | 6 +---- .../voice_assistant/voice_assistant.cpp | 15 ++++++----- script/api_protobuf/api_protobuf.py | 6 ++--- 6 files changed, 31 insertions(+), 37 deletions(-) diff --git a/esphome/components/api/api.proto b/esphome/components/api/api.proto index fc05947774..f508a9c1e7 100644 --- a/esphome/components/api/api.proto +++ b/esphome/components/api/api.proto @@ -2425,7 +2425,7 @@ message ZWaveProxyFrame { option (ifdef) = "USE_ZWAVE_PROXY"; option (no_delay) = true; - bytes data = 1 [(pointer_to_buffer) = true]; + bytes data = 1; } enum ZWaveProxyRequestType { @@ -2439,5 +2439,5 @@ message ZWaveProxyRequest { option (ifdef) = "USE_ZWAVE_PROXY"; ZWaveProxyRequestType type = 1; - bytes data = 2 [(pointer_to_buffer) = true]; + bytes data = 2; } diff --git a/esphome/components/api/api_pb2.cpp b/esphome/components/api/api_pb2.cpp index edd6dfc6a9..c6caeedb89 100644 --- a/esphome/components/api/api_pb2.cpp +++ b/esphome/components/api/api_pb2.cpp @@ -2527,20 +2527,22 @@ bool VoiceAssistantAudio::decode_varint(uint32_t field_id, ProtoVarInt value) { } bool VoiceAssistantAudio::decode_length(uint32_t field_id, ProtoLengthDelimited value) { switch (field_id) { - case 1: - this->data = value.as_string(); + case 1: { + this->data = value.data(); + this->data_len = value.size(); break; + } default: return false; } return true; } void VoiceAssistantAudio::encode(ProtoWriteBuffer buffer) const { - buffer.encode_bytes(1, this->data_ptr_, this->data_len_); + buffer.encode_bytes(1, this->data, this->data_len); buffer.encode_bool(2, this->end); } void VoiceAssistantAudio::calculate_size(ProtoSize &size) const { - size.add_length(1, this->data_len_); + size.add_length(1, this->data_len); size.add_bool(1, this->end); } bool VoiceAssistantTimerEventResponse::decode_varint(uint32_t field_id, ProtoVarInt value) { diff --git a/esphome/components/api/api_pb2.h b/esphome/components/api/api_pb2.h index 2579ebbae2..c635be4cc5 100644 --- a/esphome/components/api/api_pb2.h +++ b/esphome/components/api/api_pb2.h @@ -1046,7 +1046,7 @@ class SubscribeLogsRequest final : public ProtoDecodableMessage { class SubscribeLogsResponse final : public ProtoMessage { public: static constexpr uint8_t MESSAGE_TYPE = 29; - static constexpr uint8_t ESTIMATED_SIZE = 11; + static constexpr uint8_t ESTIMATED_SIZE = 21; #ifdef HAS_PROTO_MESSAGE_DUMP const char *message_name() const override { return "subscribe_logs_response"; } #endif @@ -1069,7 +1069,7 @@ class SubscribeLogsResponse final : public ProtoMessage { class NoiseEncryptionSetKeyRequest final : public ProtoDecodableMessage { public: static constexpr uint8_t MESSAGE_TYPE = 124; - static constexpr uint8_t ESTIMATED_SIZE = 9; + static constexpr uint8_t ESTIMATED_SIZE = 19; #ifdef HAS_PROTO_MESSAGE_DUMP const char *message_name() const override { return "noise_encryption_set_key_request"; } #endif @@ -1161,7 +1161,7 @@ class HomeassistantActionRequest final : public ProtoMessage { class HomeassistantActionResponse final : public ProtoDecodableMessage { public: static constexpr uint8_t MESSAGE_TYPE = 130; - static constexpr uint8_t ESTIMATED_SIZE = 24; + static constexpr uint8_t ESTIMATED_SIZE = 34; #ifdef HAS_PROTO_MESSAGE_DUMP const char *message_name() const override { return "homeassistant_action_response"; } #endif @@ -1388,7 +1388,7 @@ class ListEntitiesCameraResponse final : public InfoResponseProtoMessage { class CameraImageResponse final : public StateResponseProtoMessage { public: static constexpr uint8_t MESSAGE_TYPE = 44; - static constexpr uint8_t ESTIMATED_SIZE = 20; + static constexpr uint8_t ESTIMATED_SIZE = 30; #ifdef HAS_PROTO_MESSAGE_DUMP const char *message_name() const override { return "camera_image_response"; } #endif @@ -2123,7 +2123,7 @@ class BluetoothGATTReadRequest final : public ProtoDecodableMessage { class BluetoothGATTReadResponse final : public ProtoMessage { public: static constexpr uint8_t MESSAGE_TYPE = 74; - static constexpr uint8_t ESTIMATED_SIZE = 17; + static constexpr uint8_t ESTIMATED_SIZE = 27; #ifdef HAS_PROTO_MESSAGE_DUMP const char *message_name() const override { return "bluetooth_gatt_read_response"; } #endif @@ -2146,7 +2146,7 @@ class BluetoothGATTReadResponse final : public ProtoMessage { class BluetoothGATTWriteRequest final : public ProtoDecodableMessage { public: static constexpr uint8_t MESSAGE_TYPE = 75; - static constexpr uint8_t ESTIMATED_SIZE = 19; + static constexpr uint8_t ESTIMATED_SIZE = 29; #ifdef HAS_PROTO_MESSAGE_DUMP const char *message_name() const override { return "bluetooth_gatt_write_request"; } #endif @@ -2182,7 +2182,7 @@ class BluetoothGATTReadDescriptorRequest final : public ProtoDecodableMessage { class BluetoothGATTWriteDescriptorRequest final : public ProtoDecodableMessage { public: static constexpr uint8_t MESSAGE_TYPE = 77; - static constexpr uint8_t ESTIMATED_SIZE = 17; + static constexpr uint8_t ESTIMATED_SIZE = 27; #ifdef HAS_PROTO_MESSAGE_DUMP const char *message_name() const override { return "bluetooth_gatt_write_descriptor_request"; } #endif @@ -2218,7 +2218,7 @@ class BluetoothGATTNotifyRequest final : public ProtoDecodableMessage { class BluetoothGATTNotifyDataResponse final : public ProtoMessage { public: static constexpr uint8_t MESSAGE_TYPE = 79; - static constexpr uint8_t ESTIMATED_SIZE = 17; + static constexpr uint8_t ESTIMATED_SIZE = 27; #ifdef HAS_PROTO_MESSAGE_DUMP const char *message_name() const override { return "bluetooth_gatt_notify_data_response"; } #endif @@ -2521,17 +2521,12 @@ class VoiceAssistantEventResponse final : public ProtoDecodableMessage { class VoiceAssistantAudio final : public ProtoDecodableMessage { public: static constexpr uint8_t MESSAGE_TYPE = 106; - static constexpr uint8_t ESTIMATED_SIZE = 11; + static constexpr uint8_t ESTIMATED_SIZE = 21; #ifdef HAS_PROTO_MESSAGE_DUMP const char *message_name() const override { return "voice_assistant_audio"; } #endif - std::string data{}; - const uint8_t *data_ptr_{nullptr}; - size_t data_len_{0}; - void set_data(const uint8_t *data, size_t len) { - this->data_ptr_ = data; - this->data_len_ = len; - } + const uint8_t *data{nullptr}; + uint16_t data_len{0}; bool end{false}; void encode(ProtoWriteBuffer buffer) const override; void calculate_size(ProtoSize &size) const override; diff --git a/esphome/components/api/api_pb2_dump.cpp b/esphome/components/api/api_pb2_dump.cpp index 567f10fcc0..15db306d5f 100644 --- a/esphome/components/api/api_pb2_dump.cpp +++ b/esphome/components/api/api_pb2_dump.cpp @@ -1978,11 +1978,7 @@ void VoiceAssistantEventResponse::dump_to(std::string &out) const { void VoiceAssistantAudio::dump_to(std::string &out) const { MessageDumpHelper helper(out, "VoiceAssistantAudio"); out.append(" data: "); - if (this->data_ptr_ != nullptr) { - out.append(format_hex_pretty(this->data_ptr_, this->data_len_)); - } else { - out.append(format_hex_pretty(reinterpret_cast(this->data.data()), this->data.size())); - } + out.append(format_hex_pretty(this->data, this->data_len)); out.append("\n"); dump_field(out, "end", this->end); } diff --git a/esphome/components/voice_assistant/voice_assistant.cpp b/esphome/components/voice_assistant/voice_assistant.cpp index 9bb5393be2..8101d210b3 100644 --- a/esphome/components/voice_assistant/voice_assistant.cpp +++ b/esphome/components/voice_assistant/voice_assistant.cpp @@ -272,7 +272,8 @@ void VoiceAssistant::loop() { size_t read_bytes = this->ring_buffer_->read((void *) this->send_buffer_, SEND_BUFFER_SIZE, 0); if (this->audio_mode_ == AUDIO_MODE_API) { api::VoiceAssistantAudio msg; - msg.set_data(this->send_buffer_, read_bytes); + msg.data = this->send_buffer_; + msg.data_len = read_bytes; this->api_client_->send_message(msg, api::VoiceAssistantAudio::MESSAGE_TYPE); } else { if (!this->udp_socket_running_) { @@ -841,12 +842,12 @@ void VoiceAssistant::on_event(const api::VoiceAssistantEventResponse &msg) { void VoiceAssistant::on_audio(const api::VoiceAssistantAudio &msg) { #ifdef USE_SPEAKER // We should never get to this function if there is no speaker anyway if ((this->speaker_ != nullptr) && (this->speaker_buffer_ != nullptr)) { - if (this->speaker_buffer_index_ + msg.data.length() < SPEAKER_BUFFER_SIZE) { - memcpy(this->speaker_buffer_ + this->speaker_buffer_index_, msg.data.data(), msg.data.length()); - this->speaker_buffer_index_ += msg.data.length(); - this->speaker_buffer_size_ += msg.data.length(); - this->speaker_bytes_received_ += msg.data.length(); - ESP_LOGV(TAG, "Received audio: %u bytes from API", msg.data.length()); + if (this->speaker_buffer_index_ + msg.data_len < SPEAKER_BUFFER_SIZE) { + memcpy(this->speaker_buffer_ + this->speaker_buffer_index_, msg.data, msg.data_len); + this->speaker_buffer_index_ += msg.data_len; + this->speaker_buffer_size_ += msg.data_len; + this->speaker_bytes_received_ += msg.data_len; + ESP_LOGV(TAG, "Received audio: %u bytes from API", msg.data_len); } else { ESP_LOGE(TAG, "Cannot receive audio, buffer is full"); } diff --git a/script/api_protobuf/api_protobuf.py b/script/api_protobuf/api_protobuf.py index 5b68c6a3d2..7293f2abbc 100755 --- a/script/api_protobuf/api_protobuf.py +++ b/script/api_protobuf/api_protobuf.py @@ -362,12 +362,12 @@ def create_field_type_info( # Traditional fixed array approach with copy (takes priority) return FixedArrayBytesType(field, fixed_size) - # For SOURCE_CLIENT only messages (decode but no encode), use pointer + # For messages that decode (SOURCE_CLIENT or SOURCE_BOTH), use pointer # for zero-copy access to the receive buffer - if needs_decode and not needs_encode: + if needs_decode: return PointerToBytesBufferType(field, None) - # For SOURCE_BOTH/SOURCE_SERVER, explicit annotation is still needed + # For SOURCE_SERVER (encode only), explicit annotation is still needed if get_field_opt(field, pb.pointer_to_buffer, False): return PointerToBytesBufferType(field, None) From 8a5e06b6d23e17f85d0335dca7f11abf93a16576 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Fri, 2 Jan 2026 14:08:09 -1000 Subject: [PATCH 2/2] merge --- esphome/components/api/api_pb2.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/esphome/components/api/api_pb2.h b/esphome/components/api/api_pb2.h index cd9bc2013e..c635be4cc5 100644 --- a/esphome/components/api/api_pb2.h +++ b/esphome/components/api/api_pb2.h @@ -1069,7 +1069,7 @@ class SubscribeLogsResponse final : public ProtoMessage { class NoiseEncryptionSetKeyRequest final : public ProtoDecodableMessage { public: static constexpr uint8_t MESSAGE_TYPE = 124; - static constexpr uint8_t ESTIMATED_SIZE = 9; + static constexpr uint8_t ESTIMATED_SIZE = 19; #ifdef HAS_PROTO_MESSAGE_DUMP const char *message_name() const override { return "noise_encryption_set_key_request"; } #endif @@ -1161,7 +1161,7 @@ class HomeassistantActionRequest final : public ProtoMessage { class HomeassistantActionResponse final : public ProtoDecodableMessage { public: static constexpr uint8_t MESSAGE_TYPE = 130; - static constexpr uint8_t ESTIMATED_SIZE = 24; + static constexpr uint8_t ESTIMATED_SIZE = 34; #ifdef HAS_PROTO_MESSAGE_DUMP const char *message_name() const override { return "homeassistant_action_response"; } #endif @@ -2146,7 +2146,7 @@ class BluetoothGATTReadResponse final : public ProtoMessage { class BluetoothGATTWriteRequest final : public ProtoDecodableMessage { public: static constexpr uint8_t MESSAGE_TYPE = 75; - static constexpr uint8_t ESTIMATED_SIZE = 19; + static constexpr uint8_t ESTIMATED_SIZE = 29; #ifdef HAS_PROTO_MESSAGE_DUMP const char *message_name() const override { return "bluetooth_gatt_write_request"; } #endif @@ -2182,7 +2182,7 @@ class BluetoothGATTReadDescriptorRequest final : public ProtoDecodableMessage { class BluetoothGATTWriteDescriptorRequest final : public ProtoDecodableMessage { public: static constexpr uint8_t MESSAGE_TYPE = 77; - static constexpr uint8_t ESTIMATED_SIZE = 17; + static constexpr uint8_t ESTIMATED_SIZE = 27; #ifdef HAS_PROTO_MESSAGE_DUMP const char *message_name() const override { return "bluetooth_gatt_write_descriptor_request"; } #endif