[voice_assistant] Use zero-copy buffer access for audio data (#12656)

This commit is contained in:
J. Nick Koston
2026-01-02 14:10:21 -10:00
committed by GitHub
parent 0b7ff09657
commit 51259888bf
5 changed files with 19 additions and 25 deletions

View File

@@ -1937,7 +1937,7 @@ message VoiceAssistantAudio {
option (source) = SOURCE_BOTH; option (source) = SOURCE_BOTH;
option (ifdef) = "USE_VOICE_ASSISTANT"; option (ifdef) = "USE_VOICE_ASSISTANT";
bytes data = 1; bytes data = 1 [(pointer_to_buffer) = true];
bool end = 2; bool end = 2;
} }

View File

@@ -2527,20 +2527,22 @@ bool VoiceAssistantAudio::decode_varint(uint32_t field_id, ProtoVarInt value) {
} }
bool VoiceAssistantAudio::decode_length(uint32_t field_id, ProtoLengthDelimited value) { bool VoiceAssistantAudio::decode_length(uint32_t field_id, ProtoLengthDelimited value) {
switch (field_id) { switch (field_id) {
case 1: case 1: {
this->data = value.as_string(); this->data = value.data();
this->data_len = value.size();
break; break;
}
default: default:
return false; return false;
} }
return true; return true;
} }
void VoiceAssistantAudio::encode(ProtoWriteBuffer buffer) const { void VoiceAssistantAudio::encode(ProtoWriteBuffer buffer) const {
buffer.encode_bytes(1, this->data_ptr_, this->data_len_); buffer.encode_bytes(1, this->data, this->data_len);
buffer.encode_bool(2, this->end); buffer.encode_bool(2, this->end);
} }
void VoiceAssistantAudio::calculate_size(ProtoSize &size) const { void VoiceAssistantAudio::calculate_size(ProtoSize &size) const {
size.add_length(1, this->data_len_); size.add_length(1, this->data_len);
size.add_bool(1, this->end); size.add_bool(1, this->end);
} }
bool VoiceAssistantTimerEventResponse::decode_varint(uint32_t field_id, ProtoVarInt value) { bool VoiceAssistantTimerEventResponse::decode_varint(uint32_t field_id, ProtoVarInt value) {

View File

@@ -2521,17 +2521,12 @@ class VoiceAssistantEventResponse final : public ProtoDecodableMessage {
class VoiceAssistantAudio final : public ProtoDecodableMessage { class VoiceAssistantAudio final : public ProtoDecodableMessage {
public: public:
static constexpr uint8_t MESSAGE_TYPE = 106; static constexpr uint8_t MESSAGE_TYPE = 106;
static constexpr uint8_t ESTIMATED_SIZE = 11; static constexpr uint8_t ESTIMATED_SIZE = 21;
#ifdef HAS_PROTO_MESSAGE_DUMP #ifdef HAS_PROTO_MESSAGE_DUMP
const char *message_name() const override { return "voice_assistant_audio"; } const char *message_name() const override { return "voice_assistant_audio"; }
#endif #endif
std::string data{}; const uint8_t *data{nullptr};
const uint8_t *data_ptr_{nullptr}; uint16_t data_len{0};
size_t data_len_{0};
void set_data(const uint8_t *data, size_t len) {
this->data_ptr_ = data;
this->data_len_ = len;
}
bool end{false}; bool end{false};
void encode(ProtoWriteBuffer buffer) const override; void encode(ProtoWriteBuffer buffer) const override;
void calculate_size(ProtoSize &size) const override; void calculate_size(ProtoSize &size) const override;

View File

@@ -1978,11 +1978,7 @@ void VoiceAssistantEventResponse::dump_to(std::string &out) const {
void VoiceAssistantAudio::dump_to(std::string &out) const { void VoiceAssistantAudio::dump_to(std::string &out) const {
MessageDumpHelper helper(out, "VoiceAssistantAudio"); MessageDumpHelper helper(out, "VoiceAssistantAudio");
out.append(" data: "); out.append(" data: ");
if (this->data_ptr_ != nullptr) { out.append(format_hex_pretty(this->data, this->data_len));
out.append(format_hex_pretty(this->data_ptr_, this->data_len_));
} else {
out.append(format_hex_pretty(reinterpret_cast<const uint8_t *>(this->data.data()), this->data.size()));
}
out.append("\n"); out.append("\n");
dump_field(out, "end", this->end); dump_field(out, "end", this->end);
} }

View File

@@ -272,7 +272,8 @@ void VoiceAssistant::loop() {
size_t read_bytes = this->ring_buffer_->read((void *) this->send_buffer_, SEND_BUFFER_SIZE, 0); size_t read_bytes = this->ring_buffer_->read((void *) this->send_buffer_, SEND_BUFFER_SIZE, 0);
if (this->audio_mode_ == AUDIO_MODE_API) { if (this->audio_mode_ == AUDIO_MODE_API) {
api::VoiceAssistantAudio msg; api::VoiceAssistantAudio msg;
msg.set_data(this->send_buffer_, read_bytes); msg.data = this->send_buffer_;
msg.data_len = read_bytes;
this->api_client_->send_message(msg, api::VoiceAssistantAudio::MESSAGE_TYPE); this->api_client_->send_message(msg, api::VoiceAssistantAudio::MESSAGE_TYPE);
} else { } else {
if (!this->udp_socket_running_) { if (!this->udp_socket_running_) {
@@ -841,12 +842,12 @@ void VoiceAssistant::on_event(const api::VoiceAssistantEventResponse &msg) {
void VoiceAssistant::on_audio(const api::VoiceAssistantAudio &msg) { void VoiceAssistant::on_audio(const api::VoiceAssistantAudio &msg) {
#ifdef USE_SPEAKER // We should never get to this function if there is no speaker anyway #ifdef USE_SPEAKER // We should never get to this function if there is no speaker anyway
if ((this->speaker_ != nullptr) && (this->speaker_buffer_ != nullptr)) { if ((this->speaker_ != nullptr) && (this->speaker_buffer_ != nullptr)) {
if (this->speaker_buffer_index_ + msg.data.length() < SPEAKER_BUFFER_SIZE) { if (this->speaker_buffer_index_ + msg.data_len < SPEAKER_BUFFER_SIZE) {
memcpy(this->speaker_buffer_ + this->speaker_buffer_index_, msg.data.data(), msg.data.length()); memcpy(this->speaker_buffer_ + this->speaker_buffer_index_, msg.data, msg.data_len);
this->speaker_buffer_index_ += msg.data.length(); this->speaker_buffer_index_ += msg.data_len;
this->speaker_buffer_size_ += msg.data.length(); this->speaker_buffer_size_ += msg.data_len;
this->speaker_bytes_received_ += msg.data.length(); this->speaker_bytes_received_ += msg.data_len;
ESP_LOGV(TAG, "Received audio: %u bytes from API", msg.data.length()); ESP_LOGV(TAG, "Received audio: %u bytes from API", msg.data_len);
} else { } else {
ESP_LOGE(TAG, "Cannot receive audio, buffer is full"); ESP_LOGE(TAG, "Cannot receive audio, buffer is full");
} }