diff --git a/esphome/components/esp8266/__init__.py b/esphome/components/esp8266/__init__.py index c7b5d5c130..927a59fd61 100644 --- a/esphome/components/esp8266/__init__.py +++ b/esphome/components/esp8266/__init__.py @@ -205,6 +205,7 @@ async def to_code(config): "pre:testing_mode.py", "pre:exclude_updater.py", "pre:exclude_waveform.py", + "pre:remove_float_scanf.py", "post:post_build.py", ], ) @@ -342,3 +343,8 @@ def copy_files() -> None: exclude_waveform_file, CORE.relative_build_path("exclude_waveform.py"), ) + remove_float_scanf_file = dir / "remove_float_scanf.py.script" + copy_file_if_changed( + remove_float_scanf_file, + CORE.relative_build_path("remove_float_scanf.py"), + ) diff --git a/esphome/components/esp8266/remove_float_scanf.py.script b/esphome/components/esp8266/remove_float_scanf.py.script new file mode 100644 index 0000000000..b1d03a4d46 --- /dev/null +++ b/esphome/components/esp8266/remove_float_scanf.py.script @@ -0,0 +1,46 @@ +# pylint: disable=E0602 +Import("env") # noqa + +# Remove forced scanf linkage to allow garbage collection of unused code +# +# The ESP8266 Arduino framework unconditionally adds: +# -u _printf_float -u _scanf_float +# +# The -u flag forces symbols to be linked even if unreferenced, which pulls +# in the entire scanf family (~7-8KB). ESPHome doesn't use scanf at all +# (verified by CI check in PR #13657), so this is pure dead weight. +# +# By removing -u _scanf_float, --gc-sections can eliminate: +# - scanf family functions (~7KB) +# - _strtod_l (~3.7KB) +# - Related parsing infrastructure +# +# We keep -u _printf_float because components still use %f in logging. + + +def remove_scanf_float_flag(source, target, env): + """Remove -u _scanf_float from linker flags. + + This is called as a pre-action before the link step, after the + Arduino framework has added its default flags. + """ + linkflags = env.get("LINKFLAGS", []) + new_linkflags = [] + i = 0 + + while i < len(linkflags): + flag = linkflags[i] + if flag == "-u" and i + 1 < len(linkflags): + next_flag = linkflags[i + 1] + if next_flag == "_scanf_float": + print("ESPHome: Removing _scanf_float (saves ~8KB flash)") + i += 2 # Skip both -u and the symbol + continue + new_linkflags.append(flag) + i += 1 + + env.Replace(LINKFLAGS=new_linkflags) + + +# Register the callback to run before the link step +env.AddPreAction("$BUILD_DIR/${PROGNAME}.elf", remove_scanf_float_flag) diff --git a/esphome/components/usb_host/usb_host_client.cpp b/esphome/components/usb_host/usb_host_client.cpp index 5b0aed4c59..c77d738ace 100644 --- a/esphome/components/usb_host/usb_host_client.cpp +++ b/esphome/components/usb_host/usb_host_client.cpp @@ -424,9 +424,9 @@ bool USBClient::control_transfer(uint8_t type, uint8_t request, uint16_t value, if (trq == nullptr) return false; auto length = data.size(); - if (length > sizeof(trq->transfer->data_buffer_size) - SETUP_PACKET_SIZE) { + if (length > trq->transfer->data_buffer_size - SETUP_PACKET_SIZE) { ESP_LOGE(TAG, "Control transfer data size too large: %u > %u", length, - sizeof(trq->transfer->data_buffer_size) - sizeof(usb_setup_packet_t)); + trq->transfer->data_buffer_size - SETUP_PACKET_SIZE); this->release_trq(trq); return false; } @@ -507,9 +507,13 @@ bool USBClient::transfer_in(uint8_t ep_address, const transfer_cb_t &callback, u /** * Performs an output transfer operation. - * THREAD CONTEXT: Called from main loop thread only - * - USB UART output uses defer() to ensure main loop context - * - Modbus and other components call from loop() + * THREAD CONTEXT: Called from both USB task and main loop threads. + * - USB task: output transfer callback restarts output directly (no defer) + * - Main loop: initial output trigger from write_array() and loop() + * Thread safety is ensured by: + * - get_trq_() uses atomic CAS (multi-consumer safe) + * - claimed trq slot is exclusively owned until submission + * - usb_host_transfer_submit() is safe to call from any task context * * @param ep_address The endpoint address. * @param callback The callback function to be called when the transfer is complete. @@ -524,6 +528,11 @@ bool USBClient::transfer_out(uint8_t ep_address, const transfer_cb_t &callback, ESP_LOGE(TAG, "Too many requests queued"); return false; } + if (length > trq->transfer->data_buffer_size) { + ESP_LOGE(TAG, "transfer_out: data length %u exceeds buffer size %u", length, trq->transfer->data_buffer_size); + this->release_trq(trq); + return false; + } trq->callback = callback; trq->transfer->callback = transfer_callback; trq->transfer->bEndpointAddress = ep_address | USB_DIR_OUT; diff --git a/esphome/components/usb_uart/ch34x.cpp b/esphome/components/usb_uart/ch34x.cpp index caa4b65657..7fa964c0cb 100644 --- a/esphome/components/usb_uart/ch34x.cpp +++ b/esphome/components/usb_uart/ch34x.cpp @@ -5,8 +5,7 @@ #include "esphome/components/bytebuffer/bytebuffer.h" -namespace esphome { -namespace usb_uart { +namespace esphome::usb_uart { using namespace bytebuffer; /** @@ -74,8 +73,8 @@ void USBUartTypeCH34X::enable_channels() { this->control_transfer(USB_VENDOR_DEV | usb_host::USB_DIR_OUT, cmd, value, (factor << 8) | divisor, callback); this->control_transfer(USB_VENDOR_DEV | usb_host::USB_DIR_OUT, cmd + 3, 0x80, 0, callback); } - USBUartTypeCdcAcm::enable_channels(); + this->start_channels(); } -} // namespace usb_uart -} // namespace esphome +} // namespace esphome::usb_uart + #endif // USE_ESP32_VARIANT_ESP32P4 || USE_ESP32_VARIANT_ESP32S2 || USE_ESP32_VARIANT_ESP32S3 diff --git a/esphome/components/usb_uart/cp210x.cpp b/esphome/components/usb_uart/cp210x.cpp index fa8c2331b2..ae9170c5fb 100644 --- a/esphome/components/usb_uart/cp210x.cpp +++ b/esphome/components/usb_uart/cp210x.cpp @@ -5,8 +5,7 @@ #include "esphome/components/bytebuffer/bytebuffer.h" -namespace esphome { -namespace usb_uart { +namespace esphome::usb_uart { using namespace bytebuffer; /** @@ -119,8 +118,8 @@ void USBUartTypeCP210X::enable_channels() { this->control_transfer(USB_VENDOR_IFC | usb_host::USB_DIR_OUT, SET_BAUDRATE, 0, channel->index_, callback, baud.get_data()); } - USBUartTypeCdcAcm::enable_channels(); + this->start_channels(); } -} // namespace usb_uart -} // namespace esphome +} // namespace esphome::usb_uart + #endif // USE_ESP32_VARIANT_ESP32P4 || USE_ESP32_VARIANT_ESP32S2 || USE_ESP32_VARIANT_ESP32S3 diff --git a/esphome/components/usb_uart/usb_uart.cpp b/esphome/components/usb_uart/usb_uart.cpp index 5c2806c456..de81bfc587 100644 --- a/esphome/components/usb_uart/usb_uart.cpp +++ b/esphome/components/usb_uart/usb_uart.cpp @@ -3,12 +3,10 @@ #include "usb_uart.h" #include "esphome/core/log.h" #include "esphome/core/application.h" -#include "esphome/components/uart/uart_debugger.h" #include -namespace esphome { -namespace usb_uart { +namespace esphome::usb_uart { /** * @@ -135,16 +133,51 @@ void USBUartChannel::write_array(const uint8_t *data, size_t len) { ESP_LOGV(TAG, "Channel not initialised - write ignored"); return; } - while (this->output_buffer_.get_free_space() != 0 && len-- != 0) { - this->output_buffer_.push(*data++); +#ifdef USE_UART_DEBUGGER + if (this->debug_) { + constexpr size_t BATCH = 16; + char buf[4 + format_hex_pretty_size(BATCH)]; // ">>> " + "XX,XX,...,XX\0" + for (size_t off = 0; off < len; off += BATCH) { + size_t n = std::min(len - off, BATCH); + memcpy(buf, ">>> ", 4); + format_hex_pretty_to(buf + 4, sizeof(buf) - 4, data + off, n, ','); + ESP_LOGD(TAG, "%s", buf); + } } - len++; - if (len > 0) { - ESP_LOGE(TAG, "Buffer full - failed to write %d bytes", len); +#endif + while (len > 0) { + UsbOutputChunk *chunk = this->output_pool_.allocate(); + if (chunk == nullptr) { + ESP_LOGE(TAG, "Output pool full - lost %zu bytes", len); + break; + } + size_t chunk_len = std::min(len, UsbOutputChunk::MAX_CHUNK_SIZE); + memcpy(chunk->data, data, chunk_len); + chunk->length = static_cast(chunk_len); + if (!this->output_queue_.push(chunk)) { + this->output_pool_.release(chunk); + ESP_LOGE(TAG, "Output queue full - lost %zu bytes", len); + break; + } + data += chunk_len; + len -= chunk_len; } this->parent_->start_output(this); } +void USBUartChannel::flush() { + // Spin until the output queue is drained and the last USB transfer completes. + // Safe to call from the main loop only. + // The 100 ms timeout guards against a device that stops responding mid-flush; + // in that case the main loop is blocked for the full duration. + uint32_t deadline = millis() + 100; // 100 ms safety timeout + while ((!this->output_queue_.empty() || this->output_started_.load()) && millis() < deadline) { + // Kick start_output() in case data arrived but no transfer is in flight yet. + this->parent_->start_output(this); + yield(); + } +} + bool USBUartChannel::peek_byte(uint8_t *data) { if (this->input_buffer_.is_empty()) { return false; @@ -182,8 +215,10 @@ void USBUartComponent::loop() { #ifdef USE_UART_DEBUGGER if (channel->debug_) { - uart::UARTDebug::log_hex(uart::UART_DIRECTION_RX, std::vector(chunk->data, chunk->data + chunk->length), - ','); // NOLINT() + char buf[4 + format_hex_pretty_size(UsbDataChunk::MAX_CHUNK_SIZE)]; // "<<< " + hex + memcpy(buf, "<<< ", 4); + format_hex_pretty_to(buf + 4, sizeof(buf) - 4, chunk->data, chunk->length, ','); + ESP_LOGD(TAG, "%s", buf); } #endif @@ -192,6 +227,13 @@ void USBUartComponent::loop() { // Return chunk to pool for reuse this->chunk_pool_.release(chunk); + + // Invoke the RX callback (if registered) immediately after data lands in the + // ring buffer. This lets consumers such as ZigbeeProxy process incoming bytes + // in the same loop iteration they are delivered, avoiding an extra wakeup cycle. + if (channel->rx_callback_) { + channel->rx_callback_(); + } } // Log dropped USB data periodically @@ -234,10 +276,10 @@ void USBUartComponent::start_input(USBUartChannel *channel) { // The underlying transfer_in() uses lock-free atomic allocation from the // TransferRequest pool, making this multi-threaded access safe - // if already started, don't restart. A spurious failure in compare_exchange_weak - // is not a problem, as it will be retried on the next read_array() + // Use compare_exchange_strong to avoid spurious failures: a missed submit here is + // never retried by read_array() because no data will ever arrive to trigger it. auto started = false; - if (!channel->input_started_.compare_exchange_weak(started, true)) + if (!channel->input_started_.compare_exchange_strong(started, true)) return; const auto *ep = channel->cdc_dev_.in_ep; // CALLBACK CONTEXT: This lambda is executed in USB task via transfer_callback @@ -285,37 +327,56 @@ void USBUartComponent::start_input(USBUartChannel *channel) { this->start_input(channel); }; if (!this->transfer_in(ep->bEndpointAddress, callback, ep->wMaxPacketSize)) { + ESP_LOGE(TAG, "IN transfer submission failed for ep=0x%02X", ep->bEndpointAddress); channel->input_started_.store(false); } } void USBUartComponent::start_output(USBUartChannel *channel) { - // IMPORTANT: This function must only be called from the main loop! - // The output_buffer_ is not thread-safe and can only be accessed from main loop. - // USB callbacks use defer() to ensure this function runs in the correct context. - if (channel->output_started_.load()) - return; - if (channel->output_buffer_.is_empty()) { + // THREAD CONTEXT: Called from both main loop and USB task threads. + // The output_queue_ is a lock-free SPSC queue, so pop() is safe from either thread. + // The output_started_ atomic flag is claimed via compare_exchange to guarantee that + // only one thread starts a transfer at a time. + + // Atomically claim the "output in progress" flag. If already set, another thread + // is handling the transfer; return immediately. + bool expected = false; + if (!channel->output_started_.compare_exchange_strong(expected, true, std::memory_order_acq_rel)) { return; } + + UsbOutputChunk *chunk = channel->output_queue_.pop(); + if (chunk == nullptr) { + // Nothing to send — release the flag and return. + channel->output_started_.store(false, std::memory_order_release); + return; + } + const auto *ep = channel->cdc_dev_.out_ep; - // CALLBACK CONTEXT: This lambda is executed in USB task via transfer_callback - auto callback = [this, channel](const usb_host::TransferStatus &status) { - ESP_LOGV(TAG, "Output Transfer result: length: %u; status %X", status.data_len, status.error_code); - channel->output_started_.store(false); - // Defer restart to main loop (defer is thread-safe) - this->defer([this, channel] { this->start_output(channel); }); + // CALLBACK CONTEXT: This lambda is executed in the USB task via transfer_callback. + // It releases the chunk, clears the flag, and directly restarts output without + // going through defer() — eliminating one full main-loop-wakeup cycle of latency. + auto callback = [this, channel, chunk](const usb_host::TransferStatus &status) { + if (!status.success) { + ESP_LOGW(TAG, "Output transfer failed: status %X", status.error_code); + } else { + ESP_LOGV(TAG, "Output Transfer result: length: %u; status %X", status.data_len, status.error_code); + } + channel->output_pool_.release(chunk); + channel->output_started_.store(false, std::memory_order_release); + // Restart directly from USB task — safe because output_queue_ is lock-free + // and transfer_out() uses thread-safe atomic slot allocation. + this->start_output(channel); }; - channel->output_started_.store(true); - uint8_t data[ep->wMaxPacketSize]; - auto len = channel->output_buffer_.pop(data, ep->wMaxPacketSize); - this->transfer_out(ep->bEndpointAddress, callback, data, len); -#ifdef USE_UART_DEBUGGER - if (channel->debug_) { - uart::UARTDebug::log_hex(uart::UART_DIRECTION_TX, std::vector(data, data + len), ','); // NOLINT() + + const uint8_t len = chunk->length; + if (!this->transfer_out(ep->bEndpointAddress, callback, chunk->data, len)) { + // Transfer submission failed — return chunk and release flag so callers can retry. + channel->output_pool_.release(chunk); + channel->output_started_.store(false, std::memory_order_release); + return; } -#endif - ESP_LOGV(TAG, "Output %d bytes started", len); + ESP_LOGV(TAG, "Output %u bytes started", len); } /** @@ -351,6 +412,20 @@ void USBUartTypeCdcAcm::on_connected() { fix_mps(channel->cdc_dev_.in_ep); fix_mps(channel->cdc_dev_.out_ep); channel->initialised_.store(true); + // Claim the communication (interrupt) interface so CDC class requests are accepted + // by the device. Some CDC ACM implementations (e.g. EFR32 NCP) require this before + // they enable data flow on the bulk endpoints. + if (channel->cdc_dev_.interrupt_interface_number != channel->cdc_dev_.bulk_interface_number) { + auto err_comm = usb_host_interface_claim(this->handle_, this->device_handle_, + channel->cdc_dev_.interrupt_interface_number, 0); + if (err_comm != ESP_OK) { + ESP_LOGW(TAG, "Could not claim comm interface %d: %s", channel->cdc_dev_.interrupt_interface_number, + esp_err_to_name(err_comm)); + } else { + channel->cdc_dev_.comm_interface_claimed = true; + ESP_LOGD(TAG, "Claimed comm interface %d", channel->cdc_dev_.interrupt_interface_number); + } + } auto err = usb_host_interface_claim(this->handle_, this->device_handle_, channel->cdc_dev_.bulk_interface_number, 0); if (err != ESP_OK) { @@ -378,18 +453,74 @@ void USBUartTypeCdcAcm::on_disconnected() { usb_host_endpoint_halt(this->device_handle_, channel->cdc_dev_.notify_ep->bEndpointAddress); usb_host_endpoint_flush(this->device_handle_, channel->cdc_dev_.notify_ep->bEndpointAddress); } + if (channel->cdc_dev_.comm_interface_claimed) { + usb_host_interface_release(this->handle_, this->device_handle_, channel->cdc_dev_.interrupt_interface_number); + channel->cdc_dev_.comm_interface_claimed = false; + } usb_host_interface_release(this->handle_, this->device_handle_, channel->cdc_dev_.bulk_interface_number); // Reset the input and output started flags to their initial state to avoid the possibility of spurious restarts channel->input_started_.store(true); channel->output_started_.store(true); channel->input_buffer_.clear(); - channel->output_buffer_.clear(); + // Drain any pending output chunks and return them to the pool + { + UsbOutputChunk *chunk; + while ((chunk = channel->output_queue_.pop()) != nullptr) { + channel->output_pool_.release(chunk); + } + } channel->initialised_.store(false); } USBClient::on_disconnected(); } void USBUartTypeCdcAcm::enable_channels() { + static constexpr uint8_t CDC_REQUEST_TYPE = usb_host::USB_TYPE_CLASS | usb_host::USB_RECIP_INTERFACE; + static constexpr uint8_t CDC_SET_LINE_CODING = 0x20; + static constexpr uint8_t CDC_SET_CONTROL_LINE_STATE = 0x22; + static constexpr uint16_t CDC_DTR_RTS = 0x0003; // D0=DTR, D1=RTS + + for (auto *channel : this->channels_) { + if (!channel->initialised_.load()) + continue; + // Configure the bridge's UART parameters. A USB-UART bridge will not forward data + // at the correct speed until SET_LINE_CODING is sent; without it the UART may run + // at an indeterminate default rate so the NCP receives garbled bytes and never + // sends RSTACK. + uint32_t baud = channel->baud_rate_; + std::vector line_coding = { + static_cast(baud & 0xFF), static_cast((baud >> 8) & 0xFF), + static_cast((baud >> 16) & 0xFF), static_cast((baud >> 24) & 0xFF), + static_cast(channel->stop_bits_), // bCharFormat: 0=1stop, 1=1.5stop, 2=2stop + static_cast(channel->parity_), // bParityType: 0=None, 1=Odd, 2=Even, 3=Mark, 4=Space + static_cast(channel->data_bits_), // bDataBits + }; + ESP_LOGD(TAG, "SET_LINE_CODING: baud=%u stop=%u parity=%u data=%u", (unsigned) baud, channel->stop_bits_, + (unsigned) channel->parity_, channel->data_bits_); + this->control_transfer( + CDC_REQUEST_TYPE, CDC_SET_LINE_CODING, 0, channel->cdc_dev_.interrupt_interface_number, + [](const usb_host::TransferStatus &status) { + if (!status.success) { + ESP_LOGW(TAG, "SET_LINE_CODING failed: %X", status.error_code); + } else { + ESP_LOGD(TAG, "SET_LINE_CODING OK"); + } + }, + line_coding); + // Assert DTR+RTS to signal DTE is present. + this->control_transfer(CDC_REQUEST_TYPE, CDC_SET_CONTROL_LINE_STATE, CDC_DTR_RTS, + channel->cdc_dev_.interrupt_interface_number, [](const usb_host::TransferStatus &status) { + if (!status.success) { + ESP_LOGW(TAG, "SET_CONTROL_LINE_STATE failed: %X", status.error_code); + } else { + ESP_LOGD(TAG, "SET_CONTROL_LINE_STATE (DTR+RTS) OK"); + } + }); + } + this->start_channels(); +} + +void USBUartTypeCdcAcm::start_channels() { for (auto *channel : this->channels_) { if (!channel->initialised_.load()) continue; @@ -399,6 +530,6 @@ void USBUartTypeCdcAcm::enable_channels() { } } -} // namespace usb_uart -} // namespace esphome +} // namespace esphome::usb_uart + #endif // USE_ESP32_VARIANT_ESP32P4 || USE_ESP32_VARIANT_ESP32S2 || USE_ESP32_VARIANT_ESP32S3 diff --git a/esphome/components/usb_uart/usb_uart.h b/esphome/components/usb_uart/usb_uart.h index 94e6120457..9a9fe1c2ca 100644 --- a/esphome/components/usb_uart/usb_uart.h +++ b/esphome/components/usb_uart/usb_uart.h @@ -8,9 +8,10 @@ #include "esphome/core/lock_free_queue.h" #include "esphome/core/event_pool.h" #include +#include + +namespace esphome::usb_uart { -namespace esphome { -namespace usb_uart { class USBUartTypeCdcAcm; class USBUartComponent; class USBUartChannel; @@ -31,6 +32,7 @@ struct CdcEps { const usb_ep_desc_t *out_ep; uint8_t bulk_interface_number; uint8_t interrupt_interface_number; + bool comm_interface_claimed{false}; }; enum UARTParityOptions { @@ -83,6 +85,16 @@ struct UsbDataChunk { void release() {} }; +// Structure for queuing outgoing USB data chunks (one per USB FS packet) +struct UsbOutputChunk { + static constexpr size_t MAX_CHUNK_SIZE = 64; // USB FS MPS + uint8_t data[MAX_CHUNK_SIZE]; + uint8_t length; + + // Required for EventPool - no cleanup needed for POD types + void release() {} +}; + class USBUartChannel : public uart::UARTComponent, public Parented { friend class USBUartComponent; friend class USBUartTypeCdcAcm; @@ -90,24 +102,32 @@ class USBUartChannel : public uart::UARTComponent, public Parentedinput_buffer_.get_available(); } - void flush() override {} + void flush() override; void check_logger_conflict() override {} void set_parity(UARTParityOptions parity) { this->parity_ = parity; } void set_debug(bool debug) { this->debug_ = debug; } void set_dummy_receiver(bool dummy_receiver) { this->dummy_receiver_ = dummy_receiver; } + /// Register a callback invoked immediately after data is pushed to the input ring buffer. + /// Called from USBUartComponent::loop() in the main loop context. + /// Allows consumers (e.g. ZigbeeProxy) to process bytes in the same loop iteration + /// they arrive, eliminating one full main-loop-wakeup cycle of latency. + void set_rx_callback(std::function cb) { this->rx_callback_ = std::move(cb); } + protected: // Larger structures first for better alignment RingBuffer input_buffer_; - RingBuffer output_buffer_; + LockFreeQueue output_queue_; + EventPool output_pool_; + std::function rx_callback_{}; CdcEps cdc_dev_{}; // Enum (likely 4 bytes) UARTParityOptions parity_{UART_CONFIG_PARITY_NONE}; @@ -150,8 +170,12 @@ class USBUartTypeCdcAcm : public USBUartComponent { protected: virtual std::vector parse_descriptors(usb_device_handle_t dev_hdl); void on_connected() override; - virtual void enable_channels(); void on_disconnected() override; + virtual void enable_channels(); + /// Resets per-channel transfer flags and posts the first bulk IN transfer. + /// Called by enable_channels() and by vendor-specific subclass overrides that + /// handle their own line-coding setup before starting data flow. + void start_channels(); }; class USBUartTypeCP210X : public USBUartTypeCdcAcm { @@ -170,7 +194,6 @@ class USBUartTypeCH34X : public USBUartTypeCdcAcm { void enable_channels() override; }; -} // namespace usb_uart -} // namespace esphome +} // namespace esphome::usb_uart #endif // USE_ESP32_VARIANT_ESP32P4 || USE_ESP32_VARIANT_ESP32S2 || USE_ESP32_VARIANT_ESP32S3