diff --git a/esphome/components/audio/__init__.py b/esphome/components/audio/__init__.py index f48b776ddd..d8d426ec63 100644 --- a/esphome/components/audio/__init__.py +++ b/esphome/components/audio/__init__.py @@ -1,10 +1,14 @@ +from dataclasses import dataclass + import esphome.codegen as cg from esphome.components.esp32 import add_idf_component, include_builtin_idf_component import esphome.config_validation as cv from esphome.const import CONF_BITS_PER_SAMPLE, CONF_NUM_CHANNELS, CONF_SAMPLE_RATE +from esphome.core import CORE import esphome.final_validate as fv CODEOWNERS = ["@kahrendt"] +DOMAIN = "audio" audio_ns = cg.esphome_ns.namespace("audio") AudioFile = audio_ns.struct("AudioFile") @@ -14,9 +18,38 @@ AUDIO_FILE_TYPE_ENUM = { "WAV": AudioFileType.WAV, "MP3": AudioFileType.MP3, "FLAC": AudioFileType.FLAC, + "OPUS": AudioFileType.OPUS, } +@dataclass +class AudioData: + flac_support: bool = False + mp3_support: bool = False + opus_support: bool = False + + +def _get_data() -> AudioData: + if DOMAIN not in CORE.data: + CORE.data[DOMAIN] = AudioData() + return CORE.data[DOMAIN] + + +def request_flac_support() -> None: + """Request FLAC codec support for audio decoding.""" + _get_data().flac_support = True + + +def request_mp3_support() -> None: + """Request MP3 codec support for audio decoding.""" + _get_data().mp3_support = True + + +def request_opus_support() -> None: + """Request Opus codec support for audio decoding.""" + _get_data().opus_support = True + + CONF_MIN_BITS_PER_SAMPLE = "min_bits_per_sample" CONF_MAX_BITS_PER_SAMPLE = "max_bits_per_sample" CONF_MIN_CHANNELS = "min_channels" @@ -173,3 +206,12 @@ async def to_code(config): name="esphome/esp-audio-libs", ref="2.0.3", ) + + data = _get_data() + if data.flac_support: + cg.add_define("USE_AUDIO_FLAC_SUPPORT") + if data.mp3_support: + cg.add_define("USE_AUDIO_MP3_SUPPORT") + if data.opus_support: + cg.add_define("USE_AUDIO_OPUS_SUPPORT") + add_idf_component(name="esphome/micro-opus", ref="0.3.3") diff --git a/esphome/components/audio/audio.cpp b/esphome/components/audio/audio.cpp index 9cc9b7d0da..40592f6107 100644 --- a/esphome/components/audio/audio.cpp +++ b/esphome/components/audio/audio.cpp @@ -46,6 +46,10 @@ const char *audio_file_type_to_string(AudioFileType file_type) { #ifdef USE_AUDIO_MP3_SUPPORT case AudioFileType::MP3: return "MP3"; +#endif +#ifdef USE_AUDIO_OPUS_SUPPORT + case AudioFileType::OPUS: + return "OPUS"; #endif case AudioFileType::WAV: return "WAV"; diff --git a/esphome/components/audio/audio.h b/esphome/components/audio/audio.h index e01d7eb101..7d7db9e944 100644 --- a/esphome/components/audio/audio.h +++ b/esphome/components/audio/audio.h @@ -112,6 +112,9 @@ enum class AudioFileType : uint8_t { #endif #ifdef USE_AUDIO_MP3_SUPPORT MP3, +#endif +#ifdef USE_AUDIO_OPUS_SUPPORT + OPUS, #endif WAV, }; diff --git a/esphome/components/audio/audio_decoder.cpp b/esphome/components/audio/audio_decoder.cpp index 8f514468c4..ee6d7d0a15 100644 --- a/esphome/components/audio/audio_decoder.cpp +++ b/esphome/components/audio/audio_decoder.cpp @@ -3,10 +3,13 @@ #ifdef USE_ESP32 #include "esphome/core/hal.h" +#include "esphome/core/log.h" namespace esphome { namespace audio { +static const char *const TAG = "audio.decoder"; + static const uint32_t DECODING_TIMEOUT_MS = 50; // The decode function will yield after this duration static const uint32_t READ_WRITE_TIMEOUT_MS = 20; // Timeout for transferring audio data @@ -79,6 +82,14 @@ esp_err_t AudioDecoder::start(AudioFileType audio_file_type) { // Always reallocate the output transfer buffer to the smallest necessary size this->output_transfer_buffer_->reallocate(this->free_buffer_required_); break; +#endif +#ifdef USE_AUDIO_OPUS_SUPPORT + case AudioFileType::OPUS: + this->opus_decoder_ = make_unique(); + this->free_buffer_required_ = + this->output_transfer_buffer_->capacity(); // Adjusted and reallocated after reading the header + this->decoder_buffers_internally_ = true; + break; #endif case AudioFileType::WAV: this->wav_decoder_ = make_unique(); @@ -158,8 +169,9 @@ AudioDecoderState AudioDecoder::decode(bool stop_gracefully) { // Decode more audio // Only shift data on the first loop iteration to avoid unnecessary, slow moves - size_t bytes_read = this->input_transfer_buffer_->transfer_data_from_source(pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS), - first_loop_iteration); + // If the decoder buffers internally, then never shift + size_t bytes_read = this->input_transfer_buffer_->transfer_data_from_source( + pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS), first_loop_iteration && !this->decoder_buffers_internally_); if (!first_loop_iteration && (this->input_transfer_buffer_->available() < bytes_processed)) { // Less data is available than what was processed in last iteration, so don't attempt to decode. @@ -195,6 +207,11 @@ AudioDecoderState AudioDecoder::decode(bool stop_gracefully) { case AudioFileType::MP3: state = this->decode_mp3_(); break; +#endif +#ifdef USE_AUDIO_OPUS_SUPPORT + case AudioFileType::OPUS: + state = this->decode_opus_(); + break; #endif case AudioFileType::WAV: state = this->decode_wav_(); @@ -339,6 +356,45 @@ FileDecoderState AudioDecoder::decode_mp3_() { } #endif +#ifdef USE_AUDIO_OPUS_SUPPORT +FileDecoderState AudioDecoder::decode_opus_() { + bool processed_header = this->opus_decoder_->is_initialized(); + + size_t bytes_consumed, samples_decoded; + + micro_opus::OggOpusResult result = this->opus_decoder_->decode( + this->input_transfer_buffer_->get_buffer_start(), this->input_transfer_buffer_->available(), + this->output_transfer_buffer_->get_buffer_end(), this->output_transfer_buffer_->free(), bytes_consumed, + samples_decoded); + + if (result == micro_opus::OGG_OPUS_OK) { + if (!processed_header && this->opus_decoder_->is_initialized()) { + // Header processed and stream info is available + this->audio_stream_info_ = + audio::AudioStreamInfo(this->opus_decoder_->get_bit_depth(), this->opus_decoder_->get_channels(), + this->opus_decoder_->get_sample_rate()); + } + if (samples_decoded > 0 && this->audio_stream_info_.has_value()) { + // Some audio was processed + this->output_transfer_buffer_->increase_buffer_length( + this->audio_stream_info_.value().frames_to_bytes(samples_decoded)); + } + this->input_transfer_buffer_->decrease_buffer_length(bytes_consumed); + } else if (result == micro_opus::OGG_OPUS_OUTPUT_BUFFER_TOO_SMALL) { + // Reallocate to decode the packet on the next call + this->free_buffer_required_ = this->opus_decoder_->get_required_output_buffer_size(); + if (!this->output_transfer_buffer_->reallocate(this->free_buffer_required_)) { + // Couldn't reallocate output buffer + return FileDecoderState::FAILED; + } + } else { + ESP_LOGE(TAG, "Opus decoder failed: %" PRId8, result); + return FileDecoderState::POTENTIALLY_FAILED; + } + return FileDecoderState::MORE_TO_PROCESS; +} +#endif + FileDecoderState AudioDecoder::decode_wav_() { if (!this->audio_stream_info_.has_value()) { // Header hasn't been processed diff --git a/esphome/components/audio/audio_decoder.h b/esphome/components/audio/audio_decoder.h index 2ca1d623fe..cad16110ae 100644 --- a/esphome/components/audio/audio_decoder.h +++ b/esphome/components/audio/audio_decoder.h @@ -24,6 +24,11 @@ #endif #include +// micro-opus +#ifdef USE_AUDIO_OPUS_SUPPORT +#include +#endif + namespace esphome { namespace audio { @@ -47,7 +52,7 @@ class AudioDecoder { * @brief Class that facilitates decoding an audio file. * The audio file is read from a ring buffer source, decoded, and sent to an audio sink (ring buffer or speaker * component). - * Supports wav, flac, and mp3 formats. + * Supports wav, flac, mp3, and ogg opus formats. */ public: /// @brief Allocates the input and output transfer buffers @@ -55,7 +60,7 @@ class AudioDecoder { /// @param output_buffer_size Size of the output transfer buffer in bytes. AudioDecoder(size_t input_buffer_size, size_t output_buffer_size); - /// @brief Deallocates the MP3 decoder (the flac and wav decoders are deallocated automatically) + /// @brief Deallocates the MP3 decoder (the flac, opus, and wav decoders are deallocated automatically) ~AudioDecoder(); /// @brief Adds a source ring buffer for raw file data. Takes ownership of the ring buffer in a shared_ptr. @@ -108,6 +113,10 @@ class AudioDecoder { #ifdef USE_AUDIO_MP3_SUPPORT FileDecoderState decode_mp3_(); esp_audio_libs::helix_decoder::HMP3Decoder mp3_decoder_; +#endif +#ifdef USE_AUDIO_OPUS_SUPPORT + FileDecoderState decode_opus_(); + std::unique_ptr opus_decoder_; #endif FileDecoderState decode_wav_(); @@ -124,6 +133,8 @@ class AudioDecoder { bool end_of_file_{false}; bool wav_has_known_end_{false}; + bool decoder_buffers_internally_{false}; + bool pause_output_{false}; uint32_t accumulated_frames_written_{0}; diff --git a/esphome/components/audio/audio_reader.cpp b/esphome/components/audio/audio_reader.cpp index 4e4bd31f9b..78d69d7a39 100644 --- a/esphome/components/audio/audio_reader.cpp +++ b/esphome/components/audio/audio_reader.cpp @@ -197,6 +197,11 @@ esp_err_t AudioReader::start(const std::string &uri, AudioFileType &file_type) { else if (str_endswith_ignore_case(url, ".flac")) { file_type = AudioFileType::FLAC; } +#endif +#ifdef USE_AUDIO_OPUS_SUPPORT + else if (str_endswith_ignore_case(url, ".opus")) { + file_type = AudioFileType::OPUS; + } #endif else { file_type = AudioFileType::NONE; @@ -241,6 +246,14 @@ AudioFileType AudioReader::get_audio_type(const char *content_type) { if (strcasecmp(content_type, "audio/flac") == 0 || strcasecmp(content_type, "audio/x-flac") == 0) { return AudioFileType::FLAC; } +#endif +#ifdef USE_AUDIO_OPUS_SUPPORT + // Match "audio/ogg" with a codecs parameter containing "opus" + // Valid forms: audio/ogg;codecs=opus, audio/ogg; codecs="opus", etc. + // Plain "audio/ogg" without a codecs parameter is not matched, as those are almost always Ogg Vorbis streams + if (strncasecmp(content_type, "audio/ogg", 9) == 0 && strcasestr(content_type + 9, "opus") != nullptr) { + return AudioFileType::OPUS; + } #endif return AudioFileType::NONE; } diff --git a/esphome/components/audio/audio_transfer_buffer.cpp b/esphome/components/audio/audio_transfer_buffer.cpp index ddb669e0eb..a8be55d62f 100644 --- a/esphome/components/audio/audio_transfer_buffer.cpp +++ b/esphome/components/audio/audio_transfer_buffer.cpp @@ -165,6 +165,8 @@ size_t AudioSinkTransferBuffer::transfer_data_to_sink(TickType_t ticks_to_wait, if (this->ring_buffer_.use_count() > 0) { bytes_written = this->ring_buffer_->write_without_replacement((void *) this->data_start_, this->available(), ticks_to_wait); + } else if (this->sink_callback_ != nullptr) { + bytes_written = this->sink_callback_->audio_sink_write(this->data_start_, this->available(), ticks_to_wait); } this->decrease_buffer_length(bytes_written); diff --git a/esphome/components/audio/audio_transfer_buffer.h b/esphome/components/audio/audio_transfer_buffer.h index 24c0670d1a..22c22cc9ae 100644 --- a/esphome/components/audio/audio_transfer_buffer.h +++ b/esphome/components/audio/audio_transfer_buffer.h @@ -15,6 +15,12 @@ namespace esphome { namespace audio { +/// @brief Abstract interface for writing decoded audio data to a sink. +class AudioSinkCallback { + public: + virtual size_t audio_sink_write(uint8_t *data, size_t length, TickType_t ticks_to_wait) = 0; +}; + class AudioTransferBuffer { /* * @brief Class that facilitates tranferring data between a buffer and an audio source or sink. @@ -108,6 +114,10 @@ class AudioSinkTransferBuffer : public AudioTransferBuffer { void set_sink(speaker::Speaker *speaker) { this->speaker_ = speaker; } #endif + /// @brief Adds a callback as the transfer buffer's sink. + /// @param callback Pointer to the AudioSinkCallback implementation + void set_sink(AudioSinkCallback *callback) { this->sink_callback_ = callback; } + void clear_buffered_data() override; bool has_buffered_data() const override; @@ -116,6 +126,7 @@ class AudioSinkTransferBuffer : public AudioTransferBuffer { #ifdef USE_SPEAKER speaker::Speaker *speaker_{nullptr}; #endif + AudioSinkCallback *sink_callback_{nullptr}; }; class AudioSourceTransferBuffer : public AudioTransferBuffer { diff --git a/esphome/components/esp32_camera/__init__.py b/esphome/components/esp32_camera/__init__.py index db6244fb3f..3a5d87792b 100644 --- a/esphome/components/esp32_camera/__init__.py +++ b/esphome/components/esp32_camera/__init__.py @@ -22,8 +22,10 @@ from esphome.const import ( CONF_TRIGGER_ID, CONF_VSYNC_PIN, ) +from esphome.core import CORE from esphome.core.entity_helpers import setup_entity import esphome.final_validate as fv +from esphome.types import ConfigType _LOGGER = logging.getLogger(__name__) @@ -84,6 +86,18 @@ FRAME_SIZES = { "2560X1920": ESP32CameraFrameSize.ESP32_CAMERA_SIZE_2560X1920, "QSXGA": ESP32CameraFrameSize.ESP32_CAMERA_SIZE_2560X1920, } +ESP32CameraPixelFormat = esp32_camera_ns.enum("ESP32CameraPixelFormat") +PIXEL_FORMATS = { + "RGB565": ESP32CameraPixelFormat.ESP32_PIXEL_FORMAT_RGB565, + "YUV422": ESP32CameraPixelFormat.ESP32_PIXEL_FORMAT_YUV422, + "YUV420": ESP32CameraPixelFormat.ESP32_PIXEL_FORMAT_YUV420, + "GRAYSCALE": ESP32CameraPixelFormat.ESP32_PIXEL_FORMAT_GRAYSCALE, + "JPEG": ESP32CameraPixelFormat.ESP32_PIXEL_FORMAT_JPEG, + "RGB888": ESP32CameraPixelFormat.ESP32_PIXEL_FORMAT_RGB888, + "RAW": ESP32CameraPixelFormat.ESP32_PIXEL_FORMAT_RAW, + "RGB444": ESP32CameraPixelFormat.ESP32_PIXEL_FORMAT_RGB444, + "RGB555": ESP32CameraPixelFormat.ESP32_PIXEL_FORMAT_RGB555, +} ESP32GainControlMode = esp32_camera_ns.enum("ESP32GainControlMode") ENUM_GAIN_CONTROL_MODE = { "MANUAL": ESP32GainControlMode.ESP32_GC_MODE_MANU, @@ -131,6 +145,7 @@ CONF_EXTERNAL_CLOCK = "external_clock" CONF_I2C_PINS = "i2c_pins" CONF_POWER_DOWN_PIN = "power_down_pin" # image +CONF_PIXEL_FORMAT = "pixel_format" CONF_JPEG_QUALITY = "jpeg_quality" CONF_VERTICAL_FLIP = "vertical_flip" CONF_HORIZONTAL_MIRROR = "horizontal_mirror" @@ -171,6 +186,21 @@ def validate_fb_location_(value): return validator(value) +def validate_jpeg_quality(config: ConfigType) -> ConfigType: + quality = config.get(CONF_JPEG_QUALITY) + pixel_format = config.get(CONF_PIXEL_FORMAT, "JPEG") + + if quality == 0: + # Set default JPEG quality if not specified for backwards compatibility + if pixel_format == "JPEG": + config[CONF_JPEG_QUALITY] = 10 + # For pixel formats other than JPEG, the valid 0 means no conversion + elif quality < 6 or quality > 63: + raise cv.Invalid(f"jpeg_quality must be between 6 and 63, got {quality}") + + return config + + CONFIG_SCHEMA = cv.All( cv.ENTITY_BASE_SCHEMA.extend( { @@ -206,7 +236,12 @@ CONFIG_SCHEMA = cv.All( cv.Optional(CONF_RESOLUTION, default="640X480"): cv.enum( FRAME_SIZES, upper=True ), - cv.Optional(CONF_JPEG_QUALITY, default=10): cv.int_range(min=6, max=63), + cv.Optional(CONF_PIXEL_FORMAT, default="JPEG"): cv.enum( + PIXEL_FORMATS, upper=True + ), + cv.Optional(CONF_JPEG_QUALITY, default=0): cv.Any( + cv.one_of(0), cv.int_range(min=6, max=63) + ), cv.Optional(CONF_CONTRAST, default=0): camera_range_param, cv.Optional(CONF_BRIGHTNESS, default=0): camera_range_param, cv.Optional(CONF_SATURATION, default=0): camera_range_param, @@ -270,11 +305,21 @@ CONFIG_SCHEMA = cv.All( ), } ).extend(cv.COMPONENT_SCHEMA), + validate_jpeg_quality, cv.has_exactly_one_key(CONF_I2C_PINS, CONF_I2C_ID), ) def _final_validate(config): + # Check psram requirement for non-JPEG formats + if ( + config.get(CONF_PIXEL_FORMAT, "JPEG") != "JPEG" + and psram_domain not in CORE.loaded_integrations + ): + raise cv.Invalid( + f"Non-JPEG pixel formats require the '{psram_domain}' component for JPEG conversion" + ) + if CONF_I2C_PINS not in config: return fconf = fv.full_config.get() @@ -298,6 +343,7 @@ SETTERS = { CONF_RESET_PIN: "set_reset_pin", CONF_POWER_DOWN_PIN: "set_power_down_pin", # image + CONF_PIXEL_FORMAT: "set_pixel_format", CONF_JPEG_QUALITY: "set_jpeg_quality", CONF_VERTICAL_FLIP: "set_vertical_flip", CONF_HORIZONTAL_MIRROR: "set_horizontal_mirror", @@ -351,6 +397,8 @@ async def to_code(config): cg.add(var.set_frame_size(config[CONF_RESOLUTION])) cg.add_define("USE_CAMERA") + if config[CONF_JPEG_QUALITY] != 0 and config[CONF_PIXEL_FORMAT] != "JPEG": + cg.add_define("USE_ESP32_CAMERA_JPEG_CONVERSION") add_idf_component(name="espressif/esp32-camera", ref="2.1.1") add_idf_sdkconfig_option("CONFIG_SCCB_HARDWARE_I2C_DRIVER_NEW", True) diff --git a/esphome/components/esp32_camera/esp32_camera.cpp b/esphome/components/esp32_camera/esp32_camera.cpp index cfe06b1673..655ae54f0a 100644 --- a/esphome/components/esp32_camera/esp32_camera.cpp +++ b/esphome/components/esp32_camera/esp32_camera.cpp @@ -16,6 +16,74 @@ static constexpr size_t FRAMEBUFFER_TASK_STACK_SIZE = 1792; static constexpr uint32_t FRAME_LOG_INTERVAL_MS = 60000; #endif +static const char *frame_size_to_str(framesize_t size) { + switch (size) { + case FRAMESIZE_QQVGA: + return "160x120 (QQVGA)"; + case FRAMESIZE_QCIF: + return "176x155 (QCIF)"; + case FRAMESIZE_HQVGA: + return "240x176 (HQVGA)"; + case FRAMESIZE_QVGA: + return "320x240 (QVGA)"; + case FRAMESIZE_CIF: + return "400x296 (CIF)"; + case FRAMESIZE_VGA: + return "640x480 (VGA)"; + case FRAMESIZE_SVGA: + return "800x600 (SVGA)"; + case FRAMESIZE_XGA: + return "1024x768 (XGA)"; + case FRAMESIZE_SXGA: + return "1280x1024 (SXGA)"; + case FRAMESIZE_UXGA: + return "1600x1200 (UXGA)"; + case FRAMESIZE_FHD: + return "1920x1080 (FHD)"; + case FRAMESIZE_P_HD: + return "720x1280 (P_HD)"; + case FRAMESIZE_P_3MP: + return "864x1536 (P_3MP)"; + case FRAMESIZE_QXGA: + return "2048x1536 (QXGA)"; + case FRAMESIZE_QHD: + return "2560x1440 (QHD)"; + case FRAMESIZE_WQXGA: + return "2560x1600 (WQXGA)"; + case FRAMESIZE_P_FHD: + return "1080x1920 (P_FHD)"; + case FRAMESIZE_QSXGA: + return "2560x1920 (QSXGA)"; + default: + return "UNKNOWN"; + } +} + +static const char *pixel_format_to_str(pixformat_t format) { + switch (format) { + case PIXFORMAT_RGB565: + return "RGB565"; + case PIXFORMAT_YUV422: + return "YUV422"; + case PIXFORMAT_YUV420: + return "YUV420"; + case PIXFORMAT_GRAYSCALE: + return "GRAYSCALE"; + case PIXFORMAT_JPEG: + return "JPEG"; + case PIXFORMAT_RGB888: + return "RGB888"; + case PIXFORMAT_RAW: + return "RAW"; + case PIXFORMAT_RGB444: + return "RGB444"; + case PIXFORMAT_RGB555: + return "RGB555"; + default: + return "UNKNOWN"; + } +} + /* ---------------- public API (derivated) ---------------- */ void ESP32Camera::setup() { #ifdef USE_I2C @@ -68,64 +136,9 @@ void ESP32Camera::dump_config() { this->name_.c_str(), YESNO(this->is_internal()), conf.pin_d0, conf.pin_d1, conf.pin_d2, conf.pin_d3, conf.pin_d4, conf.pin_d5, conf.pin_d6, conf.pin_d7, conf.pin_vsync, conf.pin_href, conf.pin_pclk, conf.pin_xclk, conf.xclk_freq_hz, conf.pin_sccb_sda, conf.pin_sccb_scl, conf.pin_reset); - switch (this->config_.frame_size) { - case FRAMESIZE_QQVGA: - ESP_LOGCONFIG(TAG, " Resolution: 160x120 (QQVGA)"); - break; - case FRAMESIZE_QCIF: - ESP_LOGCONFIG(TAG, " Resolution: 176x155 (QCIF)"); - break; - case FRAMESIZE_HQVGA: - ESP_LOGCONFIG(TAG, " Resolution: 240x176 (HQVGA)"); - break; - case FRAMESIZE_QVGA: - ESP_LOGCONFIG(TAG, " Resolution: 320x240 (QVGA)"); - break; - case FRAMESIZE_CIF: - ESP_LOGCONFIG(TAG, " Resolution: 400x296 (CIF)"); - break; - case FRAMESIZE_VGA: - ESP_LOGCONFIG(TAG, " Resolution: 640x480 (VGA)"); - break; - case FRAMESIZE_SVGA: - ESP_LOGCONFIG(TAG, " Resolution: 800x600 (SVGA)"); - break; - case FRAMESIZE_XGA: - ESP_LOGCONFIG(TAG, " Resolution: 1024x768 (XGA)"); - break; - case FRAMESIZE_SXGA: - ESP_LOGCONFIG(TAG, " Resolution: 1280x1024 (SXGA)"); - break; - case FRAMESIZE_UXGA: - ESP_LOGCONFIG(TAG, " Resolution: 1600x1200 (UXGA)"); - break; - case FRAMESIZE_FHD: - ESP_LOGCONFIG(TAG, " Resolution: 1920x1080 (FHD)"); - break; - case FRAMESIZE_P_HD: - ESP_LOGCONFIG(TAG, " Resolution: 720x1280 (P_HD)"); - break; - case FRAMESIZE_P_3MP: - ESP_LOGCONFIG(TAG, " Resolution: 864x1536 (P_3MP)"); - break; - case FRAMESIZE_QXGA: - ESP_LOGCONFIG(TAG, " Resolution: 2048x1536 (QXGA)"); - break; - case FRAMESIZE_QHD: - ESP_LOGCONFIG(TAG, " Resolution: 2560x1440 (QHD)"); - break; - case FRAMESIZE_WQXGA: - ESP_LOGCONFIG(TAG, " Resolution: 2560x1600 (WQXGA)"); - break; - case FRAMESIZE_P_FHD: - ESP_LOGCONFIG(TAG, " Resolution: 1080x1920 (P_FHD)"); - break; - case FRAMESIZE_QSXGA: - ESP_LOGCONFIG(TAG, " Resolution: 2560x1920 (QSXGA)"); - break; - default: - break; - } + + ESP_LOGCONFIG(TAG, " Resolution: %s", frame_size_to_str(this->config_.frame_size)); + ESP_LOGCONFIG(TAG, " Pixel Format: %s", pixel_format_to_str(this->config_.pixel_format)); if (this->is_failed()) { ESP_LOGE(TAG, " Setup Failed: %s", esp_err_to_name(this->init_error_)); @@ -184,8 +197,19 @@ void ESP32Camera::loop() { // check if we can return the image if (this->can_return_image_()) { // return image - auto *fb = this->current_image_->get_raw_buffer(); - xQueueSend(this->framebuffer_return_queue_, &fb, portMAX_DELAY); +#ifdef USE_ESP32_CAMERA_JPEG_CONVERSION + if (this->config_.pixel_format != PIXFORMAT_JPEG && this->config_.jpeg_quality > 0) { + // for non-JPEG format, we need to free the data and raw buffer + auto *jpg_buf = this->current_image_->get_data_buffer(); + free(jpg_buf); // NOLINT(cppcoreguidelines-no-malloc) + auto *fb = this->current_image_->get_raw_buffer(); + this->fb_allocator_.deallocate(fb, 1); + } else +#endif + { + auto *fb = this->current_image_->get_raw_buffer(); + xQueueSend(this->framebuffer_return_queue_, &fb, portMAX_DELAY); + } this->current_image_.reset(); } @@ -212,6 +236,38 @@ void ESP32Camera::loop() { xQueueSend(this->framebuffer_return_queue_, &fb, portMAX_DELAY); return; } + +#ifdef USE_ESP32_CAMERA_JPEG_CONVERSION + if (this->config_.pixel_format != PIXFORMAT_JPEG && this->config_.jpeg_quality > 0) { + // for non-JPEG format, we need to convert the frame to JPEG + uint8_t *jpg_buf; + size_t jpg_buf_len; + size_t width = fb->width; + size_t height = fb->height; + struct timeval timestamp = fb->timestamp; + bool ok = frame2jpg(fb, 100 - this->config_.jpeg_quality, &jpg_buf, &jpg_buf_len); + // return the original frame buffer to the queue + xQueueSend(this->framebuffer_return_queue_, &fb, portMAX_DELAY); + if (!ok) { + ESP_LOGE(TAG, "Failed to convert frame to JPEG!"); + return; + } + // create a new camera_fb_t for the JPEG data + fb = this->fb_allocator_.allocate(1); + if (fb == nullptr) { + ESP_LOGE(TAG, "Failed to allocate memory for camera frame buffer!"); + free(jpg_buf); // NOLINT(cppcoreguidelines-no-malloc) + return; + } + memset(fb, 0, sizeof(camera_fb_t)); + fb->buf = jpg_buf; + fb->len = jpg_buf_len; + fb->width = width; + fb->height = height; + fb->format = PIXFORMAT_JPEG; + fb->timestamp = timestamp; + } +#endif this->current_image_ = std::make_shared(fb, this->single_requesters_ | this->stream_requesters_); #if ESPHOME_LOG_LEVEL >= ESPHOME_LOG_LEVEL_VERBOSE @@ -342,6 +398,37 @@ void ESP32Camera::set_frame_size(ESP32CameraFrameSize size) { break; } } +void ESP32Camera::set_pixel_format(ESP32CameraPixelFormat format) { + switch (format) { + case ESP32_PIXEL_FORMAT_RGB565: + this->config_.pixel_format = PIXFORMAT_RGB565; + break; + case ESP32_PIXEL_FORMAT_YUV422: + this->config_.pixel_format = PIXFORMAT_YUV422; + break; + case ESP32_PIXEL_FORMAT_YUV420: + this->config_.pixel_format = PIXFORMAT_YUV420; + break; + case ESP32_PIXEL_FORMAT_GRAYSCALE: + this->config_.pixel_format = PIXFORMAT_GRAYSCALE; + break; + case ESP32_PIXEL_FORMAT_JPEG: + this->config_.pixel_format = PIXFORMAT_JPEG; + break; + case ESP32_PIXEL_FORMAT_RGB888: + this->config_.pixel_format = PIXFORMAT_RGB888; + break; + case ESP32_PIXEL_FORMAT_RAW: + this->config_.pixel_format = PIXFORMAT_RAW; + break; + case ESP32_PIXEL_FORMAT_RGB444: + this->config_.pixel_format = PIXFORMAT_RGB444; + break; + case ESP32_PIXEL_FORMAT_RGB555: + this->config_.pixel_format = PIXFORMAT_RGB555; + break; + } +} void ESP32Camera::set_jpeg_quality(uint8_t quality) { this->config_.jpeg_quality = quality; } void ESP32Camera::set_vertical_flip(bool vertical_flip) { this->vertical_flip_ = vertical_flip; } void ESP32Camera::set_horizontal_mirror(bool horizontal_mirror) { this->horizontal_mirror_ = horizontal_mirror; } diff --git a/esphome/components/esp32_camera/esp32_camera.h b/esphome/components/esp32_camera/esp32_camera.h index eea93b7e01..9fbd3848f2 100644 --- a/esphome/components/esp32_camera/esp32_camera.h +++ b/esphome/components/esp32_camera/esp32_camera.h @@ -41,6 +41,18 @@ enum ESP32CameraFrameSize { ESP32_CAMERA_SIZE_2560X1920, // QSXGA }; +enum ESP32CameraPixelFormat { + ESP32_PIXEL_FORMAT_RGB565, + ESP32_PIXEL_FORMAT_YUV422, + ESP32_PIXEL_FORMAT_YUV420, + ESP32_PIXEL_FORMAT_GRAYSCALE, + ESP32_PIXEL_FORMAT_JPEG, + ESP32_PIXEL_FORMAT_RGB888, + ESP32_PIXEL_FORMAT_RAW, + ESP32_PIXEL_FORMAT_RGB444, + ESP32_PIXEL_FORMAT_RGB555, +}; + enum ESP32AgcGainCeiling { ESP32_GAINCEILING_2X = GAINCEILING_2X, ESP32_GAINCEILING_4X = GAINCEILING_4X, @@ -126,6 +138,7 @@ class ESP32Camera : public camera::Camera { void set_reset_pin(uint8_t pin); void set_power_down_pin(uint8_t pin); /* -- image */ + void set_pixel_format(ESP32CameraPixelFormat format); void set_frame_size(ESP32CameraFrameSize size); void set_jpeg_quality(uint8_t quality); void set_vertical_flip(bool vertical_flip); @@ -220,6 +233,7 @@ class ESP32Camera : public camera::Camera { #ifdef USE_I2C i2c::InternalI2CBus *i2c_bus_{nullptr}; #endif // USE_I2C + RAMAllocator fb_allocator_{RAMAllocator::ALLOC_INTERNAL}; }; class ESP32CameraImageTrigger : public Trigger, public camera::CameraListener { diff --git a/esphome/components/mdns/__init__.py b/esphome/components/mdns/__init__.py index 3088d8ad7e..f87f929615 100644 --- a/esphome/components/mdns/__init__.py +++ b/esphome/components/mdns/__init__.py @@ -21,7 +21,7 @@ DEPENDENCIES = ["network"] # Components that create mDNS services at runtime # IMPORTANT: If you add a new component here, you must also update the corresponding # #ifdef blocks in mdns_component.cpp compile_records_() method -COMPONENTS_WITH_MDNS_SERVICES = ("api", "prometheus", "web_server") +COMPONENTS_WITH_MDNS_SERVICES = ("api", "prometheus", "sendspin", "web_server") mdns_ns = cg.esphome_ns.namespace("mdns") MDNSComponent = mdns_ns.class_("MDNSComponent", cg.Component) diff --git a/esphome/components/mdns/mdns_component.cpp b/esphome/components/mdns/mdns_component.cpp index 47db92610a..5e5e1279d9 100644 --- a/esphome/components/mdns/mdns_component.cpp +++ b/esphome/components/mdns/mdns_component.cpp @@ -29,6 +29,10 @@ static const char *const TAG = "mdns"; #define USE_WEBSERVER_PORT 80 // NOLINT #endif +#ifndef USE_SENDSPIN_PORT +#define USE_SENDSPIN_PORT 8928 // NOLINT +#endif + // Define all constant strings using the macro MDNS_STATIC_CONST_CHAR(SERVICE_TCP, "_tcp"); @@ -150,6 +154,18 @@ void MDNSComponent::compile_records_(StaticVector using TurnOnAction = MediaPlayerCommandAction; template using TurnOffAction = MediaPlayerCommandAction; +template +using NextAction = MediaPlayerCommandAction; +template +using PreviousAction = MediaPlayerCommandAction; +template +using MuteAction = MediaPlayerCommandAction; +template +using UnmuteAction = MediaPlayerCommandAction; +template +using RepeatOffAction = MediaPlayerCommandAction; +template +using RepeatOneAction = MediaPlayerCommandAction; +template +using RepeatAllAction = MediaPlayerCommandAction; +template +using ShuffleAction = MediaPlayerCommandAction; +template +using UnshuffleAction = MediaPlayerCommandAction; +template +using GroupJoinAction = MediaPlayerCommandAction; +template +using ClearPlaylistAction = MediaPlayerCommandAction; template class PlayMediaAction : public Action, public Parented { TEMPLATABLE_VALUE(std::string, media_url) @@ -105,5 +127,10 @@ template class IsOffCondition : public Condition, public bool check(const Ts &...x) override { return this->parent_->state == MediaPlayerState::MEDIA_PLAYER_STATE_OFF; } }; +template class IsMutedCondition : public Condition, public Parented { + public: + bool check(const Ts &...x) override { return this->parent_->is_muted(); } +}; + } // namespace media_player } // namespace esphome diff --git a/esphome/components/media_player/media_player.cpp b/esphome/components/media_player/media_player.cpp index 17d9b054da..a53d598b0f 100644 --- a/esphome/components/media_player/media_player.cpp +++ b/esphome/components/media_player/media_player.cpp @@ -60,11 +60,39 @@ const char *media_player_command_to_string(MediaPlayerCommand command) { return "TURN_ON"; case MEDIA_PLAYER_COMMAND_TURN_OFF: return "TURN_OFF"; + case MEDIA_PLAYER_COMMAND_NEXT: + return "NEXT"; + case MEDIA_PLAYER_COMMAND_PREVIOUS: + return "PREVIOUS"; + case MEDIA_PLAYER_COMMAND_REPEAT_ALL: + return "REPEAT_ALL"; + case MEDIA_PLAYER_COMMAND_SHUFFLE: + return "SHUFFLE"; + case MEDIA_PLAYER_COMMAND_UNSHUFFLE: + return "UNSHUFFLE"; + case MEDIA_PLAYER_COMMAND_GROUP_JOIN: + return "GROUP_JOIN"; default: return "UNKNOWN"; } } +void MediaPlayerTraits::set_supports_pause(bool supports_pause) { + if (supports_pause) { + this->feature_flags_ |= MediaPlayerEntityFeature::PAUSE | MediaPlayerEntityFeature::PLAY; + } else { + this->feature_flags_ &= ~(MediaPlayerEntityFeature::PAUSE | MediaPlayerEntityFeature::PLAY); + } +} + +void MediaPlayerTraits::set_supports_turn_off_on(bool supports_turn_off_on) { + if (supports_turn_off_on) { + this->feature_flags_ |= MediaPlayerEntityFeature::TURN_OFF | MediaPlayerEntityFeature::TURN_ON; + } else { + this->feature_flags_ &= ~(MediaPlayerEntityFeature::TURN_OFF | MediaPlayerEntityFeature::TURN_ON); + } +} + void MediaPlayerCall::validate_() { if (this->media_url_.has_value()) { if (this->command_.has_value() && this->command_.value() != MEDIA_PLAYER_COMMAND_ENQUEUE) { @@ -125,6 +153,30 @@ MediaPlayerCall &MediaPlayerCall::set_command(const char *command) { this->set_command(MEDIA_PLAYER_COMMAND_TURN_ON); } else if (ESPHOME_strcasecmp_P(command, ESPHOME_PSTR("TURN_OFF")) == 0) { this->set_command(MEDIA_PLAYER_COMMAND_TURN_OFF); + } else if (ESPHOME_strcasecmp_P(command, ESPHOME_PSTR("VOLUME_UP")) == 0) { + this->set_command(MEDIA_PLAYER_COMMAND_VOLUME_UP); + } else if (ESPHOME_strcasecmp_P(command, ESPHOME_PSTR("VOLUME_DOWN")) == 0) { + this->set_command(MEDIA_PLAYER_COMMAND_VOLUME_DOWN); + } else if (ESPHOME_strcasecmp_P(command, ESPHOME_PSTR("ENQUEUE")) == 0) { + this->set_command(MEDIA_PLAYER_COMMAND_ENQUEUE); + } else if (ESPHOME_strcasecmp_P(command, ESPHOME_PSTR("REPEAT_ONE")) == 0) { + this->set_command(MEDIA_PLAYER_COMMAND_REPEAT_ONE); + } else if (ESPHOME_strcasecmp_P(command, ESPHOME_PSTR("REPEAT_OFF")) == 0) { + this->set_command(MEDIA_PLAYER_COMMAND_REPEAT_OFF); + } else if (ESPHOME_strcasecmp_P(command, ESPHOME_PSTR("REPEAT_ALL")) == 0) { + this->set_command(MEDIA_PLAYER_COMMAND_REPEAT_ALL); + } else if (ESPHOME_strcasecmp_P(command, ESPHOME_PSTR("CLEAR_PLAYLIST")) == 0) { + this->set_command(MEDIA_PLAYER_COMMAND_CLEAR_PLAYLIST); + } else if (ESPHOME_strcasecmp_P(command, ESPHOME_PSTR("NEXT")) == 0) { + this->set_command(MEDIA_PLAYER_COMMAND_NEXT); + } else if (ESPHOME_strcasecmp_P(command, ESPHOME_PSTR("PREVIOUS")) == 0) { + this->set_command(MEDIA_PLAYER_COMMAND_PREVIOUS); + } else if (ESPHOME_strcasecmp_P(command, ESPHOME_PSTR("SHUFFLE")) == 0) { + this->set_command(MEDIA_PLAYER_COMMAND_SHUFFLE); + } else if (ESPHOME_strcasecmp_P(command, ESPHOME_PSTR("UNSHUFFLE")) == 0) { + this->set_command(MEDIA_PLAYER_COMMAND_UNSHUFFLE); + } else if (ESPHOME_strcasecmp_P(command, ESPHOME_PSTR("GROUP_JOIN")) == 0) { + this->set_command(MEDIA_PLAYER_COMMAND_GROUP_JOIN); } else { ESP_LOGW(TAG, "'%s' - Unrecognized command %s", this->parent_->get_name().c_str(), command); } diff --git a/esphome/components/media_player/media_player.h b/esphome/components/media_player/media_player.h index f75a68dd85..3509747718 100644 --- a/esphome/components/media_player/media_player.h +++ b/esphome/components/media_player/media_player.h @@ -58,6 +58,12 @@ enum MediaPlayerCommand : uint8_t { MEDIA_PLAYER_COMMAND_CLEAR_PLAYLIST = 11, MEDIA_PLAYER_COMMAND_TURN_ON = 12, MEDIA_PLAYER_COMMAND_TURN_OFF = 13, + MEDIA_PLAYER_COMMAND_NEXT = 14, + MEDIA_PLAYER_COMMAND_PREVIOUS = 15, + MEDIA_PLAYER_COMMAND_REPEAT_ALL = 16, + MEDIA_PLAYER_COMMAND_SHUFFLE = 17, + MEDIA_PLAYER_COMMAND_UNSHUFFLE = 18, + MEDIA_PLAYER_COMMAND_GROUP_JOIN = 19, }; const char *media_player_command_to_string(MediaPlayerCommand command); @@ -74,38 +80,40 @@ struct MediaPlayerSupportedFormat { uint32_t sample_bytes; }; +// Base features always reported for all media players +static constexpr uint32_t BASE_MEDIA_PLAYER_FEATURES = + MediaPlayerEntityFeature::PLAY_MEDIA | MediaPlayerEntityFeature::BROWSE_MEDIA | MediaPlayerEntityFeature::STOP | + MediaPlayerEntityFeature::VOLUME_SET | MediaPlayerEntityFeature::VOLUME_MUTE | + MediaPlayerEntityFeature::MEDIA_ANNOUNCE; + class MediaPlayer; class MediaPlayerTraits { public: MediaPlayerTraits() = default; - void set_supports_pause(bool supports_pause) { this->supports_pause_ = supports_pause; } - bool get_supports_pause() const { return this->supports_pause_; } - - void set_supports_turn_off_on(bool supports_turn_off_on) { this->supports_turn_off_on_ = supports_turn_off_on; } - bool get_supports_turn_off_on() const { return this->supports_turn_off_on_; } + uint32_t get_feature_flags() const { return this->feature_flags_; } + void add_feature_flags(uint32_t feature_flags) { this->feature_flags_ |= feature_flags; } + void clear_feature_flags(uint32_t feature_flags) { this->feature_flags_ &= ~feature_flags; } + // Returns true only if all specified flags are set + bool has_feature_flags(uint32_t feature_flags) const { + return (this->feature_flags_ & feature_flags) == feature_flags; + } std::vector &get_supported_formats() { return this->supported_formats_; } - uint32_t get_feature_flags() const { - uint32_t flags = 0; - flags |= MediaPlayerEntityFeature::PLAY_MEDIA | MediaPlayerEntityFeature::BROWSE_MEDIA | - MediaPlayerEntityFeature::STOP | MediaPlayerEntityFeature::VOLUME_SET | - MediaPlayerEntityFeature::VOLUME_MUTE | MediaPlayerEntityFeature::MEDIA_ANNOUNCE; - if (this->get_supports_pause()) { - flags |= MediaPlayerEntityFeature::PAUSE | MediaPlayerEntityFeature::PLAY; - } - if (this->get_supports_turn_off_on()) { - flags |= MediaPlayerEntityFeature::TURN_OFF | MediaPlayerEntityFeature::TURN_ON; - } - return flags; + // Legacy setters/getters are kept for backward compatibility + void set_supports_pause(bool supports_pause); + bool get_supports_pause() const { return this->has_feature_flags(MediaPlayerEntityFeature::PAUSE); } + + void set_supports_turn_off_on(bool supports_turn_off_on); + bool get_supports_turn_off_on() const { + return this->has_feature_flags(MediaPlayerEntityFeature::TURN_ON | MediaPlayerEntityFeature::TURN_OFF); } protected: std::vector supported_formats_{}; - bool supports_pause_{false}; - bool supports_turn_off_on_{false}; + uint32_t feature_flags_{BASE_MEDIA_PLAYER_FEATURES}; }; class MediaPlayerCall { diff --git a/esphome/components/speaker/media_player/__init__.py b/esphome/components/speaker/media_player/__init__.py index 034312236c..b302bd9b23 100644 --- a/esphome/components/speaker/media_player/__init__.py +++ b/esphome/components/speaker/media_player/__init__.py @@ -26,7 +26,6 @@ from esphome.const import ( from esphome.core import CORE, HexInt from esphome.core.entity_helpers import inherit_property_from from esphome.external_files import download_content -from esphome.final_validate import full_config _LOGGER = logging.getLogger(__name__) @@ -37,6 +36,10 @@ DEPENDENCIES = ["network"] CODEOWNERS = ["@kahrendt", "@synesthesiam"] DOMAIN = "media_player" +CODEC_SUPPORT_ALL = "all" +CODEC_SUPPORT_NEEDED = "needed" +CODEC_SUPPORT_NONE = "none" + TYPE_LOCAL = "local" TYPE_WEB = "web" @@ -110,6 +113,8 @@ def _get_supported_format_struct(pipeline, type): args.append(("format", "flac")) elif pipeline[CONF_FORMAT] == "MP3": args.append(("format", "mp3")) + elif pipeline[CONF_FORMAT] == "OPUS": + args.append(("format", "opus")) elif pipeline[CONF_FORMAT] == "WAV": args.append(("format", "wav")) @@ -173,6 +178,13 @@ def _read_audio_file_and_type(file_config): media_file_type = audio.AUDIO_FILE_TYPE_ENUM["MP3"] elif file_type in ("flac"): media_file_type = audio.AUDIO_FILE_TYPE_ENUM["FLAC"] + elif ( + file_type in ("ogg") + and len(data) >= 36 + and data.startswith(b"OggS") + and data[28:36] == b"OpusHead" + ): + media_file_type = audio.AUDIO_FILE_TYPE_ENUM["OPUS"] return data, media_file_type @@ -199,6 +211,10 @@ def _validate_pipeline(config): inherit_property_from(CONF_NUM_CHANNELS, CONF_SPEAKER)(config) inherit_property_from(CONF_SAMPLE_RATE, CONF_SPEAKER)(config) + # Opus only supports 48 kHz + if config.get(CONF_FORMAT) == "OPUS" and config.get(CONF_SAMPLE_RATE) != 48000: + raise cv.Invalid("Opus only supports a sample rate of 48000 Hz") + # Validate the transcoder settings is compatible with the speaker audio.final_validate_audio_schema( "speaker media_player", @@ -225,12 +241,27 @@ def _validate_repeated_speaker(config): def _final_validate(config): - # Default to using codec if psram is enabled - if (use_codec := config.get(CONF_CODEC_SUPPORT_ENABLED)) is None: - use_codec = psram.DOMAIN in full_config.get() - conf_id = config[CONF_ID].id - core_data = CORE.data.setdefault(DOMAIN, {conf_id: {}}) - core_data[conf_id][CONF_CODEC_SUPPORT_ENABLED] = use_codec + # Normalize boolean values to string equivalents + codec_mode = config[CONF_CODEC_SUPPORT_ENABLED] + if codec_mode is True: + codec_mode = CODEC_SUPPORT_ALL + elif codec_mode is False: + codec_mode = CODEC_SUPPORT_NONE + + use_codec = codec_mode != CODEC_SUPPORT_NONE + + # In "needed" mode, collect formats from pipelines and files + needed_formats = set() + need_all = False + if codec_mode == CODEC_SUPPORT_NEEDED: + for pipeline_key in (CONF_ANNOUNCEMENT_PIPELINE, CONF_MEDIA_PIPELINE): + if pipeline := config.get(pipeline_key): + fmt = pipeline[CONF_FORMAT] + if fmt == "NONE": + # No preferred format means any format could arrive + need_all = True + else: + needed_formats.add(fmt) for file_config in config.get(CONF_FILES, []): _, media_file_type = _read_audio_file_and_type(file_config) @@ -243,6 +274,26 @@ def _final_validate(config): raise cv.Invalid( f"Unsupported local media file type, set {CONF_CODEC_SUPPORT_ENABLED} to true or convert the media file to wav" ) + # In "needed" mode, add file format to needed codecs + if codec_mode == CODEC_SUPPORT_NEEDED: + for fmt_name, fmt_enum in audio.AUDIO_FILE_TYPE_ENUM.items(): + if str(media_file_type) == str(fmt_enum): + if fmt_name not in ("WAV", "NONE"): + needed_formats.add(fmt_name) + break + + # Request codec support + if codec_mode == CODEC_SUPPORT_ALL or need_all: + audio.request_flac_support() + audio.request_mp3_support() + audio.request_opus_support() + elif codec_mode == CODEC_SUPPORT_NEEDED: + if "FLAC" in needed_formats: + audio.request_flac_support() + if "MP3" in needed_formats: + audio.request_mp3_support() + if "OPUS" in needed_formats: + audio.request_opus_support() return config @@ -307,7 +358,17 @@ CONFIG_SCHEMA = cv.All( cv.Optional(CONF_BUFFER_SIZE, default=1000000): cv.int_range( min=4000, max=4000000 ), - cv.Optional(CONF_CODEC_SUPPORT_ENABLED): cv.boolean, + cv.Optional( + CONF_CODEC_SUPPORT_ENABLED, default=CODEC_SUPPORT_NEEDED + ): cv.Any( + cv.boolean, + cv.one_of( + CODEC_SUPPORT_ALL, + CODEC_SUPPORT_NEEDED, + CODEC_SUPPORT_NONE, + lower=True, + ), + ), cv.Optional(CONF_FILES): cv.ensure_list(MEDIA_FILE_TYPE_SCHEMA), cv.Optional(CONF_TASK_STACK_IN_PSRAM): cv.All( cv.boolean, cv.requires_component(psram.DOMAIN) @@ -340,11 +401,6 @@ FINAL_VALIDATE_SCHEMA = cv.All( async def to_code(config): - if CORE.data[DOMAIN][config[CONF_ID].id][CONF_CODEC_SUPPORT_ENABLED]: - # Compile all supported audio codecs - cg.add_define("USE_AUDIO_FLAC_SUPPORT", True) - cg.add_define("USE_AUDIO_MP3_SUPPORT", True) - var = await media_player.new_media_player(config) await cg.register_component(var, config) diff --git a/esphome/components/speaker/media_player/audio_pipeline.cpp b/esphome/components/speaker/media_player/audio_pipeline.cpp index 8be37d740a..177743feb1 100644 --- a/esphome/components/speaker/media_player/audio_pipeline.cpp +++ b/esphome/components/speaker/media_player/audio_pipeline.cpp @@ -13,7 +13,12 @@ namespace speaker { static const uint32_t INITIAL_BUFFER_MS = 1000; // Start playback after buffering this duration of the file static const uint32_t READ_TASK_STACK_SIZE = 5 * 1024; +// Opus decoding uses more stack than other codecs +#ifdef USE_AUDIO_OPUS_SUPPORT +static const uint32_t DECODE_TASK_STACK_SIZE = 5 * 1024; +#else static const uint32_t DECODE_TASK_STACK_SIZE = 3 * 1024; +#endif static const uint32_t INFO_ERROR_QUEUE_COUNT = 5; @@ -552,6 +557,11 @@ void AudioPipeline::decode_task(void *params) { case audio::AudioFileType::FLAC: initial_bytes_to_buffer /= 2; // Estimate the FLAC compression factor is 2 break; +#endif +#ifdef USE_AUDIO_OPUS_SUPPORT + case audio::AudioFileType::OPUS: + initial_bytes_to_buffer /= 8; // Estimate the Opus compression factor is 8 + break; #endif default: break; diff --git a/esphome/components/web_server/web_server.cpp b/esphome/components/web_server/web_server.cpp index 0e8d40efff..99ad3bf4e2 100644 --- a/esphome/components/web_server/web_server.cpp +++ b/esphome/components/web_server/web_server.cpp @@ -1549,16 +1549,16 @@ json::SerializationBuffer<> WebServer::climate_json_(climate::Climate *obj, Json for (auto const &custom_preset : traits.get_supported_custom_presets()) opt.add(custom_preset); } + root[ESPHOME_F("max_temp")] = + (value_accuracy_to_buf(temp_buf, traits.get_visual_max_temperature(), target_accuracy), temp_buf); + root[ESPHOME_F("min_temp")] = + (value_accuracy_to_buf(temp_buf, traits.get_visual_min_temperature(), target_accuracy), temp_buf); + root[ESPHOME_F("step")] = traits.get_visual_target_temperature_step(); this->add_sorting_info_(root, obj); } bool has_state = false; root[ESPHOME_F("mode")] = PSTR_LOCAL(climate_mode_to_string(obj->mode)); - root[ESPHOME_F("max_temp")] = - (value_accuracy_to_buf(temp_buf, traits.get_visual_max_temperature(), target_accuracy), temp_buf); - root[ESPHOME_F("min_temp")] = - (value_accuracy_to_buf(temp_buf, traits.get_visual_min_temperature(), target_accuracy), temp_buf); - root[ESPHOME_F("step")] = traits.get_visual_target_temperature_step(); if (traits.has_feature_flags(climate::CLIMATE_SUPPORTS_ACTION)) { root[ESPHOME_F("action")] = PSTR_LOCAL(climate_action_to_string(obj->action)); root[ESPHOME_F("state")] = root[ESPHOME_F("action")]; @@ -1602,8 +1602,8 @@ json::SerializationBuffer<> WebServer::climate_json_(climate::Climate *obj, Json } if (traits.has_feature_flags(climate::CLIMATE_SUPPORTS_CURRENT_HUMIDITY)) { root[ESPHOME_F("current_humidity")] = std::isnan(obj->current_humidity) - ? "NA" - : (value_accuracy_to_buf(temp_buf, obj->current_humidity, 0), temp_buf); + ? "NA" + : (value_accuracy_to_buf(temp_buf, obj->current_humidity, 0), temp_buf); } if (traits.has_feature_flags(climate::CLIMATE_SUPPORTS_TWO_POINT_TARGET_TEMPERATURE | climate::CLIMATE_REQUIRES_TWO_POINT_TARGET_TEMPERATURE)) { diff --git a/esphome/core/defines.h b/esphome/core/defines.h index 693b223147..c0d513d71a 100644 --- a/esphome/core/defines.h +++ b/esphome/core/defines.h @@ -43,6 +43,7 @@ #define USE_DEVICES #define USE_DISPLAY #define USE_ENTITY_ICON +#define USE_ESP32_CAMERA_JPEG_CONVERSION #define USE_ESP32_HOSTED #define USE_ESP32_IMPROV_STATE_CALLBACK #define USE_EVENT @@ -130,6 +131,7 @@ #define USE_AUDIO_DAC #define USE_AUDIO_FLAC_SUPPORT #define USE_AUDIO_MP3_SUPPORT +#define USE_AUDIO_OPUS_SUPPORT #define USE_API #define USE_API_CLIENT_CONNECTED_TRIGGER #define USE_API_CLIENT_DISCONNECTED_TRIGGER @@ -211,6 +213,8 @@ #define USE_ESP32_IMPROV_NEXT_URL #define USE_MICROPHONE #define USE_PSRAM +#define USE_SENDSPIN +#define USE_SENDSPIN_PORT 8928 // NOLINT #define USE_SOCKET_IMPL_BSD_SOCKETS #define USE_SOCKET_SELECT_SUPPORT #define USE_WAKE_LOOP_THREADSAFE diff --git a/esphome/idf_component.yml b/esphome/idf_component.yml index f39ea9b3ae..83b2d9d95c 100644 --- a/esphome/idf_component.yml +++ b/esphome/idf_component.yml @@ -3,6 +3,8 @@ dependencies: version: "7.4.2" esphome/esp-audio-libs: version: 2.0.3 + esphome/micro-opus: + version: 0.3.3 espressif/esp-tflite-micro: version: 1.3.3~1 espressif/esp32-camera: diff --git a/tests/components/media_player/common.yaml b/tests/components/media_player/common.yaml index 763bc231c0..c83ee89ad4 100644 --- a/tests/components/media_player/common.yaml +++ b/tests/components/media_player/common.yaml @@ -23,8 +23,27 @@ media_player: - media_player.stop: - media_player.stop: announcement: true + on_announcement: + - media_player.play: + on_turn_on: + - media_player.play: + on_turn_off: + - media_player.stop: on_pause: - media_player.toggle: + - media_player.turn_on: + - media_player.turn_off: + - media_player.next: + - media_player.previous: + - media_player.mute: + - media_player.unmute: + - media_player.repeat_off: + - media_player.repeat_one: + - media_player.repeat_all: + - media_player.shuffle: + - media_player.unshuffle: + - media_player.group_join: + - media_player.clear_playlist: - wait_until: media_player.is_idle: - wait_until: @@ -33,6 +52,12 @@ media_player: media_player.is_announcing: - wait_until: media_player.is_paused: + - wait_until: + media_player.is_on: + - wait_until: + media_player.is_off: + - wait_until: + media_player.is_muted: - media_player.volume_up: - media_player.volume_down: - media_player.volume_set: 50% diff --git a/tests/components/speaker/audio_dac.esp32-ard.yaml b/tests/components/speaker/audio_dac.esp32-ard.yaml deleted file mode 100644 index 3f5d1bba7c..0000000000 --- a/tests/components/speaker/audio_dac.esp32-ard.yaml +++ /dev/null @@ -1,10 +0,0 @@ -substitutions: - i2s_bclk_pin: GPIO27 - i2s_lrclk_pin: GPIO26 - i2s_mclk_pin: GPIO25 - i2s_dout_pin: GPIO23 - -packages: - i2c: !include ../../test_build_components/common/i2c/esp32-ard.yaml - -<<: !include common-audio_dac.yaml diff --git a/tests/components/speaker/common-media_player.yaml b/tests/components/speaker/common-media_player.yaml index edc9f670fc..c958c0d912 100644 --- a/tests/components/speaker/common-media_player.yaml +++ b/tests/components/speaker/common-media_player.yaml @@ -1,5 +1,11 @@ <<: !include common.yaml +wifi: + ap: + +psram: + mode: quad + media_player: - platform: speaker id: speaker_media_player_id @@ -10,3 +16,4 @@ media_player: volume_max: 0.95 volume_min: 0.0 task_stack_in_psram: true + codec_support_enabled: all diff --git a/tests/components/speaker/media_player.esp32-s3-idf.yaml b/tests/components/speaker/media_player.esp32-s3-idf.yaml deleted file mode 100644 index b3eec04d23..0000000000 --- a/tests/components/speaker/media_player.esp32-s3-idf.yaml +++ /dev/null @@ -1,9 +0,0 @@ -substitutions: - scl_pin: GPIO2 - sda_pin: GPIO3 - i2s_bclk_pin: GPIO4 - i2s_lrclk_pin: GPIO5 - i2s_mclk_pin: GPIO6 - i2s_dout_pin: GPIO7 - -<<: !include common-media_player.yaml diff --git a/tests/components/speaker/audio_dac.esp32-idf.yaml b/tests/components/speaker/test-audio_dac.esp32-idf.yaml similarity index 100% rename from tests/components/speaker/audio_dac.esp32-idf.yaml rename to tests/components/speaker/test-audio_dac.esp32-idf.yaml diff --git a/tests/components/speaker/media_player.esp32-idf.yaml b/tests/components/speaker/test-media_player.esp32-idf.yaml similarity index 100% rename from tests/components/speaker/media_player.esp32-idf.yaml rename to tests/components/speaker/test-media_player.esp32-idf.yaml diff --git a/tests/components/speaker/test.esp32-ard.yaml b/tests/components/speaker/test.esp32-ard.yaml deleted file mode 100644 index 13350cd097..0000000000 --- a/tests/components/speaker/test.esp32-ard.yaml +++ /dev/null @@ -1,10 +0,0 @@ -substitutions: - i2s_bclk_pin: GPIO27 - i2s_lrclk_pin: GPIO26 - i2s_mclk_pin: GPIO25 - i2s_dout_pin: GPIO4 - -packages: - i2c: !include ../../test_build_components/common/i2c/esp32-ard.yaml - -<<: !include common.yaml diff --git a/tests/test_build_components/common/i2c_camera/esp32-idf.yaml b/tests/test_build_components/common/i2c_camera/esp32-idf.yaml index 443ebbebd9..07ab6cdc8d 100644 --- a/tests/test_build_components/common/i2c_camera/esp32-idf.yaml +++ b/tests/test_build_components/common/i2c_camera/esp32-idf.yaml @@ -30,6 +30,7 @@ esp32_camera: resolution: 640x480 jpeg_quality: 10 frame_buffer_location: PSRAM + pixel_format: JPEG on_image: then: - lambda: |-