From afbea937435c2cc203f9d494845c478e73961003 Mon Sep 17 00:00:00 2001
From: Christian Jensen
Date: Sat, 4 Apr 2026 22:58:32 -0700
Subject: [PATCH 1/4] Add Waveshare ESP32-S3 Audio Board

Add configuration for the Waveshare ESP32-S3 Audio Board as a wake word
voice assistant. The board features an ES8311 DAC, ES7210 ADC, TCA9555
I/O expander, 7-LED WS2812 RGB ring, 3 buttons, and a speaker amplifier.
---
Reviewer notes (free-form area after the "---" separator; not part of the
commit message or the diff):
- The board YAML declares min_version: 2025.5.0, while the workflow context
  below builds with esphome-version: 2026.3.2. The lambdas call
  id(wake_word_engine_location).current_option().
  NOTE(review): confirm that accessor exists in 2025.5.0, otherwise raise
  min_version.
- board_microphone and board_speaker both reference i2s_audio_id:
  i2s_audio_bus, i.e. they share one set of MCLK/BCLK/LRCLK pins.
- on_timer_finished ends with "switch.turn_off: timer_ringing" directly
  after "wait_until: switch.is_off: timer_ringing", so the last action
  looks like a no-op. TODO confirm it is intentional.

 .github/workflows/build.yml                   |   1 +
 .../waveshare-esp32-s3-audio.factory.yaml     |  24 +
 .../waveshare-esp32-s3-audio.yaml             | 426 ++++++++++++++++++
 3 files changed, 451 insertions(+)
 create mode 100644 waveshare-esp32-s3-audio/waveshare-esp32-s3-audio.factory.yaml
 create mode 100644 waveshare-esp32-s3-audio/waveshare-esp32-s3-audio.yaml

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index d9ce8988..f1c3ed9d 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -22,6 +22,7 @@ jobs:
         esp32-s3-box-lite/esp32-s3-box-lite.factory.yaml
         esp32-s3-box-3/esp32-s3-box-3.factory.yaml
         m5stack-atom-echo/m5stack-atom-echo.factory.yaml
+        waveshare-esp32-s3-audio/waveshare-esp32-s3-audio.factory.yaml
       esphome-version: 2026.3.2
       release-summary: ${{ github.event_name == 'release' && github.event.release.body || '' }}
       release-url: ${{ github.event_name == 'release' && github.event.release.html_url || '' }}
diff --git a/waveshare-esp32-s3-audio/waveshare-esp32-s3-audio.factory.yaml b/waveshare-esp32-s3-audio/waveshare-esp32-s3-audio.factory.yaml
new file mode 100644
index 00000000..fe166654
--- /dev/null
+++ b/waveshare-esp32-s3-audio/waveshare-esp32-s3-audio.factory.yaml
@@ -0,0 +1,24 @@
+packages:
+  waveshare-esp32-s3-audio: !include waveshare-esp32-s3-audio.yaml
+
+esphome:
+  project:
+    name: waveshare.esp32-s3-audio-wake-word-voice-assistant
+    version: dev
+
+ota:
+  - platform: http_request
+    id: ota_http_request
+
+update:
+  - platform: http_request
+    id: update_http_request
+    name: Firmware
+    source: https://firmware.esphome.io/wake-word-voice-assistant/waveshare-esp32-s3-audio/manifest.json
+
+http_request:
+
+dashboard_import:
+  package_import_url: github://esphome/wake-word-voice-assistants/waveshare-esp32-s3-audio/waveshare-esp32-s3-audio.yaml@main
+
+improv_serial:
diff --git a/waveshare-esp32-s3-audio/waveshare-esp32-s3-audio.yaml b/waveshare-esp32-s3-audio/waveshare-esp32-s3-audio.yaml
new file mode 100644
index 00000000..85e6f15a
--- /dev/null
+++ b/waveshare-esp32-s3-audio/waveshare-esp32-s3-audio.yaml
@@ -0,0 +1,426 @@
+substitutions:
+  name: waveshare-esp32-s3-audio
+  friendly_name: Waveshare ESP32-S3 Audio
+
+esphome:
+  name: ${name}
+  name_add_mac_suffix: true
+  friendly_name: ${friendly_name}
+  min_version: 2025.5.0
+
+esp32:
+  board: esp32-s3-devkitc-1
+  variant: esp32s3
+  flash_size: 16MB
+  framework:
+    type: esp-idf
+
+psram:
+  mode: octal
+  speed: 80MHz
+
+logger:
+api:
+
+ota:
+  - platform: esphome
+    id: ota_esphome
+
+wifi:
+  ap:
+
+captive_portal:
+
+button:
+  - platform: factory_reset
+    id: factory_reset_btn
+    name: Factory reset
+
+i2c:
+  scl: GPIO10
+  sda: GPIO11
+
+i2s_audio:
+  - id: i2s_audio_bus
+    i2s_mclk_pin: GPIO12
+    i2s_bclk_pin: GPIO13
+    i2s_lrclk_pin: GPIO14
+
+audio_adc:
+  - platform: es7210
+    id: es7210_adc
+
+microphone:
+  - platform: i2s_audio
+    id: board_microphone
+    i2s_audio_id: i2s_audio_bus
+    i2s_din_pin: GPIO15
+    adc_type: external
+
+audio_dac:
+  - platform: es8311
+    id: es8311_dac
+
+speaker:
+  - platform: i2s_audio
+    id: board_speaker
+    i2s_audio_id: i2s_audio_bus
+    i2s_dout_pin: GPIO16
+    dac_type: external
+    audio_dac: es8311_dac
+    buffer_duration:
+      seconds: 20
+
+tca9555:
+  - id: exio
+    address: 0x20
+
+switch:
+  - platform: gpio
+    id: pa_enable
+    name: Speaker Amplifier
+    pin:
+      tca9555: exio
+      number: 8
+      mode: OUTPUT
+    restore_mode: ALWAYS_ON
+  - platform: template
+    name: Use listen light
+    id: use_listen_light
+    optimistic: true
+    restore_mode: RESTORE_DEFAULT_ON
+    entity_category: config
+    on_turn_on:
+      - script.execute: reset_led
+    on_turn_off:
+      - script.execute: reset_led
+  - platform: template
+    id: timer_ringing
+    optimistic: true
+    restore_mode: ALWAYS_OFF
+    on_turn_off:
+      - lambda: |-
+          id(board_media_player)
+            ->make_call()
+            .set_command(media_player::MediaPlayerCommand::MEDIA_PLAYER_COMMAND_REPEAT_OFF)
+            .set_announcement(true)
+            .perform();
+          id(board_media_player)->set_playlist_delay_ms(speaker::AudioPipelineType::ANNOUNCEMENT, 0);
+      - media_player.stop:
+          announcement: true
+    on_turn_on:
+      - lambda: |-
+          id(board_media_player)
+            ->make_call()
+            .set_command(media_player::MediaPlayerCommand::MEDIA_PLAYER_COMMAND_REPEAT_ONE)
+            .set_announcement(true)
+            .perform();
+          id(board_media_player)->set_playlist_delay_ms(speaker::AudioPipelineType::ANNOUNCEMENT, 1000);
+      - media_player.speaker.play_on_device_media_file:
+          media_file: timer_finished_wave_file
+          announcement: true
+      - delay: 15min
+      - switch.turn_off: timer_ringing
+
+media_player:
+  - platform: speaker
+    name: None
+    id: board_media_player
+    announcement_pipeline:
+      speaker: board_speaker
+    files:
+      - id: timer_finished_wave_file
+        file: https://github.com/esphome/wake-word-voice-assistants/raw/main/sounds/timer_finished.wav
+    on_announcement:
+      - if:
+          condition:
+            - microphone.is_capturing:
+          then:
+            - script.execute: stop_wake_word
+      - light.turn_on:
+          id: rgb_ring
+          blue: 100%
+          red: 0%
+          green: 0%
+          brightness: 100%
+          effect: none
+    on_idle:
+      - script.execute: start_wake_word
+      - script.execute: reset_led
+
+binary_sensor:
+  - platform: gpio
+    name: Volume Down
+    id: key1
+    pin:
+      tca9555: exio
+      number: 9
+      mode: INPUT
+      inverted: true
+    on_press:
+      - media_player.volume_down: board_media_player
+
+  - platform: gpio
+    name: Action
+    id: key2
+    disabled_by_default: true
+    entity_category: diagnostic
+    pin:
+      tca9555: exio
+      number: 10
+      mode: INPUT
+      inverted: true
+    on_multi_click:
+      - timing:
+          - ON for at least 50ms
+          - OFF for at least 50ms
+        then:
+          - if:
+              condition:
+                switch.is_on: timer_ringing
+              then:
+                - switch.turn_off: timer_ringing
+              else:
+                - script.execute: start_wake_word
+      - timing:
+          - ON for at least 10s
+        then:
+          - button.press: factory_reset_btn
+
+  - platform: gpio
+    name: Volume Up
+    id: key3
+    pin:
+      tca9555: exio
+      number: 11
+      mode: INPUT
+      inverted: true
+    on_press:
+      - media_player.volume_up: board_media_player
+
+light:
+  - platform: esp32_rmt_led_strip
+    id: rgb_ring
+    name: None
+    disabled_by_default: true
+    entity_category: config
+    pin: GPIO38
+    default_transition_length: 0s
+    num_leds: 7
+    chipset: ws2812
+    rgb_order: RGB
+    effects:
+      - pulse:
+          name: "Slow Pulse"
+          transition_length: 250ms
+          update_interval: 250ms
+          min_brightness: 50%
+          max_brightness: 100%
+      - pulse:
+          name: "Fast Pulse"
+          transition_length: 100ms
+          update_interval: 100ms
+          min_brightness: 50%
+          max_brightness: 100%
+      - addressable_rainbow:
+          name: Rainbow
+      - addressable_color_wipe:
+          name: Wipe
+          add_led_interval: 40ms
+          reverse: false
+      - addressable_scan:
+          name: Scanner
+          move_interval: 60ms
+      - addressable_twinkle:
+          name: Twinkle
+          twinkle_probability: 5%
+          progress_interval: 60ms
+
+script:
+  - id: reset_led
+    then:
+      - if:
+          condition:
+            - lambda: |-
+                return id(wake_word_engine_location).current_option() == "On device";
+            - switch.is_on: use_listen_light
+          then:
+            - light.turn_on:
+                id: rgb_ring
+                red: 100%
+                green: 89%
+                blue: 71%
+                brightness: 60%
+                effect: none
+          else:
+            - light.turn_off: rgb_ring
+  - id: start_wake_word
+    then:
+      - if:
+          condition:
+            and:
+              - not:
+                  - voice_assistant.is_running:
+              - lambda: |-
+                  return id(wake_word_engine_location).current_option() == "On device";
+          then:
+            - lambda: id(va).set_use_wake_word(false);
+            - micro_wake_word.start:
+      - if:
+          condition:
+            and:
+              - not:
+                  - voice_assistant.is_running:
+              - lambda: |-
+                  return id(wake_word_engine_location).current_option() == "In Home Assistant";
+          then:
+            - lambda: id(va).set_use_wake_word(true);
+            - voice_assistant.start_continuous:
+  - id: stop_wake_word
+    then:
+      - if:
+          condition:
+            lambda: |-
+              return id(wake_word_engine_location).current_option() == "In Home Assistant";
+          then:
+            - lambda: id(va).set_use_wake_word(false);
+            - voice_assistant.stop:
+      - if:
+          condition:
+            lambda: |-
+              return id(wake_word_engine_location).current_option() == "On device";
+          then:
+            - micro_wake_word.stop:
+
+select:
+  - platform: template
+    entity_category: config
+    name: Wake word engine location
+    id: wake_word_engine_location
+    optimistic: true
+    restore_value: true
+    options:
+      - In Home Assistant
+      - On device
+    initial_option: On device
+    on_value:
+      - if:
+          condition:
+            lambda: return x == "In Home Assistant";
+          then:
+            - micro_wake_word.stop:
+            - delay: 500ms
+            - lambda: id(va).set_use_wake_word(true);
+            - voice_assistant.start_continuous:
+      - if:
+          condition:
+            lambda: return x == "On device";
+          then:
+            - lambda: id(va).set_use_wake_word(false);
+            - voice_assistant.stop:
+            - delay: 500ms
+            - micro_wake_word.start:
+
+micro_wake_word:
+  on_wake_word_detected:
+    - voice_assistant.start:
+        wake_word: !lambda return wake_word;
+  vad:
+  models:
+    - model: okay_nabu
+    - model: hey_mycroft
+    - model: hey_jarvis
+
+voice_assistant:
+  id: va
+  micro_wake_word:
+  microphone:
+    microphone: board_microphone
+  media_player: board_media_player
+  noise_suppression_level: 2
+  volume_multiplier: 2.0
+  on_listening:
+    - light.turn_on:
+        id: rgb_ring
+        blue: 100%
+        red: 0%
+        green: 0%
+        brightness: 35%
+        effect: "Scanner"
+  on_stt_vad_end:
+    - light.turn_on:
+        id: rgb_ring
+        blue: 100%
+        red: 0%
+        green: 0%
+        effect: "Fast Pulse"
+  on_tts_start:
+    - light.turn_on:
+        id: rgb_ring
+        blue: 100%
+        red: 0%
+        green: 0%
+        brightness: 100%
+        effect: none
+  on_tts_stream_start:
+    - light.turn_on:
+        id: rgb_ring
+        red: 100%
+        green: 0%
+        blue: 60%
+        brightness: 35%
+        effect: "Wipe"
+  on_tts_stream_end:
+    - script.execute: reset_led
+  on_end:
+    - wait_until:
+        condition:
+          - media_player.is_announcing:
+        timeout: 0.5s
+    - if:
+        condition:
+          - lambda: |-
+              return id(wake_word_engine_location).current_option() == "On device";
+        then:
+          - wait_until:
+              - and:
+                  - not:
+                      voice_assistant.is_running:
+                  - not:
+                      speaker.is_playing:
+          - lambda: id(va).set_use_wake_word(false);
+          - micro_wake_word.start:
+    - script.execute: reset_led
+  on_error:
+    - light.turn_on:
+        id: rgb_ring
+        red: 100%
+        green: 0%
+        blue: 0%
+        brightness: 100%
+        effect: none
+    - delay: 2s
+    - script.execute: reset_led
+  on_client_connected:
+    - delay: 2s
+    - script.execute: start_wake_word
+  on_client_disconnected:
+    - script.execute: stop_wake_word
+  on_timer_finished:
+    - script.execute: stop_wake_word
+    - wait_until:
+        not:
+          microphone.is_capturing:
+    - switch.turn_on: timer_ringing
+    - light.turn_on:
+        id: rgb_ring
+        red: 0%
+        green: 100%
+        blue: 0%
+        brightness: 100%
+        effect: "Fast Pulse"
+    - wait_until:
+        - switch.is_off: timer_ringing
+    - light.turn_off: rgb_ring
+    - switch.turn_off: timer_ringing
+
From 86b9a72131f4ebd5af9bec6eb749305cf650c5f0 Mon Sep 17 00:00:00 2001
From: Christian Jensen
Date: Sun, 5 Apr 2026 07:57:40 -0700
Subject: [PATCH 2/4] Fix voice assistant lifecycle handlers for reliable
 wake word restart

Update on_end to wait 2s + speaker idle before restarting wake word, add
wake word restart to on_error after speaker finishes, replace on_idle
with on_announcement handler that stops wake word, waits for
announcement to complete, then restarts it.
---
Reviewer notes (not part of the commit message or the diff):
- The on_idle key is removed; its two script.execute actions become the
  tail of the existing on_announcement sequence, after the new
  "wait_until: not: media_player.is_announcing".
- on_announcement now runs stop_wake_word unconditionally; the previous
  microphone.is_capturing guard is dropped.
- This patch removes the only wait_until that carried a timeout (0.5s in
  on_end); none of the wait_until actions added here sets one.
  NOTE(review): confirm an unbounded wait is acceptable.

 .../waveshare-esp32-s3-audio.yaml             | 37 ++++++++-----------
 1 file changed, 15 insertions(+), 22 deletions(-)

diff --git a/waveshare-esp32-s3-audio/waveshare-esp32-s3-audio.yaml b/waveshare-esp32-s3-audio/waveshare-esp32-s3-audio.yaml
index 85e6f15a..5c6873fe 100644
--- a/waveshare-esp32-s3-audio/waveshare-esp32-s3-audio.yaml
+++ b/waveshare-esp32-s3-audio/waveshare-esp32-s3-audio.yaml
@@ -132,11 +132,7 @@ media_player:
       - id: timer_finished_wave_file
         file: https://github.com/esphome/wake-word-voice-assistants/raw/main/sounds/timer_finished.wav
     on_announcement:
-      - if:
-          condition:
-            - microphone.is_capturing:
-          then:
-            - script.execute: stop_wake_word
+      - script.execute: stop_wake_word
       - light.turn_on:
           id: rgb_ring
           blue: 100%
@@ -144,7 +140,11 @@ media_player:
           green: 0%
           brightness: 100%
           effect: none
-    on_idle:
+      - delay: 500ms
+      - wait_until:
+          not:
+            media_player.is_announcing:
+      - delay: 500ms
       - script.execute: start_wake_word
       - script.execute: reset_led
 
@@ -373,23 +373,12 @@ voice_assistant:
   on_tts_stream_end:
     - script.execute: reset_led
   on_end:
+    - delay: 2s
     - wait_until:
-        condition:
-          - media_player.is_announcing:
-        timeout: 0.5s
-    - if:
-        condition:
-          - lambda: |-
-              return id(wake_word_engine_location).current_option() == "On device";
-        then:
-          - wait_until:
-              - and:
-                  - not:
-                      voice_assistant.is_running:
-                  - not:
-                      speaker.is_playing:
-          - lambda: id(va).set_use_wake_word(false);
-          - micro_wake_word.start:
+        not:
+          speaker.is_playing:
+    - delay: 500ms
+    - script.execute: start_wake_word
     - script.execute: reset_led
   on_error:
     - light.turn_on:
@@ -400,6 +389,10 @@ voice_assistant:
         brightness: 100%
         effect: none
     - delay: 2s
+    - wait_until:
+        not:
+          speaker.is_playing:
+    - script.execute: start_wake_word
     - script.execute: reset_led
   on_client_connected:
     - delay: 2s
From 201fb59139041fe77cdc454409189fb0093472da Mon Sep 17 00:00:00 2001
From: Christian Jensen
Date: Mon, 6 Apr 2026 11:40:31 -0700
Subject: [PATCH 3/4] fix: move wake word restart from on_end to
 on_tts_stream_end

Restarting the microphone (via wake word) in on_end causes an I2S bus
conflict because on_end fires before the speaker finishes playing TTS
audio. Moving the restart to on_tts_stream_end ensures we wait for the
speaker to finish before reclaiming the I2S bus for the microphone.

- on_end: now only resets LEDs (no mic restart)
- on_tts_stream_end: waits for speaker, then restarts wake word
---
Reviewer notes (not part of the commit message or the diff):
- Besides moving the restart, this hunk also drops
  "script.execute: reset_led" from on_tts_stream_end and the "delay: 2s"
  that on_end had; the LED reset now happens only from on_end here.

 waveshare-esp32-s3-audio/waveshare-esp32-s3-audio.yaml | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/waveshare-esp32-s3-audio/waveshare-esp32-s3-audio.yaml b/waveshare-esp32-s3-audio/waveshare-esp32-s3-audio.yaml
index 5c6873fe..73c6e899 100644
--- a/waveshare-esp32-s3-audio/waveshare-esp32-s3-audio.yaml
+++ b/waveshare-esp32-s3-audio/waveshare-esp32-s3-audio.yaml
@@ -371,14 +371,12 @@ voice_assistant:
         brightness: 35%
         effect: "Wipe"
   on_tts_stream_end:
-    - script.execute: reset_led
-  on_end:
-    - delay: 2s
     - wait_until:
         not:
           speaker.is_playing:
     - delay: 500ms
     - script.execute: start_wake_word
+  on_end:
     - script.execute: reset_led
   on_error:
     - light.turn_on:
From d1ee8f453ce1c45d4aac4bcdb52309f0d9f02b6d Mon Sep 17 00:00:00 2001
From: Christian Jensen
Date: Mon, 6 Apr 2026 11:48:25 -0700
Subject: [PATCH 4/4] Add restart_mww_fallback script for silent command
 recovery

When a voice command produces no TTS response, on_tts_stream_end never
fires and the wake word engine stays stopped. The new fallback script is
called from on_end: it waits 5s (giving on_tts_stream_end time to act
first), checks if wake word is already running, and only restarts if
not.
---
Reviewer notes (not part of the commit message or the diff):
- restart_mww_fallback is declared with "mode: restart" and is executed
  from on_end after reset_led.
- Its guard tests micro_wake_word.is_running only; the script it then
  calls, start_wake_word, chooses between micro_wake_word.start and
  voice_assistant.start_continuous from wake_word_engine_location.
  NOTE(review): confirm the guard behaves as intended when the select is
  set to "In Home Assistant".

 .../waveshare-esp32-s3-audio.yaml             | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/waveshare-esp32-s3-audio/waveshare-esp32-s3-audio.yaml b/waveshare-esp32-s3-audio/waveshare-esp32-s3-audio.yaml
index 73c6e899..bd642388 100644
--- a/waveshare-esp32-s3-audio/waveshare-esp32-s3-audio.yaml
+++ b/waveshare-esp32-s3-audio/waveshare-esp32-s3-audio.yaml
@@ -276,6 +276,20 @@ script:
           then:
             - lambda: id(va).set_use_wake_word(true);
             - voice_assistant.start_continuous:
+  - id: restart_mww_fallback
+    mode: restart
+    then:
+      - delay: 5s
+      - if:
+          condition:
+            not:
+              micro_wake_word.is_running:
+          then:
+            - wait_until:
+                not:
+                  speaker.is_playing:
+            - delay: 500ms
+            - script.execute: start_wake_word
   - id: stop_wake_word
     then:
       - if:
@@ -378,6 +392,7 @@ voice_assistant:
     - script.execute: start_wake_word
   on_end:
     - script.execute: reset_led
+    - script.execute: restart_mww_fallback
   on_error:
     - light.turn_on:
         id: rgb_ring