Speaker Audio Media Player in esp32-s3-zero?

Has anyone got Speaker Audio Media Player running on anything like an esp32-s3-zero pls?

I’ve had a nightmare with updates breaking my DIY smart speaker for ages now… Towards the end of last year I re-did it from scratch, brute-forcing what worked and what didn’t. I had to settle on using the Arduino framework and its media player; nothing else would work.
Over the last few months, updates to both ESPHome and HA core have broken it in various ways again, so I’ve had to stop running the latest versions of both.

I saw mention of the (new?) Speaker Audio Media Player a couple of days ago, so thought I’d give it a try. Just a media player to begin with, which I’d use for various TTS notifications from HA. Then maybe a voice assistant.
I can’t get it to make any noise at all, using the example from the ESPHome documentation and hardware/connections that have worked with older versions of HA and ESPHome.

Other than the basic definition of the board, wifi, pins, an LED, I’ve copied/pasted the example.
Has anyone got this example working on anything like an esp32-s3?
Or can anyone see what I’m doing wrong?
Is this just not hardware that can be used for a smart speaker/media player now? Is there any DIY hardware that would work?

The yaml that isn’t working for me:

substitutions:
  # Device identity
  friendly_name: Jarvis2
  host_name: jarvis2

  # Logging verbosity: NONE, ERROR, WARN, INFO, DEBUG (default), VERBOSE, VERY_VERBOSE
  log_level: DEBUG

  # Static IP details
  static_ip: 192.168.5.36
  static_gateway: 192.168.5.254
  static_subnet: 255.255.255.0
  static_dns: 192.168.5.2

  # Volume buttons
  vol_up_pin: GPIO12    # Button for Vol+
  vol_down_pin: GPIO11  # Button for Vol-

  # I2S amplifier wiring
  amp_din_pin: GPIO04   # Amp DIN
  amp_bclk_pin: GPIO05  # Amp BCLK
  amp_lrc_pin: GPIO06   # Amp LRC

  # RGB LED
  rgb_led_pin: GPIO21      # GPIO21 for the onboard LED
  no_leds: "1"             # Number of RGB LEDs, 1 for the onboard LED
  led_brightness: 25%      # Brightness for simple on/off
  min_led_brightness: 20%  # Minimum brightness for effects
  max_led_brightness: 40%  # Maximum brightness for effects

# Native API link to Home Assistant, encrypted with a key from the secrets file.
api:
  encryption:
    key: !secret api_encryption_key

# Over-the-air updates; the explicit platform entry is the 2024.6.1+ syntax.
ota:
  - platform: esphome
    password: !secret ota_password

# Safe mode is declared as its own top-level component for 2024.6.1+.
safe_mode:

esphome:
  name: ${host_name}
  friendly_name: ${friendly_name}
  platformio_options:
    board_build.flash_mode: dio
  # On boot: write a marker line to the log, then switch the LED on with
  # red and green at 100% and blue off.
  on_boot:
    - logger.log:
        format: "****Booted"
        level: ERROR
    - light.turn_on:
        id: rgb_led
        brightness: ${led_brightness}
        red: 100%
        green: 100%
        blue: 0%

# Target: ESP32-S3 with 4MB flash, built on the ESP-IDF framework.
esp32:
  variant: esp32s3
  board: esp32-s3-devkitc-1
  flash_size: 4MB
  framework:
    type: esp-idf
    version: recommended

# External PSRAM in quad mode at 80MHz.
psram:
  mode: quad
  speed: 80MHz

# WiFi connection details with a fixed address. Without `domain:` the
# hostname suffix defaults to .local. https://esphome.io/components/wifi.html
wifi:
  ssid: !secret wifi_ssid
  password: !secret wifi_password
  fast_connect: true
  power_save_mode: none  # NONE, LIGHT, HIGH
  manual_ip:
    static_ip: ${static_ip}
    gateway: ${static_gateway}
    subnet: ${static_subnet}
    dns1: ${static_dns}

  # Fallback hotspot (captive portal) in case the WiFi connection fails
  ap:
    ssid: "${friendly_name} Fallback"
    password: !secret fallback_password

captive_portal:

# Built-in web interface on port 80
web_server:
  port: 80
  local: true             # Load everything locally rather than over the web
  include_internal: true  # Also list internal entities

sensor:
  # WiFi signal strength in dB.
  # https://esphome.io/components/sensor/wifi_signal.html
  # Marked internal; it is the source for the percentage copy below.
  - platform: wifi_signal
    id: wifi_signal_db
    name: "WiFi Signal dB"
    internal: true
    disabled_by_default: true
    update_interval: 60s  # Report signal every minute
    device_class: "signal_strength"
    entity_category: "diagnostic"

  # WiFi signal strength as a percentage: 2 * (dB + 100), clamped to 0..100
  - platform: copy
    source_id: wifi_signal_db
    name: "WiFi Signal"
    unit_of_measurement: "%"
    device_class: "signal_strength"
    entity_category: "diagnostic"
    disabled_by_default: true  # Shows entity in HA, but disabled by default
    filters:
      - lambda: return min(max(2 * (x + 100.0), 0.0), 100.0);

  # Uptime in seconds; marked internal.
  - platform: uptime
    id: uptime_seconds
    name: "Uptime seconds"
    internal: true
    disabled_by_default: true
    entity_category: "diagnostic"
    # update_interval: 10s

# Maintenance buttons, all in the diagnostic category. Everything except the
# plain restart is disabled by default in Home Assistant.
button:
  - platform: restart
    name: "Restart"
    id: "restart_device"
    entity_category: "diagnostic"

  - platform: safe_mode
    name: "Restart (Safe Mode)"
    id: "restart_device_safe_mode"
    entity_category: "diagnostic"
    disabled_by_default: true

  - platform: factory_reset
    name: Restart with Factory Default Settings
    entity_category: diagnostic
    disabled_by_default: true

  - platform: shutdown
    name: "Shutdown"
    entity_category: diagnostic
    disabled_by_default: true

# Debug component; the text_sensor section's `debug` platform reads from it.
debug:
  update_interval: 5s

text_sensor:
  # WiFi information exposed as text sensors
  - platform: wifi_info
    mac_address:
      name: Wi-Fi MAC
      icon: mdi:wifi
    ip_address:
      name: Wi-Fi IP
      icon: mdi:wifi

  # ESPHome version the firmware was built with (disabled by default)
  - platform: version
    name: "ESPHome Version"
    icon: mdi:wrench-outline
    disabled_by_default: true

  # Values published by the debug component
  - platform: debug
    reset_reason:
      name: "Reset Reason"
    device:
      name: "Device Info"

# Keep the device clock in sync with Home Assistant
time:
  - platform: homeassistant
    id: ha_time

# Logging. https://esphome.io/components/logger.html
logger:
  baud_rate: 0  # Stops logging over the onboard UART
  level: ${log_level}

binary_sensor:
  # Connection status of the native API
  - platform: status
    name: "API Status"

  # Vol+ button: input with pull-up, inverted so a press reads as ON.
  # A press raises the volume and lights the LED with red and blue at 100%.
  - platform: gpio
    name: "Vol+"
    internal: true
    pin:
      number: ${vol_up_pin}
      mode:
        input: true
        pullup: true
    filters:
      - invert:
    on_press:
      - media_player.volume_up
      - light.turn_on:
          id: rgb_led
          brightness: ${led_brightness}
          red: 100%
          green: 0%
          blue: 100%

  # Vol- button: same wiring and LED feedback, lowers the volume instead.
  - platform: gpio
    name: "Vol-"
    internal: true
    pin:
      number: ${vol_down_pin}
      mode:
        input: true
        pullup: true
    filters:
      - invert:
    on_press:
      - media_player.volume_down
      - light.turn_on:
          id: rgb_led
          brightness: ${led_brightness}
          red: 100%
          green: 0%
          blue: 100%

# RGB LED driven through the ESP32 RMT peripheral, plus a catalogue of
# effects. The LED count comes from the `no_leds` substitution so that it is
# configured in one place alongside the pin.
light:
  - platform: esp32_rmt_led_strip
    id: "rgb_led"
    name: "RGB LED"
    pin: ${rgb_led_pin}
    num_leds: ${no_leds}
    # rmt_channel: 0
    rgb_order: GRB
    chipset: ws2812
    default_transition_length: 0ms
    effects:
      - pulse:
          name: "Fast Pulse"
          transition_length: 0.5s
          update_interval: 0.5s
          min_brightness: ${min_led_brightness}
          max_brightness: ${max_led_brightness}
      - pulse:
          name: "Slow Pulse"
          # transition_length: 1s       # defaults to 1s
          update_interval: 2s
          min_brightness: ${min_led_brightness}
          max_brightness: ${max_led_brightness}
      - pulse:
          name: "Breathe"
          transition_length:
            on_length: 1s
            off_length: 500ms
          update_interval: 1.5s
          min_brightness: ${min_led_brightness}
          max_brightness: ${led_brightness}
      - random:
          name: Random
      - random:
          name: Random, custom timing
          transition_length: 500ms
          update_interval: 2s
      - strobe:
      # Cycles red, green, blue, white; one second each
      - strobe:
          name: Strobe Effect, RGBW
          colors:
            - state: true
              brightness: ${led_brightness}
              red: 100%
              green: 0%
              blue: 0%
              duration: 1000ms
            - state: true
              brightness: ${led_brightness}
              red: 0%
              green: 100%
              blue: 0%
              duration: 1000ms
            - state: true
              brightness: ${led_brightness}
              red: 0%
              green: 0%
              blue: 100%
              duration: 1000ms
            - state: true
              brightness: ${led_brightness}
              red: 100%
              green: 100%
              blue: 100%
              duration: 1000ms
      - flicker:
      - flicker:
          name: Flicker Effect With Custom Values
          alpha: 95%
          intensity: 1.5%
      # Steps through four colours, advancing one step per update_interval
      - lambda:
          name: Lambda
          update_interval: 1s
          lambda: |-
            static int state = 0;
            auto call = id(rgb_led).turn_on();
            // Transition of 1000ms = 1s
            call.set_transition_length(1000);
            if (state == 0) {
              call.set_rgb(1.0, 1.0, 1.0);
            } else if (state == 1) {
              call.set_rgb(1.0, 0.0, 1.0);
            } else if (state == 2) {
              call.set_rgb(0.0, 0.0, 1.0);
            } else {
              call.set_rgb(1.0, 0.0, 0.0);
            }
            call.perform();
            state += 1;
            if (state == 4)
              state = 0;
      - addressable_rainbow:
      - addressable_color_wipe:
      - addressable_twinkle:
      - addressable_random_twinkle:

# I2S bus clocks shared with the amplifier
i2s_audio:
  i2s_bclk_pin: ${amp_bclk_pin}
  i2s_lrclk_pin: ${amp_lrc_pin}
  # sample_rate: 48000
# Audio chain: each resampler feeds a mixer input; the mixer feeds the I2S
# hardware speaker.
speaker:
  # Hardware output over I2S to the external DAC/amp
  - platform: i2s_audio
    id: speaker_id
    dac_type: external
    sample_rate: 48000
    i2s_dout_pin: ${amp_din_pin}

  # Mixer combining the announcement and media inputs into one output
  - platform: mixer
    id: mixer_speaker_id
    output_speaker: speaker_id
    source_speakers:
      - id: announcement_spk_mixer_input
      - id: media_spk_mixer_input

  # One resampler in front of each mixer input
  - platform: resampler
    id: media_spk_resampling_input
    output_speaker: media_spk_mixer_input

  - platform: resampler
    id: announcement_spk_resampling_input
    output_speaker: announcement_spk_mixer_input
# Speaker media player with separate media (stereo) and announcement (mono)
# pipelines, each writing to its own resampler.
media_player:
  - platform: speaker
    id: speaker_media_player_id
    name: "Speaker Media Player"
    announcement_pipeline:
      speaker: announcement_spk_resampling_input
      num_channels: 1
    media_pipeline:
      speaker: media_spk_resampling_input
      num_channels: 2
    files:
      # Placed in the yaml directory. Should be encoded with a 48000 Hz sample
      # rate, mono or stereo audio, and 16 bits per sample.
      - id: alarm_sound
        file: pip.flac
# "Ring Timer" template switch. While it is on, the alarm sound is replayed
# as an announcement and the media stream is ducked; turning it off stops the
# announcement and restores the media volume.
switch:
  - platform: template
    name: "Ring Timer"
    id: timer_ringing
    optimistic: true
    restore_mode: ALWAYS_OFF
    on_turn_off:
      # Stop playing the alarm
      - media_player.stop:
          announcement: true
      # Stop ducking the media stream over 2 seconds
      - mixer_speaker.apply_ducking:
          id: media_spk_mixer_input
          decibel_reduction: 0
          duration: 2.0s
    on_turn_on:
      # Duck media audio by 20 decibels instantly
      - mixer_speaker.apply_ducking:
          id: media_spk_mixer_input
          decibel_reduction: 20
          duration: 0.0s
      - while:
          condition:
            switch.is_on: timer_ringing
          then:
            # Play the alarm sound as an announcement. The flag is required:
            # without it the file plays on the media pipeline (which was just
            # ducked) and the is_announcing waits below never complete.
            - media_player.speaker.play_on_device_media_file:
                media_file: alarm_sound
                announcement: true
            # Wait until the alarm sound starts playing
            - wait_until:
                media_player.is_announcing:
            # Wait until the alarm sound stops playing
            - wait_until:
                not:
                  media_player.is_announcing:

Thanks a lot

Have a look here.

Its working for me.

Thanks a lot. I’ll see what I can do with it.
(Just realised I hadn’t included the ESP’s full name… it’s an esp32-s3-zero.)

Holy carp @Arh, it’s alive!
With current esphome and HA core versions, tied into my local ollama setup!

Thanks so much to you and @steriku for sharing things, this had been such a frustrating pain for me for aaages now.

This seems to be working almost perfectly for me, with one small exception. If I have music playing using the media player and send an announcement at the same time, the music’s volume is ducked low but the announcement doesn’t actually play. Still a massive improvement on what I had though.
Here’s how mine looks so far, in case it’s of any use to anyone, or in case anyone can see what’s wrong with the ducking when an announcement interrupts the music:

substitutions:
  # Device identity
  friendly_name: Jarvis
  host_name: jarvis

  # Pins: MAX98357 amplifier
  amp_din_pin: GPIO04   # DIN on MAX98357
  amp_bclk_pin: GPIO05  # BCLK on MAX98357
  amp_lrc_pin: GPIO06   # LRC on MAX98357
  # Pins: INMP441 microphone
  mic_din_pin: GPIO07   # SD on INMP441
  mic_ws_pin: GPIO08    # WS on INMP441
  mic_ck_pin: GPIO09    # SCK on INMP441
  # Pins: LED
  rgb_led_pin: GPIO21   # On-board LED

  # LED settings
  no_leds: "1"             # Number of RGB LEDs, 1 for the onboard LED
  led_brightness: 25%      # Brightness for simple on/off
  min_led_brightness: 20%  # Minimum brightness for effects
  max_led_brightness: 40%  # Maximum brightness for effects

  # Voice assistant tuning
  assist_mic_gain: 31dBFS        # Mic gain, between 0dBFS and 31dBFS
  assist_noise_supression: "4"   # Noise suppression, between 0 and 4 inclusive
  assist_vol_multiplier: "8"     # Volume multiplier (was "15"). Must be larger than 0; defaults to 1 (disabled)

  # Logging verbosity: NONE, ERROR, WARN, INFO, DEBUG (default), VERBOSE, VERY_VERBOSE
  log_level: ERROR

  # Static network settings
  static_ip: 192.168.5.35        # Static IP address
  static_gateway: 192.168.5.254  # Static gateway address
  static_subnet: 255.255.255.0   # Static subnet
  static_dns: 192.168.5.2        # Static DNS address

  # Sounds
  # connected_sound:         "http://192.168.5.11:8123/local/Voice_assistant/jarvis_connected.mp3"
  # stop_listening_sound:    "http://192.168.5.11:8123/local/Voice_assistant/ill_stop_listening.mp3"
  # start_listening_sound:   "http://192.168.5.11:8123/local/Voice_assistant/im_listening.mp3"
  pip_sound: "http://192.168.5.11:8123/local/Voice_assistant/pip.mp3"
  test_sound: "http://192.168.5.11:8123/local/Voice_assistant/testing_1_2_3_testing.mp3"

  # Phases of the voice assistant
  voice_assist_idle_phase_id: '1'        # IDLE: ready to be triggered by a wake word
  voice_assist_listening_phase_id: '2'   # LISTENING: ready to listen to a voice command (after the wake word)
  voice_assist_thinking_phase_id: '3'    # THINKING: currently processing the command
  voice_assist_replying_phase_id: '4'    # REPLYING: replying to the command
  voice_assist_not_ready_phase_id: '10'  # NOT_READY: the voice assistant is not ready
  voice_assist_error_phase_id: '11'      # ERROR: the voice assistant encountered an error
  voice_assist_muted_phase_id: '12'      # MUTED: muted, will not reply to a wake word

# PSRAM in quad mode at 80MHz. The octal alternative is kept for reference:
# psram:
#   mode: octal
#   speed: 80MHz
psram:
  speed: 80MHz
  mode: quad

# Shared configuration fragments pulled in from the common/ directory.
# NOTE(review): ids referenced later but not defined in this file (e.g. the
# enable_assistant switch) presumably come from these packages - confirm.
packages:
  logger: !include common/logger.yaml
  api_ota: !include common/api_ota.yaml
  wifi: !include common/wifi.yaml
  web_server: !include common/web_server.yaml
  sensor: !include common/sensor.yaml
  binary_sensor: !include common/binary_sensor.yaml
  button: !include common/button.yaml
  text_sensors: !include common/text_sensor.yaml
  time: !include common/sync_time.yaml
  switch: !include common/switch.yaml

# Logging. https://esphome.io/components/logger.html
logger:
  level: ${log_level}

# Device-specific WiFi settings: fixed address, power saving, LED refresh hooks.
wifi:
  # use_address: "192.168.5.35"
  # NOTE(review): HIGH power saving on a device that streams audio may add
  # latency or drop-outs - worth testing with `none` when chasing audio issues.
  power_save_mode: HIGH  # NONE, LIGHT, HIGH
  manual_ip:
    static_ip: ${static_ip}
    gateway: ${static_gateway}
    subnet: ${static_subnet}
    dns1: ${static_dns}
  # Whenever the device connects to or disconnects from WiFi, run the script
  # that refreshes the LED status
  on_connect:
    - script.execute: control_led
  on_disconnect:
    - script.execute: control_led

esp32:
  variant: ESP32S3
  board: esp32-s3-devkitc-1
  framework:
    type: esp-idf
    version: recommended
    sdkconfig_options:
      # Option set used for an ESP32-S3 N16R8 (disabled):
      # CONFIG_ESP32_S3_BOX_BOARD: "y"
      # CONFIG_ESP32S3_DEFAULT_CPU_FREQ_240: "y"
      # CONFIG_ESP32S3_DATA_CACHE_64KB:      "y"
      # CONFIG_ESP32S3_DATA_CACHE_LINE_64B:  "y"
      # CONFIG_BT_ALLOCATION_FROM_SPIRAM_FIRST: "y"
      # CONFIG_BT_BLE_DYNAMIC_ENV_MEMORY: "y"
      # CONFIG_MBEDTLS_EXTERNAL_MEM_ALLOC: "y"
      # CONFIG_MBEDTLS_SSL_PROTO_TLS1_3: "y"

      # Option set used for the ESP32-S3-Zero (active)
      CONFIG_ESP32_S3_BOX_BOARD: "y"

      # CPU clock and data cache
      CONFIG_ESP32S3_DEFAULT_CPU_FREQ_240: "y"
      CONFIG_ESP32S3_DATA_CACHE_64KB: "y"
      CONFIG_ESP32S3_DATA_CACHE_LINE_64B: "y"

      # WiFi and TCP/IP buffer sizes
      CONFIG_ESP32_WIFI_STATIC_RX_BUFFER_NUM: "16"
      CONFIG_ESP32_WIFI_DYNAMIC_RX_BUFFER_NUM: "512"
      CONFIG_TCPIP_RECVMBOX_SIZE: "512"
      CONFIG_TCP_RECVMBOX_SIZE: "512"
      CONFIG_TCP_SND_BUF_DEFAULT: "65535"
      CONFIG_TCP_WND_DEFAULT: "512000"

      # Bluetooth and mbedTLS settings
      CONFIG_BT_ALLOCATION_FROM_SPIRAM_FIRST: "y"
      CONFIG_BT_BLE_DYNAMIC_ENV_MEMORY: "y"
      CONFIG_MBEDTLS_EXTERNAL_MEM_ALLOC: "y"
      CONFIG_MBEDTLS_SSL_PROTO_TLS1_3: "y"

esphome:
  name: ${host_name}
  friendly_name: ${friendly_name}
  on_boot:
    priority: 600
    then:
      # Refresh the LED status straight away
      - script.execute: control_led
      # - output.turn_off: set_low_speaker
      # If the device is still initialising after 30 seconds (it has not yet
      # connected to Home Assistant), clear init_in_progress and refresh the
      # LED status again
      - delay: 30s
      - if:
          condition:
            lambda: return id(init_in_progress);
          then:
            - lambda: id(init_in_progress) = false;
            - script.execute: control_led

# I2S microphone (non-PDM, external ADC) on the i2s_in bus, left channel
microphone:
  - platform: i2s_audio
    id: comm_mic
    i2s_audio_id: i2s_in
    i2s_din_pin: ${mic_din_pin}
    adc_type: external
    channel: left
    pdm: false
    # sample_rate: 16000
    # bits_per_sample: 16bit

speaker:
  # Hardware output: external I2S DAC/amp on the i2s_output bus
  - platform: i2s_audio
    id: i2s_audio_speaker
    i2s_audio_id: i2s_output
    dac_type: external
    i2s_dout_pin:
      number: ${amp_din_pin}
    channel: left
    sample_rate: 48000
    bits_per_sample: 32bit
    buffer_duration: 100ms
    timeout: never
    # sample_rate: 16000
    # bits_per_sample: 32bit

  # Virtual speaker combining the announcement and media streams into one output
  - platform: mixer
    id: mixing_speaker
    output_speaker: i2s_audio_speaker
    num_channels: 1
    # num_channels: 2
    source_speakers:
      - id: announcement_mixing_input
        timeout: never
      - id: media_mixing_input
        timeout: never

  # Virtual speakers resampling each pipeline's audio, if necessary, as the
  # mixer speaker requires the same sample rate on all inputs
  - platform: resampler
    id: announcement_resampling_speaker
    output_speaker: announcement_mixing_input
    bits_per_sample: 16
    sample_rate: 48000

  - platform: resampler
    id: media_resampling_speaker
    output_speaker: media_mixing_input
    bits_per_sample: 16
    sample_rate: 48000

media_player:
  - platform: speaker
    id: external_media_player
    name: Media Player
    icon: mdi:speaker-wireless
    internal: false
    volume_min: 0.4
    volume_max: 0.85  # When amp gain is connected to ground. Avoids cutting out.
    volume_increment: 0.05

    media_pipeline:
      speaker: media_resampling_speaker
      format: FLAC  # FLAC is the least processor intensive codec
      sample_rate: 48000
      num_channels: 1
      # num_channels: 2
    announcement_pipeline:
      speaker: announcement_resampling_speaker
      format: FLAC  # FLAC is the least processor intensive codec
      sample_rate: 48000
      num_channels: 1  # Stereo audio is unnecessary for announcements

    # Duck the media stream by 20 dB the moment an announcement starts
    on_announcement:
      - mixer_speaker.apply_ducking:
          id: media_mixing_input
          decibel_reduction: 20
          duration: 0.0s

    # On any state change with neither the voice assistant running nor an
    # announcement playing, remove the ducking over one second
    on_state:
      if:
        condition:
          and:
            - not:
                voice_assistant.is_running:
            - not:
                media_player.is_announcing:
        then:
          - mixer_speaker.apply_ducking:
              id: media_mixing_input
              decibel_reduction: 0
              duration: 1.0s

    files:
      - id: timer_finished_sound
        file: https://github.com/esphome/home-assistant-voice-pe/raw/dev/sounds/timer_finished.flac
      # Local copy in use; the upstream file is kept for reference:
      # - id: wake_word_triggered_sound
      #   file: https://github.com/esphome/home-assistant-voice-pe/raw/dev/sounds/wake_word_triggered.flac
      - id: wake_word_triggered_sound
        file: wake_word_triggered.flac
      # Placed in the yaml directory. Should be encoded with a 48000 Hz sample
      # rate, mono or stereo audio, and 16 bits per sample.
      - id: pip_sound
        file: pip.flac

# Two I2S buses: one for the amplifier output, one for the microphone input
i2s_audio:
  - id: i2s_output
    i2s_bclk_pin: ${amp_bclk_pin}
    i2s_lrclk_pin: ${amp_lrc_pin}
  - id: i2s_in
    i2s_bclk_pin: ${mic_ck_pin}
    i2s_lrclk_pin: ${mic_ws_pin}

# Voice assistant. Every handler below records the current phase in the
# voice_assistant_phase global and then runs control_led to refresh the LED.
voice_assistant:
  id: va
  microphone: comm_mic
  media_player: external_media_player
  auto_gain: ${assist_mic_gain}
  noise_suppression_level: ${assist_noise_supression}
  volume_multiplier: ${assist_vol_multiplier}
  # use_wake_word: false

  # Client connected to HA: initialisation is over, so clear init_in_progress.
  # If the enable switch is on, start wake-word detection and go idle;
  # otherwise go to the muted phase. Refresh the LED either way.
  on_client_connected:
    - lambda: id(init_in_progress) = false;
    - if:
        condition:
          switch.is_on: enable_assistant
        then:
          - micro_wake_word.start
          - lambda: id(voice_assistant_phase) = ${voice_assist_idle_phase_id};
        else:
          - lambda: id(voice_assistant_phase) = ${voice_assist_muted_phase_id};
    - script.execute: control_led

  # Client disconnected from HA: mark not ready, stop wake-word detection,
  # refresh the LED.
  on_client_disconnected:
    - lambda: id(voice_assistant_phase) = ${voice_assist_not_ready_phase_id};
    - micro_wake_word.stop
    - script.execute: control_led

  # Assistant starts listening
  on_listening:
    - lambda: id(voice_assistant_phase) = ${voice_assist_listening_phase_id};
    - script.execute: control_led

  # Speech ended; the assistant starts thinking
  on_stt_vad_end:
    - lambda: id(voice_assistant_phase) = ${voice_assist_thinking_phase_id};
    - script.execute: control_led

  # Assistant starts replying (alternative trigger: on_tts_stream_start)
  on_tts_start:
    - lambda: id(voice_assistant_phase) = ${voice_assist_replying_phase_id};
    - script.execute: control_led

  # Run ended: when enabled, wait until the assistant is no longer running and
  # restart wake-word detection; then return to the idle phase.
  # (alternative triggers considered: on_tts_stream_end, on_stt_end)
  on_end:
    - if:
        condition:
          - switch.is_on: enable_assistant
        then:
          - wait_until:
              not:
                voice_assistant.is_running:
          - micro_wake_word.start
    - lambda: id(voice_assistant_phase) = ${voice_assist_idle_phase_id};
    - script.execute: control_led

  # Error (ignored while still initialising): show the error phase for one
  # second, then fall back to idle or muted depending on the enable switch.
  on_error:
    - if:
        condition:
          lambda: return !id(init_in_progress);
        then:
          - lambda: id(voice_assistant_phase) = ${voice_assist_error_phase_id};
          - script.execute: control_led
          - delay: 1s
          - if:
              condition:
                switch.is_on: enable_assistant
              then:
                - lambda: id(voice_assistant_phase) = ${voice_assist_idle_phase_id};
              else:
                - lambda: id(voice_assistant_phase) = ${voice_assist_muted_phase_id};
          - script.execute: control_led

# On-device wake-word detection ("hey_jarvis") with the VAD option enabled
micro_wake_word:
  id: mww
  vad:
  models:
    - hey_jarvis
  # Only acts while the enable switch is on:
  #   - assistant already running   -> stop it
  #   - an announcement is playing  -> stop the announcement
  #   - otherwise                   -> play the wake sound, wait 300ms, then
  #                                    start the assistant with the wake word
  on_wake_word_detected:
    - if:
        condition:
          switch.is_on: enable_assistant
        then:
          - if:
              condition:
                voice_assistant.is_running:
              then:
                - voice_assistant.stop:
              else:
                - if:
                    condition:
                      media_player.is_announcing:
                    then:
                      - media_player.stop:
                          announcement: true
                    else:
                      - script.execute:
                          id: play_sound
                          priority: true
                          sound_file: !lambda return id(wake_word_triggered_sound);
                      - delay: 300ms
                      - voice_assistant.start:
                          wake_word: !lambda return wake_word;

globals:
  # Initialisation flag: starts true and is set to false once everything is
  # connected. Only used to give a smooth "plugging in" experience.
  - id: init_in_progress
    type: bool
    initial_value: 'true'
    restore_value: false

  # Current phase of the voice assistant (ids defined in substitutions).
  # Starts as not_ready.
  - id: voice_assistant_phase
    type: int
    initial_value: ${voice_assist_not_ready_phase_id}
    restore_value: false

# Status LED on the RMT peripheral. "Slow Pulse" and "Fast Pulse" are the
# effects referenced by the LED control scripts.
light:
  - platform: esp32_rmt_led_strip
    id: led
    pin: ${rgb_led_pin}
    num_leds: ${no_leds}
    chipset: WS2812
    rgb_order: GRB
    rmt_symbols: 192
    max_refresh_rate: 15ms
    default_transition_length: 0ms
    effects:
      - pulse:
          name: "Slow Pulse"
          min_brightness: 50%
          max_brightness: 100%
          transition_length: 250ms
          update_interval: 250ms
      - pulse:
          name: "Fast Pulse"
          min_brightness: 50%
          max_brightness: 100%
          transition_length: 100ms
          update_interval: 100ms
      - pulse:
          name: "Breathe"
          min_brightness: ${min_led_brightness}
          max_brightness: ${led_brightness}
          update_interval: 1.5s
          transition_length:
            on_length: 1s
            off_length: 500ms
      - random:
          name: Random
      - random:
          name: Random, custom timing
          transition_length: 500ms
          update_interval: 2s
      - strobe:
      # Cycles red, green, blue, white; one second each
      - strobe:
          name: Strobe Effect, RGBW
          colors:
            - state: true
              duration: 1000ms
              brightness: ${led_brightness}
              red: 100%
              green: 0%
              blue: 0%
            - state: true
              duration: 1000ms
              brightness: ${led_brightness}
              red: 0%
              green: 100%
              blue: 0%
            - state: true
              duration: 1000ms
              brightness: ${led_brightness}
              red: 0%
              green: 0%
              blue: 100%
            - state: true
              duration: 1000ms
              brightness: ${led_brightness}
              red: 100%
              green: 100%
              blue: 100%
      - flicker:
      - flicker:
          name: Flicker Effect With Custom Values
          alpha: 95%
          intensity: 1.5%
      # Steps through four colours, advancing one step per update_interval
      - lambda:
          name: Lambda
          update_interval: 1s
          lambda: |-
            static int state = 0;
            auto call = id(led).turn_on();
            // Transition of 1000ms = 1s
            call.set_transition_length(1000);
            if (state == 0) {
              call.set_rgb(1.0, 1.0, 1.0);
            } else if (state == 1) {
              call.set_rgb(1.0, 0.0, 1.0);
            } else if (state == 2) {
              call.set_rgb(0.0, 0.0, 1.0);
            } else {
              call.set_rgb(1.0, 0.0, 0.0);
            }
            call.perform();
            state += 1;
            if (state == 4)
              state = 0;              
      - addressable_rainbow:
      - addressable_color_wipe:
      - addressable_twinkle:
      - addressable_random_twinkle:

script:
  # Master LED script. Chooses what the LED shows from three inputs: whether
  # initialisation is still in progress, whether WiFi and the API are both
  # connected, and the current voice assistant phase. It delegates to the
  # child scripts defined below and is run whenever one of those inputs changes.
  - id: control_led
    then:
      - if:
          condition:
            lambda: return id(init_in_progress);
          then:
            # Still initialising
            - script.execute: control_led_init_state
          else:
            - if:
                condition:
                  and:
                    - wifi.connected:
                    - api.connected:
                then:
                  # Fully connected: dispatch on the assistant phase
                  - lambda: |
                      switch(id(voice_assistant_phase)) {
                        case ${voice_assist_listening_phase_id}:
                          id(control_led_voice_assist_listening_phase).execute();
                          break;
                        case ${voice_assist_thinking_phase_id}:
                          id(control_led_voice_assist_thinking_phase).execute();
                          break;
                        case ${voice_assist_replying_phase_id}:
                          id(control_led_voice_assist_replying_phase).execute();
                          break;
                        case ${voice_assist_error_phase_id}:
                          id(control_led_voice_assist_error_phase).execute();
                          break;
                        case ${voice_assist_muted_phase_id}:
                          id(control_led_voice_assist_muted_phase).execute();
                          break;
                        case ${voice_assist_not_ready_phase_id}:
                          id(control_led_voice_assist_not_ready_phase).execute();
                          break;
                        default:
                          id(control_led_voice_assist_idle_phase).execute();
                          break;
                      }
                else:
                  # WiFi or the API is down
                  - script.execute: control_led_no_ha_connection_state


  # Initialisation in progress: LED green with the "Fast Pulse" effect 🟢
  - id: control_led_init_state
    then:
      - light.turn_on:
          id: led
          effect: "Fast Pulse"
          red: 0%
          green: 100%
          blue: 0%
  

  # No connection to Home Assistant: LED off
  - id: control_led_no_ha_connection_state
    then:
      - light.turn_off: led


  # Idle phase: the voice assistant is waiting for a wake word.
  # In this example the LED is steady white at 20% brightness ⚪
  - id: control_led_voice_assist_idle_phase
    then:
      - light.turn_on:
          id: led
          effect: "none"
          brightness: 20%
          red: 100%
          green: 100%
          blue: 100%


  # Listening phase: the voice assistant is listening to a command.
  # In this example the LED is blue with the "Slow Pulse" effect 🔵
  - id: control_led_voice_assist_listening_phase
    then:
      - light.turn_on:
          id: led
          effect: "Slow Pulse"
          red: 0%
          green: 0%
          blue: 100%


  # Thinking phase: the voice assistant is processing the command.
  # In this example the LED is blue with the "Fast Pulse" effect 🔵
  - id: control_led_voice_assist_thinking_phase
    then:
      - light.turn_on:
          id: led
          effect: "Fast Pulse"
          red: 0%
          green: 0%
          blue: 100%


  # Replying phase: the voice assistant is replying to a command.
  # In this example the LED is solid blue (no effect) at full brightness 🔵
  - id: control_led_voice_assist_replying_phase
    then:
      - light.turn_on:
          id: led
          effect: "none"
          brightness: 100%
          red: 0%
          green: 0%
          blue: 100%


  # Error phase: the voice assistant encountered an error.
  # In this example the LED is solid red (no effect) at full brightness 🔴
  - id: control_led_voice_assist_error_phase
    then:
      - light.turn_on:
          id: led
          effect: "none"
          brightness: 100%
          red: 100%
          green: 0%
          blue: 0%


  # Muted phase: the voice assistant is muted.
  # In this example the LED is switched off.
  - id: control_led_voice_assist_muted_phase
    then:
      - light.turn_off: led


  # Not-ready phase: the voice assistant is not ready.
  # In this example the LED is switched off.
  - id: control_led_voice_assist_not_ready_phase
    then:
      - light.turn_off: led

  # Plays a sound file on the device through `external_media_player`.
  # Parameters:
  #   priority:   when true, a STOP command flagged as an announcement is sent
  #               to the media player first, and the sound is then played
  #               regardless of the player's state.
  #   sound_file: pointer to the audio file to play.
  # When priority is false, the sound is only played if the media player is
  # not currently in the ANNOUNCING state.
  - id: play_sound
    parameters:
      priority: bool
      sound_file: "audio::AudioFile*"
    then:
      - lambda: |-
          // Priority sounds first stop whatever announcement is in progress.
          if (priority) {
            id(external_media_player)->make_call()
              .set_command(media_player::MediaPlayerCommand::MEDIA_PLAYER_COMMAND_STOP)
              .set_announcement(true)
              .perform();
          }

          // Read the state only after the optional stop request has been issued.
          const bool announcing =
            id(external_media_player).state == media_player::MediaPlayerState::MEDIA_PLAYER_STATE_ANNOUNCING;

          if (priority || !announcing) {
            // NOTE(review): the two flags are presumably (announcement, enqueue);
            // confirm against the speaker media player API.
            id(external_media_player)->play_file(sound_file, true, false);
          }

switch:
  # "Enable Assistant" template switch. Turning it off puts the assistant in
  # the muted phase; turning it on returns it to the idle phase. Both handlers
  # do nothing while `init_in_progress` is true.
  - platform: template
    id: enable_assistant
    name: "Enable Assistant"
    icon: "mdi:microphone-message"
    # Alternative icon: mdi:assistant
    restore_mode: RESTORE_DEFAULT_ON
    optimistic: true
    # When the switch is turned on:
    #   1. Set the phase to idle.
    #   2. Start micro wake word detection, unless the voice assistant is
    #      already running.
    #   3. Run control_led to refresh the LED status.
    on_turn_on:
      - if:
          condition:
            lambda: |-
              return !id(init_in_progress);
          then:
            - lambda: |-
                id(voice_assistant_phase) = ${voice_assist_idle_phase_id};
            - if:
                condition:
                  not:
                    - voice_assistant.is_running
                then:
                  - micro_wake_word.start
            - script.execute: control_led
    # When the switch is turned off:
    #   1. Stop the voice assistant and micro wake word detection.
    #   2. Set the phase to muted.
    #   3. Run control_led to refresh the LED status.
    on_turn_off:
      - if:
          condition:
            lambda: |-
              return !id(init_in_progress);
          then:
            - voice_assistant.stop
            - micro_wake_word.stop
            - lambda: |-
                id(voice_assistant_phase) = ${voice_assist_muted_phase_id};
            - script.execute: control_led

button:
  # Test buttons for checking audio output. Each one is created in
  # Home Assistant but is disabled by default.

  # Plays the media given by the `test_sound` substitution via the
  # media_player.play_media action.
  - platform: template
    id: test_sound
    name: "Testing mp3"
    icon: "mdi:speaker-play"
    disabled_by_default: true
    on_press:
      - media_player.play_media: ${test_sound}

  # Plays `wake_word_triggered_sound` through the play_sound script,
  # with priority.
  - platform: template
    id: wake_flac
    name: "Wake flac"
    icon: "mdi:speaker-play"
    disabled_by_default: true
    on_press:
      - script.execute:
          id: play_sound
          sound_file: !lambda |-
            return id(wake_word_triggered_sound);
          priority: true

  # Plays `pip_sound` through the play_sound script, with priority.
  - platform: template
    id: pip_flac
    name: "Pip flac"
    icon: "mdi:speaker-play"
    disabled_by_default: true
    on_press:
      - script.execute:
          id: play_sound
          sound_file: !lambda |-
            return id(pip_sound);
          priority: true

Note: the `packages:` section doesn’t include anything exotic, just the basics that are common to most/all of my ESPHome devices.