Media player TTS + voice assist speaker

I’ve spent 2 days chasing my tail trying to get a RaspiAudio Muse Luxe ESP32 “hackable” speaker to do BOTH voice-assist AND TTS media player to announce HASS events in the house… only to find out that it can only do one or the other, and each requires a different YAML configuration… at least that was my recent experience – I am not a YAML coding guy, but learning as I go.

https://www.youtube.com/results?search_query=raspiaudio+muse+luxe

Voice assist works great with the YAML provided by RaspiAudio, but no local TTS, and local media player TTS works great with the YAML offered by ESPHome website but has no voice-assist.

So I am wondering: is the ESP32 not capable of providing both “services”, or is it just that no YAML file combines both?

It would be really nice to have a speaker that does voice-assist and also plays TTS locally to announce events, such as “alarm has been disabled”, or “someone is at the door” when the doorbell is rung, or a camera alarm is triggered, etc…

Any ideas or suggestions on speakers I could look into? Or a YAML file I should try on the Muse Luxe?

Cheers

I know nothing about this board but I have voice assistant with micro wake word and media player running successfully on ESP32 S3 N16r8 boards using code from here.

1 Like

Many Thanks for the suggestion, I will take a look, and post back if it works on the Muse Luxe.

noob question – what is “micro” wake-word ?

Micro wake word is wake word detection hosted on the esp32, as opposed to running on the server.

ahh, cool ! Thank you.

looked at the alternate YAML you mentioned, and not sure I can make it work, the Muse Luxe uses an ESP32-WROVER… I wish I knew more about coding etc.
Cheers

I’m trying to get this to work but unfortunately neither the mic nor the media player seems to work. I don’t hear any sound when I try to play media, and the wake word doesn’t trigger. I don’t get any error messages and everything compiles fine. I’m using an ESP32-S3 DevKit 1M; maybe that could be the issue?

Here’s my diagram and code in case somebody can help me figure it out.
Any help would be much appreciated.

Here’s the code that I’m using:

# Core node settings: identity, flash mode and a boot-time LED animation.
esphome:
  friendly_name: test
  name: test
  platformio_options:
    board_build.flash_mode: dio
  on_boot:
    # Start the "fast pulse" effect on the on-board LED with the blue
    # channel at 100% and overall brightness at 60%.
    - light.turn_on:
        id: led_ww
        effect: fast pulse
        brightness: 60%
        blue: 100%
# Target chip, board definition and build framework (ESP-IDF), plus raw
# sdkconfig overrides passed through to the IDF build.
esp32:
  variant: esp32s3
  board: esp32-s3-devkitc-1
  framework:
    type: esp-idf
    version: recommended
    sdkconfig_options:
      # Board selection flag.
      CONFIG_ESP32_S3_BOX_BOARD: "y"
      # Wi-Fi receive buffer counts.
      CONFIG_ESP32_WIFI_STATIC_RX_BUFFER_NUM: "16"
      CONFIG_ESP32_WIFI_DYNAMIC_RX_BUFFER_NUM: "512"
      # TCP/IP mailbox, send-buffer and window sizes.
      CONFIG_TCPIP_RECVMBOX_SIZE: "512"
      CONFIG_TCP_SND_BUF_DEFAULT: "65535"
      CONFIG_TCP_WND_DEFAULT: "512000"
      CONFIG_TCP_RECVMBOX_SIZE: "512"

# External PSRAM interface settings.
psram:
  speed: 80MHz
  mode: quad  # quad for N8R2 and octal for N16R8
  
# Verbose logging on UART0; the micro_wake_word tag is limited to warnings.
logger:
  level: VERBOSE
  hardware_uart: UART0
  logs:
    micro_wake_word: WARN
# Enable Home Assistant API
api:
  encryption:
    key: "My encryption key"
  # Nothing else in this configuration starts wake-word detection after boot
  # (only the mute switch and the voice_assistant end/error handlers call
  # micro_wake_word.start), so start it once a client has connected.
  # The mute switch is respected so a reconnect does not undo a mute.
  on_client_connected:
    - if:
        condition:
          switch.is_off: mute
        then:
          - micro_wake_word.start:

# Over-the-air firmware updates using the esphome OTA platform.
ota:
  - password: "My password"
    platform: esphome

# Wi-Fi station credentials are read from the secrets file.
wifi:
  password: !secret wifi_password
  ssid: !secret wifi_ssid

  # Fallback hotspot, enabled in case the wifi connection fails
  ap:
    password: "another password"
    ssid: "test"

# Button entity that restarts the node.
button:
  - platform: restart
    id: but_rest
    name: "Restart"

# Template switch "mute".
#   ON  -> stops wake-word detection and the voice assistant.
#   OFF -> starts wake-word detection again.
# Both transitions give feedback on the on-board LED.
switch:
  - platform: template
    name: mute
    id: mute
    optimistic: true
    on_turn_on:
      - micro_wake_word.stop:
      - voice_assistant.stop:
      # Red "fast pulse" for two seconds.
      - light.turn_on:
          id: led_ww
          effect: fast pulse
          brightness: 60%
          red: 100%
          green: 0%
          blue: 0%
      - delay: 2s
      - light.turn_off: led_ww
      # Then leave the LED red at 30% brightness (no effect requested).
      - light.turn_on:
          id: led_ww
          brightness: 30%
          red: 100%
          green: 0%
          blue: 0%
    on_turn_off:
      - micro_wake_word.start:
      # Green "fast pulse" for two seconds, then LED off.
      - light.turn_on:
          id: led_ww
          effect: fast pulse
          brightness: 60%
          red: 0%
          green: 100%
          blue: 0%
      - delay: 2s
      - light.turn_off: led_ww
   
# Single WS2812 pixel on GPIO48, driven by the esp32_rmt_led_strip platform.
light:
  - platform: esp32_rmt_led_strip
    name: "on board light"
    id: led_ww
    chipset: ws2812
    rgb_order: GRB
    num_leds: 1
    pin: GPIO48
    rmt_channel: 0
    effects:
      # Pulse effect with the component's default settings.
      - pulse:
      # Named pulse variant used by the automations in this file.
      - pulse:
          name: "Fast Pulse"
          update_interval: 0.5s
          transition_length: 0.5s
          min_brightness: 0%
          max_brightness: 100%
          
          
 # Audio and Voice Assistant Config  
external_components:
  # adf_pipeline and i2s_audio come from the gnumpi/esphome_audio repository
  # (branch: main). refresh: 0s asks for the source to be re-fetched each time.
  - refresh: 0s
    components:
      - adf_pipeline
      - i2s_audio
    source:
      type: git
      url: https://github.com/gnumpi/esphome_audio
      ref: main

# Two I2S buses: i2s_in is referenced by the audio_in pipeline element and
# i2s_out by the audio_out pipeline element.
i2s_audio:
  - id: i2s_in
    i2s_bclk_pin: GPIO2
    i2s_lrclk_pin: GPIO3
  - id: i2s_out
    i2s_bclk_pin: GPIO7
    i2s_lrclk_pin: GPIO6

# ADF pipeline endpoints built on top of the two I2S buses.
adf_pipeline:
  # Output element: data out on GPIO8 via the i2s_out bus.
  - platform: i2s_audio
    id: adf_spk
    type: audio_out
    i2s_audio_id: i2s_out
    i2s_dout_pin: GPIO8

  # Input element: data in on GPIO4 via the i2s_in bus, 16 kHz / 16-bit,
  # right channel, non-PDM.
  - platform: i2s_audio
    id: adf_mic
    type: audio_in
    i2s_audio_id: i2s_in
    i2s_din_pin: GPIO4
    pdm: false
    channel: right
    sample_rate: 16000
    bits_per_sample: 16bit

# Microphone entity fed by the adf_mic pipeline element.
microphone:
  - platform: adf_pipeline
    id: adf_microphone
    keep_pipeline_alive: false
    gain_log2: 3
    # Pipeline order: I2S input element first, then this microphone.
    pipeline:
      - adf_mic
      - self

# Media player entity that feeds the adf_spk pipeline element.
media_player:
  - platform: adf_pipeline
    name: simon_media_player
    id: adf_media_player
    internal: false
    keep_pipeline_alive: true
    # Pipeline order: this media player first, then the I2S output element.
    pipeline:
      - self
      - adf_spk
            

# On-device wake-word detection with the hey_jarvis model.
micro_wake_word:
  models: hey_jarvis
  on_wake_word_detected:
    # Hand over to the voice assistant and show a "fast pulse" on the LED.
    - voice_assistant.start:
    - light.turn_on:
        id: led_ww
        effect: fast pulse
        brightness: 60%
        red: 30%
        green: 30%
        blue: 70%

# Voice assistant wired to the ADF microphone and media player.
voice_assistant:
  id: va
  microphone: adf_microphone
  media_player: adf_media_player
  noise_suppression_level: 1
  volume_multiplier: 4
  # Speech-to-text finished: switch the LED off.
  on_stt_end:
    - light.turn_off: led_ww
  # After an error, resume wake-word detection, but only when the mute switch
  # is off, so an error while muted does not re-enable listening.
  on_error:
    - if:
        condition:
          switch.is_off: mute
        then:
          - micro_wake_word.start:
  # Run finished: LED off, wait for the assistant to be fully idle, then
  # resume wake-word detection (again only when not muted).
  on_end:
    - light.turn_off: led_ww
    - wait_until:
        not:
          voice_assistant.is_running:
    - if:
        condition:
          switch.is_off: mute
        then:
          - micro_wake_word.start:

Well, I figured out the issues:
It seems my board didn’t have PSRAM. I switched to an N8R8. One thing that also gave me some problems was the PSRAM mode: it needs to be set to octal instead of quad.

# External PSRAM interface settings (octal mode).
psram:
  speed: 80MHz
  mode: octal  # quad for N8R2 and octal for N16R8

Lastly, the external component needed to use the dev branch (dev-next) instead of the
main branch:

external_components:
  # adf_pipeline and i2s_audio from gnumpi/esphome_audio, branch dev-next.
  - refresh: 0s
    components:
      - adf_pipeline
      - i2s_audio
    source:
      type: git
      url: https://github.com/gnumpi/esphome_audio
      ref: dev-next

I also changed the wiring a bit but I don’t know if that had any effect.
Now that everything is working I can have a media player that gives me audio feedback when the wake word is triggered, with TTS for example. Here is the new diagram and code if that helps anyone…

Here’s the new code:

# Core node settings. The node name is written in lowercase because the
# ESPHome documentation restricts it to lowercase letters, digits and hyphens.
# No friendly_name is added here, so entity naming stays as it was.
esphome:
  name: assistant
  platformio_options:
    board_build.flash_mode: dio
# Target chip, board definition and build framework (ESP-IDF).
esp32:
  variant: esp32s3
  board: esp32-s3-devkitc-1
  framework:
    version: recommended
    type: esp-idf
    # Raw sdkconfig override: board selection flag.
    sdkconfig_options:
      CONFIG_ESP32_S3_BOX_BOARD: "y"
   
# External PSRAM interface settings (octal mode).
psram:
  speed: 80MHz
  mode: octal  # quad for N8R2 and octal for N16R8

# Logging enabled with the component's default settings.
logger:

# Over-the-air firmware updates using the esphome OTA platform.
ota:
  - password: "YOUR PASSWORD"
    platform: esphome

# Wi-Fi station credentials are read from the secrets file.
wifi:
  password: !secret wifi_password
  ssid: !secret wifi_ssid

  # Fallback hotspot, enabled in case the wifi connection fails
  ap:
    password: "DEVICE PASSWORD"
    ssid: "DEVICE SSID"



# Native API for Home Assistant, with connection-driven automations.
api:
  encryption:
    key: "ENCRIPTION KEY"
  # A client connected: short settle delay, LED off, then begin wake-word
  # detection. The start is skipped while the mute switch is on, so a client
  # reconnect does not silently undo a mute.
  on_client_connected:
    - delay: 50ms
    - light.turn_off: led_ww
    - if:
        condition:
          switch.is_off: mute
        then:
          - micro_wake_word.start:
  # Client gone: stop any voice assistant run in progress.
  on_client_disconnected:
    - voice_assistant.stop:

# adf_pipeline and i2s_audio are taken from the gnumpi/esphome_audio
# repository, branch dev-next.
external_components:
  - refresh: 0s
    components:
      - adf_pipeline
      - i2s_audio
    source:
      type: git
      url: https://github.com/gnumpi/esphome_audio
      ref: dev-next
    
# Button entity that restarts the node.
button:
  - platform: restart
    name: "Reboot"
    id: reboot
          

# Template switch "mute".
#   ON  -> stops wake-word detection and the voice assistant.
#   OFF -> starts wake-word detection again.
# Both transitions give feedback on the on-board LED.
switch:
  - platform: template
    name: mute
    id: mute
    optimistic: true
    on_turn_on:
      - micro_wake_word.stop:
      - voice_assistant.stop:
      # Red "fast pulse" for two seconds.
      - light.turn_on:
          id: led_ww
          effect: fast pulse
          brightness: 60%
          red: 100%
          green: 0%
          blue: 0%
      - delay: 2s
      - light.turn_off: led_ww
      # Then leave the LED red at 30% brightness (no effect requested).
      - light.turn_on:
          id: led_ww
          brightness: 30%
          red: 100%
          green: 0%
          blue: 0%
    on_turn_off:
      - micro_wake_word.start:
      # Green "fast pulse" for two seconds, then LED off.
      - light.turn_on:
          id: led_ww
          effect: fast pulse
          brightness: 60%
          red: 0%
          green: 100%
          blue: 0%
      - delay: 2s
      - light.turn_off: led_ww

# Single WS2812 pixel on GPIO48, driven by the esp32_rmt_led_strip platform.
light:
  - platform: esp32_rmt_led_strip
    name: "on board light"
    id: led_ww
    chipset: ws2812
    rgb_order: GRB
    num_leds: 1
    pin: GPIO48
    rmt_channel: 0
    effects:
      # Pulse effect with the component's default settings.
      - pulse:
      # Named pulse variant used by the automations in this file.
      - pulse:
          name: "Fast Pulse"
          update_interval: 0.5s
          transition_length: 0.5s
          min_brightness: 0%
          max_brightness: 100%

# Two I2S buses: i2s_in is referenced by the audio_in pipeline element and
# i2s_out by the audio_out pipeline element.
i2s_audio:
  - id: i2s_in
    i2s_bclk_pin: GPIO2
    i2s_lrclk_pin: GPIO9
  - id: i2s_out
    i2s_bclk_pin: GPIO7
    i2s_lrclk_pin: GPIO6

# ADF pipeline endpoints built on top of the two I2S buses.
adf_pipeline:
  # Output element: data out on GPIO8 via the i2s_out bus.
  - platform: i2s_audio
    id: adf_i2s_out
    type: audio_out
    i2s_audio_id: i2s_out
    i2s_dout_pin: GPIO8

  # Input element: data in on GPIO4 via the i2s_in bus, 16 kHz / 32-bit,
  # left channel, non-PDM.
  - platform: i2s_audio
    id: adf_i2s_in
    type: audio_in
    i2s_audio_id: i2s_in
    i2s_din_pin: GPIO4
    pdm: false
    channel: left
    sample_rate: 16000
    bits_per_sample: 32bit


# Microphone entity fed by the adf_i2s_in pipeline element.
microphone:
  - platform: adf_pipeline
    id: adf_microphone
    keep_pipeline_alive: true
    gain_log2: 3
    # Pipeline order: I2S input element first, then this microphone.
    pipeline:
      - adf_i2s_in
      - self


# Media player entity that feeds the adf_i2s_out pipeline element.
media_player:
  - platform: adf_pipeline
    name: adf_media_player
    id: adf_media_player
    keep_pipeline_alive: true
    internal: false
    # Pipeline order: this media player first, then the I2S output element.
    pipeline:
      - self
      - adf_i2s_out
      
# On-device wake-word detection with the hey_jarvis model.
micro_wake_word:
  models:
    - model: hey_jarvis
  on_wake_word_detected:
    # Hand over to the voice assistant and show a "fast pulse" on the LED.
    - voice_assistant.start:
    - light.turn_on:
        id: led_ww
        effect: fast pulse
        brightness: 60%
        red: 30%
        green: 30%
        blue: 70%
    # Call the Home Assistant tts.cloud_say service, targeting the
    # media_player.adf_media_player entity with a spoken prompt.
    - homeassistant.service:
        service: tts.cloud_say
        data:
          entity_id: media_player.adf_media_player
        data_template:
          message: "{{ 'what do you want?' }}"

# Voice assistant wired to the ADF microphone and media player. Wake-word
# handling is left to micro_wake_word (use_wake_word: false).
voice_assistant:
  id: va
  microphone: adf_microphone
  media_player: adf_media_player
  noise_suppression_level: 2
  auto_gain: 16dBFS
  volume_multiplier: 1
  use_wake_word: false
  # Speech-to-text finished: switch the LED off.
  on_stt_end:
    - light.turn_off: led_ww
  # After an error, resume wake-word detection, but only when the mute switch
  # is off, so an error while muted does not re-enable listening.
  on_error:
    - if:
        condition:
          switch.is_off: mute
        then:
          - micro_wake_word.start:
  # Run finished: LED off, wait for the assistant to be fully idle, then
  # resume wake-word detection (again only when not muted).
  on_end:
    - light.turn_off: led_ww
    - wait_until:
        not:
          voice_assistant.is_running:
    - if:
        condition:
          switch.is_off: mute
        then:
          - micro_wake_word.start:
1 Like