VA ESP32 code from YouTube: everything is too fast - needs a mic mute or a delay

Can anyone help? I re-created the code from the competition YouTube video and made a few adjustments, one of which is to use my Sonos media player speaker to play the responses.

Instead of using a “boing” sound for on_wake_word_detected:, I tried using the word “Yes?” as a response. The problem is that on_listening: is so fast that it detects the word “Yes?” as part of the intent and fails (obviously, because “yes turn on the lights” is something it can't execute).

Is there a way to either mute the microphone for a second when on_listening: fires, OR slow down on_listening: so that it only picks up audio AFTER the word “Yes?”?

For those interested, the code from the video is below, with my own modifications in the light area and to get the responses playing on a media player of your choosing.

# Numeric codes for each voice-assistant phase. They are substituted into the
# lambdas that assign to, and switch on, the voice_assistant_phase global.
substitutions:
  voice_assist_idle_phase_id: "1"
  voice_assist_listening_phase_id: "2"
  voice_assist_thinking_phase_id: "3"
  voice_assist_replying_phase_id: "4"
  voice_assist_not_ready_phase_id: "10"
  voice_assist_error_phase_id: "11"
  voice_assist_muted_phase_id: "12"

esphome:
  name: esphome-web-888888
  friendly_name: HA VOICE ASSISTANT
  on_boot:
    priority: 600
    then:
      # Put the LED into its start-up state straight away.
      - script.execute: control_led
      # If init_in_progress is still set 30 s after boot, clear it here and
      # refresh the LED so it shows the real state.
      - delay: 30s
      - if:
          condition:
            lambda: |-
              return id(init_in_progress);
          then:
            - lambda: |-
                id(init_in_progress) = false;
            - script.execute: control_led

# Target board and build framework (ESP-IDF rather than Arduino).
esp32:
  board: esp32dev
  framework:
    type: esp-idf

# esp_adf is enabled with no options of its own; the component code is pulled
# in by the external_components entry below.
esp_adf:

external_components:
  # refresh: 0s asks for the source to be re-fetched on every build.
  - source: github://pr#5230
    refresh: 0s
    components:
      - esp_adf

# Logger component, enabled with no options set.
logger:

# Native API connection to Home Assistant. Every change in client connection
# state re-runs control_led so the status LED follows it.
api:
  encryption:
    # Keep the key out of the config, like the Wi-Fi credentials. It must be a
    # base64-encoded 32-byte value; define api_encryption_key in secrets.yaml.
    key: !secret api_encryption_key
  on_client_connected:
    - script.execute: control_led
  on_client_disconnected:
    - script.execute: control_led

# Over-the-air updates, enabled with no options set (no password configured).
ota:

# Wi-Fi station with a fixed address, plus an access point defined under `ap`.
# Connect and disconnect events both re-run control_led.
wifi:
  ssid: !secret wifi_ssid
  password: !secret wifi_password
  manual_ip:
    static_ip: 192.168.1.2
    gateway: 192.168.1.1
    subnet: 255.255.255.0
  ap:
    ssid: 'Esphome-Web-888888'
    password: 'blahblahblah'
  on_connect:
    - script.execute: control_led
  on_disconnect:
    - script.execute: control_led

# Captive portal component, enabled with no options set.
captive_portal:

light:
  # Addressable WS2812 status strip (15 LEDs on GPIO15). The control_led script
  # selects the effects below by name, so the names must stay as they are.
  - platform: esp32_rmt_led_strip
    id: led
    name: "Status LED"
    chipset: WS2812
    rgb_order: GRB
    pin: GPIO15
    num_leds: 15
    rmt_channel: 0
    default_transition_length: 0s
    effects:
      - pulse:
          name: "Slow Pulse"
          transition_length: 1s
          update_interval: 1s
          min_brightness: 50%

      # Previously identical to "Slow Pulse" (1s/1s), which made the thinking
      # and error states look the same as the listening pulse. A shorter cycle
      # makes this one visibly faster.
      - pulse:
          name: "Fast Pulse"
          transition_length: 250ms
          update_interval: 250ms
          min_brightness: 50%

      - addressable_rainbow:
          name: "Rainbow"
          speed: 10
          width: 50

      - addressable_twinkle:
          name: "Twinkle"
          twinkle_probability: 5%
          progress_interval: 4ms

      - addressable_scan:
          name: "Scan"
          move_interval: 100ms
          scan_width: 4

# I2S bus (word-select and bit-clock pins) referenced by the microphone as i2s_in.
i2s_audio:
  - id: i2s_in
    i2s_lrclk_pin: GPIO25
    i2s_bclk_pin: GPIO26

# External (non-PDM) I2S microphone on the i2s_in bus, with data in on GPIO34.
# voice_assistant refers to it by the id external_microphone.
microphone:
  id: external_microphone
  platform: i2s_audio
  i2s_audio_id: i2s_in
  i2s_din_pin: GPIO34
  adc_type: external
  pdm: false

# Voice assistant fed by external_microphone. Each pipeline event below records
# the current phase in the voice_assistant_phase global and then re-runs
# control_led so the status LED reflects it.
voice_assistant:
  id: va
  microphone: external_microphone
  use_wake_word: true
  noise_suppression_level: 4
  auto_gain: 31dBFS
  volume_multiplier: 8.0

  # Ask Home Assistant to speak "Yes?" via tts.piper on the configured media
  # player, then mark the listening phase.
  # NOTE(review): the microphone appears to keep streaming while this prompt
  # plays, so the spoken "Yes?" can end up transcribed as part of the command
  # — confirm, and consider a short non-speech chime instead.
  on_wake_word_detected:
    - homeassistant.service:
        service: tts.speak
        data:
          media_player_entity_id: media_player.your_meediya_playa
          message: Yes?
          entity_id: tts.piper    
    - lambda: id(voice_assistant_phase) = ${voice_assist_listening_phase_id};
    - script.execute: control_led

  on_listening:
    - lambda: id(voice_assistant_phase) = ${voice_assist_listening_phase_id};
    - script.execute: control_led

  on_stt_vad_end:
    - lambda: id(voice_assistant_phase) = ${voice_assist_thinking_phase_id};
    - script.execute: control_led

  # Forward the trigger argument x (presumably the response text — confirm) to
  # the same media player through tts.speak, then mark the replying phase.
  on_tts_start:
    - homeassistant.service:
        service: tts.speak
        data:
          media_player_entity_id: media_player.your_meediya_playa
          message: !lambda 'return x;'
          entity_id: tts.piper
    - lambda: id(voice_assistant_phase) = ${voice_assist_replying_phase_id};
    - script.execute: control_led          


  on_tts_end:
    - lambda: id(voice_assistant_phase) = ${voice_assist_idle_phase_id};
    - script.execute: control_led          

  # Ignored while init_in_progress is set. Otherwise show the error phase for
  # 1 s, then fall back to idle or muted depending on the use_wake_word switch.
  on_error:
    - if:
        condition:
          lambda: return !id(init_in_progress);
        then:
          - lambda: id(voice_assistant_phase) = ${voice_assist_error_phase_id};
          - script.execute: control_led
          - delay: 1s
          - if:
              condition:
                switch.is_on: use_wake_word
              then:
                - lambda: id(voice_assistant_phase) = ${voice_assist_idle_phase_id};
              else:
                - lambda: id(voice_assistant_phase) = ${voice_assist_muted_phase_id};
          - script.execute: control_led

  # A client connection ends the init phase. Continuous listening is started
  # only when the use_wake_word switch is on; otherwise the phase becomes muted.
  on_client_connected:
    - lambda: id(init_in_progress) = false;
    - if:
        condition:
          switch.is_on: use_wake_word
        then:
          - voice_assistant.start_continuous:
          - lambda: id(voice_assistant_phase) = ${voice_assist_idle_phase_id};
        else:
          - lambda: id(voice_assistant_phase) = ${voice_assist_muted_phase_id};      
    - script.execute: control_led

  on_client_disconnected:
    - lambda: id(voice_assistant_phase) = ${voice_assist_not_ready_phase_id};
    - script.execute: control_led

switch:
  # Enables or disables wake-word listening. Both handlers do nothing while
  # init_in_progress is set, so the state restored at boot does not start the
  # assistant early.
  - platform: template
    name: Use wake word
    id: use_wake_word
    optimistic: true
    restore_mode: RESTORE_DEFAULT_ON
    on_turn_on:
      - if:
          condition:
            lambda: return !id(init_in_progress);
          then:
            - lambda: id(voice_assistant_phase) = ${voice_assist_idle_phase_id};
            - if:
                condition:
                  not:
                    - voice_assistant.is_running
                then:
                  - voice_assistant.start_continuous
                  - logger.log: XXXXXXXXXXXXX VA start_continuous XXXXXXXXXXXXXXX
            - script.execute: control_led

    on_turn_off:
      - if:
          condition:
            lambda: return !id(init_in_progress);
          then:
            - voice_assistant.stop
            - logger.log: VA stopped
            - lambda: id(voice_assistant_phase) = ${voice_assist_muted_phase_id};
            - script.execute: control_led

  # Manual start/stop of continuous listening. It mirrors the switch above in
  # two ways:
  #  * handlers are skipped while init_in_progress is set, so the
  #    RESTORE_DEFAULT_ON restore at boot no longer starts the assistant early;
  #  * voice_assistant_phase is updated before control_led runs, so the LED
  #    actually changes (control_led only reads the stored phase).
  - platform: template
    name: force listen
    id: force_listen
    optimistic: true
    restore_mode: RESTORE_DEFAULT_ON
    on_turn_on:
      - if:
          condition:
            lambda: return !id(init_in_progress);
          then:
            - voice_assistant.start_continuous
            - logger.log: XXXX VA start_continuous XXXX
            - lambda: id(voice_assistant_phase) = ${voice_assist_idle_phase_id};
            - script.execute: control_led

    on_turn_off:
      - if:
          condition:
            lambda: return !id(init_in_progress);
          then:
            - voice_assistant.stop
            - logger.log: XXXX VA STOP XXXX
            - lambda: id(voice_assistant_phase) = ${voice_assist_muted_phase_id};
            - script.execute: control_led




# Runtime state shared by the automations; neither value is restored across
# reboots.
globals:
  # Starts true; the automations that clear it set it to false.
  - id: init_in_progress
    type: bool
    initial_value: 'true'
    restore_value: false
  # Current phase code; starts at the "not ready" value from substitutions.
  - id: voice_assistant_phase
    type: int
    initial_value: ${voice_assist_not_ready_phase_id}
    restore_value: false



script:
  # Sets the status LED from init_in_progress, the Wi-Fi/API connection state
  # and the current voice_assistant_phase value.
  - id: control_led
    then:
      - if:
          condition:
            lambda: |-
              return id(init_in_progress);
          then:
            # Initialisation still in progress: green fast pulse.
            - light.turn_on:
                id: led
                red: 0%
                green: 100%
                blue: 0%
                effect: "Fast Pulse"
          else:
            - if:
                condition:
                  and:
                    - wifi.connected:
                    - api.connected:
                then:
                  # Fully connected: pick colour and effect from the phase.
                  # Phases without their own case (e.g. idle) use `default`.
                  - lambda: |-
                      switch (id(voice_assistant_phase)) {
                        case ${voice_assist_listening_phase_id}:
                          id(led).turn_on().set_rgb(1,0,0).set_brightness(1).set_effect("Slow Pulse").perform();
                          break;
                        case ${voice_assist_thinking_phase_id}:
                          id(led).turn_on().set_rgb(0,0,1).set_effect("Fast Pulse").perform();
                          break;
                        case ${voice_assist_replying_phase_id}:
                          id(led).turn_on().set_rgb(0,0,1).set_effect("Rainbow").perform();
                          break;
                        case ${voice_assist_error_phase_id}:
                          id(led).turn_on().set_rgb(1,1,1).set_effect("Fast Pulse").perform();
                          break;
                        case ${voice_assist_muted_phase_id}:
                          id(led).turn_on().set_rgb(1,1,0).set_effect("none").perform();
                          break;
                        case ${voice_assist_not_ready_phase_id}:
                          id(led).turn_off().perform();
                          break;
                        default:
                          id(led).turn_on().set_rgb(1,0,0).set_brightness(1).set_effect("Scan").perform();
                          break;
                      }
                else:
                  # Wi-Fi or the API connection is down: LED off.
                  - light.turn_off:
                      id: led