I now remember that the local wake word detection wasn't working properly; I think it was due to limited RAM or CPU. I removed the media player and, at the same time, switched to wake word detection in Home Assistant. It has been working ever since. Here's my code, in case it helps. It also shows on the display what was understood and what the assistant replies (I don't have any speakers connected at the moment).
substitutions:
  # Illustration files referenced by the image: entries, one per UI state.
  loading_illustration_file: https://github.com/esphome/wake-word-voice-assistants/raw/main/casita/loading_320_240.png
  idle_illustration_file: https://github.com/esphome/wake-word-voice-assistants/raw/main/casita/idle_320_240.png
  listening_illustration_file: https://github.com/esphome/wake-word-voice-assistants/raw/main/casita/listening_320_240.png
  thinking_illustration_file: https://github.com/esphome/wake-word-voice-assistants/raw/main/casita/thinking_320_240.png
  replying_illustration_file: https://github.com/esphome/wake-word-voice-assistants/raw/main/casita/replying_320_240.png
  error_illustration_file: https://github.com/esphome/wake-word-voice-assistants/raw/main/casita/error_320_240.png
  timer_finished_illustration_file: https://github.com/esphome/wake-word-voice-assistants/raw/main/casita/timer_finished_320_240.png

  # Background colours (hex RGB) consumed by the color: entries.
  loading_illustration_background_color: "000000"
  idle_illustration_background_color: "000000"
  listening_illustration_background_color: "FFFFFF"
  thinking_illustration_background_color: "FFFFFF"
  replying_illustration_background_color: "FFFFFF"
  error_illustration_background_color: "000000"

  # Numeric phase ids: stored in the voice_assistant_phase global and
  # compared against in the draw_display script.
  voice_assist_idle_phase_id: "1"
  voice_assist_listening_phase_id: "2"
  voice_assist_thinking_phase_id: "3"
  voice_assist_replying_phase_id: "4"
  voice_assist_not_ready_phase_id: "10"
  voice_assist_error_phase_id: "11"
  voice_assist_muted_phase_id: "12"
  voice_assist_timer_finished_phase_id: "20"

  # Glyph set compiled into the gfonts-based fonts; a broader set gives
  # better character coverage.
  font_glyphsets: "GF_Latin_Core"
  # For Greek use "Noto Sans"; for other languages pick a compatible family.
  font_family: Figtree
# Core node settings and boot sequence.
esphome:
  name: esphome-web-11d630
  friendly_name: TabletKüche
  platformio_options:
    build_flags: "-DBOARD_HAS_PSRAM"
    board_build.esp-idf.memory_type: qio_opi
    board_build.flash_mode: dio
  on_boot:
    priority: 600
    then:
      - script.execute: draw_display
      # Send the single byte 0x10 to I2C address 0x30 on bus_a
      # (same address as the pca9557 i2c_device).
      - lambda: |-
          const uint8_t init_cmd[] = {0x10};
          id(bus_a).write(0x30, init_cmd, sizeof(init_cmd));
      # If initialisation is still flagged as in progress after 30 s,
      # clear the flag and redraw.
      - delay: 30s
      - if:
          condition:
            lambda: return id(init_in_progress);
          then:
            - lambda: id(init_in_progress) = false;
            - script.execute: draw_display
# Target chip, flash layout and ESP-IDF build options.
esp32:
  board: esp32-s3-devkitc-1
  variant: esp32s3
  flash_size: 16MB
  cpu_frequency: 240MHz
  framework:
    type: esp-idf
    sdkconfig_options:
      # Quoted so no YAML loader can read "y" as a boolean.
      CONFIG_ESP32S3_DEFAULT_CPU_FREQ_240: "y"
      CONFIG_ESP32S3_DATA_CACHE_64KB: "y"
      CONFIG_SPIRAM_FETCH_INSTRUCTIONS: "y"
      CONFIG_SPIRAM_RODATA: "y"
# Over-the-air updates.
ota:
  - platform: esphome
    # NOTE(review): "x" looks like a placeholder; consider moving the
    # real password into secrets.yaml and referencing it with !secret.
    password: "x"
# External PSRAM: octal mode at 80 MHz.
psram:
  speed: 80MHz
  mode: octal
# Native API: redraw whenever a client connects or disconnects.
api:
  on_client_connected:
    - script.execute: draw_display
  on_client_disconnected:
    - script.execute: draw_display
# Logging at DEBUG level.
logger:
  level: DEBUG
# Wi-Fi credentials come from secrets.yaml; the display is redrawn on
# every connect and disconnect.
wifi:
  ssid: !secret wifi_ssid
  password: !secret wifi_password
  # Access point section left empty (no explicit AP settings).
  ap:
  on_connect:
    - script.execute: draw_display
  on_disconnect:
    - script.execute: draw_display

captive_portal:
# Internal factory-reset button (internal: true).
button:
  - id: factory_reset_btn
    platform: factory_reset
    internal: true
# GT911 touch controller at address 0x5D on bus_a.
touchscreen:
  - platform: gt911
    id: touch
    i2c_id: bus_a
    address: 0x5D
# I2C bus shared by the touchscreen and the device at 0x30.
i2c:
  id: bus_a
  sda: 15
  scl: 16
  scan: true

# Generic I2C device at 0x30; the backlight output and the on_boot
# lambda both write to this address.
i2c_device:
  id: pca9557
  i2c_id: bus_a
  address: 0x30
# I2S clock pins used by the microphone.
i2s_audio:
  - id: i2s_audio_bus
    i2s_bclk_pin: GPIO19
    i2s_lrclk_pin: GPIO2
# I2S microphone that feeds the voice assistant.
microphone:
  - platform: i2s_audio
    id: box_mic
    i2s_audio_id: i2s_audio_bus
    i2s_din_pin: GPIO20
    adc_type: external
    sample_rate: 16000
    bits_per_sample: 32bit
    channel: stereo

# Speaker is disabled (no speaker connected); kept for reference.
# speaker:
#   - platform: i2s_audio
#     id: box_speaker
#     i2s_dout_pin: GPIO5
#     dac_type: external
#     sample_rate: 16000
#     bits_per_sample: 16bit
#     channel: left
#     buffer_duration: 100ms
# On-device wake word; a detection starts the voice assistant and
# passes the detected wake word along.
micro_wake_word:
  id: mww
  models:
    - hey_jarvis
  on_wake_word_detected:
    - voice_assistant.start:
        wake_word: !lambda return wake_word;
# Voice assistant pipeline. Each handler updates the voice_assistant_phase
# global and/or the on-screen labels, then calls draw_display.
voice_assistant:
  id: va
  microphone: box_mic
  micro_wake_word: mww
  noise_suppression_level: 2
  auto_gain: 31dBFS
  volume_multiplier: 2.0

  # Listening started: reset request/response text to "...".
  on_listening:
    - lambda: id(voice_assistant_phase) = ${voice_assist_listening_phase_id};
    - text_sensor.template.publish:
        id: text_request
        state: "..."
    - text_sensor.template.publish:
        id: text_response
        state: "..."
    - lvgl.label.update:
        id: req_label
        text: "..."
    - lvgl.label.update:
        id: reply_label
        text: "..."
    - script.execute: draw_display

  on_stt_vad_end:
    - lambda: id(voice_assistant_phase) = ${voice_assist_thinking_phase_id};
    - script.execute: draw_display

  # Recognised text (x): show it and forward it as a Home Assistant event.
  on_stt_end:
    - text_sensor.template.publish:
        id: text_request
        state: !lambda return x;
    - lvgl.label.update:
        id: req_label
        text: !lambda return x;
    - script.execute: draw_display
    - script.execute:
        id: send_stt_text_event
        stt_text: !lambda return x;

  # Response text (x): show it and enter the replying phase.
  on_tts_start:
    - text_sensor.template.publish:
        id: text_response
        state: !lambda return x;
    - lvgl.label.update:
        id: reply_label
        text: !lambda return x;
    - lambda: id(voice_assistant_phase) = ${voice_assist_replying_phase_id};
    - script.execute: draw_display

  on_end:
    # Restart only mWW if enabled; streaming wake words restart on their own.
    - if:
        condition:
          - lambda: return id(wake_word_engine_location).state == "On device";
        then:
          - lambda: id(va).set_use_wake_word(false);
          - micro_wake_word.start:
    - script.execute: set_idle_or_mute_phase
    - script.execute: draw_display
    # Clearing the text sensors is disabled so the last exchange stays
    # visible:
    # - text_sensor.template.publish:
    #     id: text_request
    #     state: ""
    # - text_sensor.template.publish:
    #     id: text_response
    #     state: ""

  # Errors are ignored while initialisation is in progress; otherwise the
  # error page is shown for 1 s before returning to idle or muted.
  on_error:
    - if:
        condition:
          lambda: return !id(init_in_progress);
        then:
          - lambda: id(voice_assistant_phase) = ${voice_assist_error_phase_id};
          - script.execute: draw_display
          - delay: 1s
          - script.execute: set_idle_or_mute_phase
          - script.execute: draw_display

  on_client_connected:
    - lambda: id(init_in_progress) = false;
    - script.execute: start_wake_word
    - script.execute: set_idle_or_mute_phase
    - script.execute: draw_display

  on_client_disconnected:
    - script.execute: stop_wake_word
    - lambda: id(voice_assistant_phase) = ${voice_assist_not_ready_phase_id};
    - script.execute: draw_display

  # Timer events only trigger a redraw.
  on_timer_started:
    - script.execute: draw_display
  on_timer_cancelled:
    - script.execute: draw_display
  on_timer_updated:
    - script.execute: draw_display
  on_timer_tick:
    - script.execute: draw_display

  # NOTE(review): nothing in the visible config assigns
  # voice_assist_timer_finished_phase_id to voice_assistant_phase, so
  # draw_display never selects timer_finished_page from here; confirm
  # whether that is intended.
  on_timer_finished:
    - switch.turn_on: timer_ringing
    - script.execute: draw_display

  # Forward the TTS audio URI (x) as a Home Assistant event.
  on_tts_end:
    - script.execute:
        id: send_tts_uri_event
        tts_uri: !lambda return x;
# --- Sensors mirrored from Home Assistant; each value update is pushed
# --- straight into the matching LVGL widgets.
sensor:
  # Kitchen temperature -> needle indicator and label.
  - platform: homeassistant
    id: ha_temperature_sensor
    entity_id: sensor.kueche_temperature
    internal: true
    unit_of_measurement: "°C"
    on_value:
      - lvgl.indicator.update:
          id: val_needle
          value: !lambda return x;
      - lvgl.label.update:
          id: label_temp
          text:
            format: "%.1f°C"
            args: ['x']

  # Buffer tank, top. Bar colour: >= 65 red, > 50 yellow, otherwise blue.
  - platform: homeassistant
    id: ha_puf_top_sensor
    entity_id: sensor.puffer_oben
    internal: true
    unit_of_measurement: "°C"
    on_value:
      - lvgl.bar.update:
          id: puf_top_bar_id
          value: !lambda return x;
          indicator:
            bg_color: !lambda |-
              if (x >= 65) return lv_color_hex(0xFF0000);
              if (x > 50) return lv_color_hex(0xebc334);
              return lv_color_hex(0x202eab);
      - lvgl.label.update:
          id: puf_top_value_label
          text:
            format: "%.0f°C"
            args: ['x']

  # Buffer tank, middle. Same colour thresholds as the top sensor.
  - platform: homeassistant
    id: ha_puf_mid_sensor
    entity_id: sensor.puffer_mitte
    internal: true
    unit_of_measurement: "°C"
    on_value:
      - lvgl.bar.update:
          id: puf_mid_bar_id
          value: !lambda return x;
          indicator:
            bg_color: !lambda |-
              if (x >= 65) return lv_color_hex(0xFF0000);
              if (x > 50) return lv_color_hex(0xebc334);
              return lv_color_hex(0x202eab);
      - lvgl.label.update:
          id: puf_mid_value_label
          text:
            format: "%.0f°C"
            args: ['x']

  # Buffer tank, bottom. Same colour thresholds as the top sensor.
  - platform: homeassistant
    id: ha_puf_bot_sensor
    entity_id: sensor.puffer_unten
    internal: true
    unit_of_measurement: "°C"
    on_value:
      - lvgl.bar.update:
          id: puf_bot_bar_id
          value: !lambda return x;
          indicator:
            bg_color: !lambda |-
              if (x >= 65) return lv_color_hex(0xFF0000);
              if (x > 50) return lv_color_hex(0xebc334);
              return lv_color_hex(0x202eab);
      - lvgl.label.update:
          id: puf_bot_value_label
          text:
            format: "%.0f°C"
            args: ['x']
# Mirrors a Home Assistant switch entity into the checked state of the
# umlauf_btn widget.
binary_sensor:
  - platform: homeassistant
    id: umlaufpunpe
    entity_id: switch.steckdose_heizraum_switch
    publish_initial_state: true
    on_state:
      then:
        - lvgl.widget.update:
            id: umlauf_btn
            state:
              checked: !lambda return x;
# Time from Home Assistant; the clock labels are refreshed on sync and at
# second 0 of every minute.
time:
  - platform: homeassistant
    id: time_comp
    on_time_sync:
      - script.execute: time_update
    on_time:
      - seconds: 0
        minutes: '*'
        then:
          - script.execute: time_update
script:
  # Refresh the clock (HH:MM) and date ("MON dd") labels from time_comp.
  - id: time_update
    then:
      - lvgl.label.update:
          id: time_value_label
          text: !lambda |-
            return id(time_comp).now().strftime("%H:%M");
      - lvgl.label.update:
          id: date_value_label
          text: !lambda |-
            static const char * const mon_names[] = {"JAN", "FEB", "MAR", "APR", "MAI", "JUN",
                                                     "JUL", "AUG", "SEP", "OKT", "NOV", "DEZ"};
            static char date_buf[8];
            auto now = id(time_comp).now();
            snprintf(date_buf, sizeof(date_buf), "%s %2d", mon_names[now.month-1], now.day_of_month);
            return date_buf;

  # Fire the esphome.tts_uri event in Home Assistant with the given URI.
  - id: send_tts_uri_event
    parameters:
      tts_uri: string
    then:
      - homeassistant.event:
          event: esphome.tts_uri
          data:
            uri: !lambda return tts_uri;

  # Fire the esphome.stt_text event in Home Assistant with the given text.
  - id: send_stt_text_event
    parameters:
      stt_text: string
    then:
      - homeassistant.event:
          event: esphome.stt_text
          data:
            text: !lambda return stt_text;

  # Pick the LVGL page for the current state. Order of precedence:
  # still initialising -> no Wi-Fi -> no API connection -> page matching
  # voice_assistant_phase, with idle_page for any unlisted phase.
  - id: draw_display
    then:
      - if:
          condition:
            lambda: return !id(init_in_progress);
          then:
            - if:
                condition:
                  wifi.connected:
                then:
                  - if:
                      condition:
                        api.connected:
                      then:
                        - if:
                            condition:
                              lambda: return id(voice_assistant_phase) == ${voice_assist_listening_phase_id};
                            then:
                              - lvgl.page.show: listening_page
                        - if:
                            condition:
                              lambda: return id(voice_assistant_phase) == ${voice_assist_thinking_phase_id};
                            then:
                              - lvgl.page.show: thinking_page
                        - if:
                            condition:
                              lambda: return id(voice_assistant_phase) == ${voice_assist_replying_phase_id};
                            then:
                              - lvgl.page.show: replying_page
                        - if:
                            condition:
                              lambda: return id(voice_assistant_phase) == ${voice_assist_error_phase_id};
                            then:
                              - lvgl.page.show: error_page
                        - if:
                            condition:
                              lambda: return id(voice_assistant_phase) == ${voice_assist_muted_phase_id};
                            then:
                              - lvgl.page.show: muted_page
                        - if:
                            condition:
                              lambda: return id(voice_assistant_phase) == ${voice_assist_not_ready_phase_id};
                            then:
                              - lvgl.page.show: no_ha_page
                        - if:
                            condition:
                              lambda: return id(voice_assistant_phase) == ${voice_assist_timer_finished_phase_id};
                            then:
                              - lvgl.page.show: timer_finished_page
                        # Default: none of the phases above matched.
                        - if:
                            condition:
                              lambda: |-
                                return id(voice_assistant_phase) != ${voice_assist_listening_phase_id} &&
                                       id(voice_assistant_phase) != ${voice_assist_thinking_phase_id} &&
                                       id(voice_assistant_phase) != ${voice_assist_replying_phase_id} &&
                                       id(voice_assistant_phase) != ${voice_assist_error_phase_id} &&
                                       id(voice_assistant_phase) != ${voice_assist_muted_phase_id} &&
                                       id(voice_assistant_phase) != ${voice_assist_not_ready_phase_id} &&
                                       id(voice_assistant_phase) != ${voice_assist_timer_finished_phase_id};
                            then:
                              - lvgl.page.show: idle_page
                      else:
                        - lvgl.page.show: no_ha_page
                else:
                  - lvgl.page.show: no_wifi_page
          else:
            - lvgl.page.show: initializing_page

  # Store the active timer with the fewest seconds left in
  # global_first_active_timer. If no timer is active, the first timer is
  # stored (as before); if there are no timers, the global is untouched.
  - id: fetch_first_active_timer
    then:
      - lambda: |
          const auto timers = id(va).get_timers();
          // Guard: begin() must not be dereferenced on an empty container.
          if (timers.empty()) {
            return;
          }
          auto output_timer = timers.begin()->second;
          // Only active timers may compete; the first active one always
          // replaces the seed so an inactive seed cannot win the comparison.
          bool found_active = false;
          for (auto &iterable_timer : timers) {
            if (!iterable_timer.second.is_active) {
              continue;
            }
            if (!found_active || iterable_timer.second.seconds_left <= output_timer.seconds_left) {
              output_timer = iterable_timer.second;
              found_active = true;
            }
          }
          id(global_first_active_timer) = output_timer;

  # Set global_is_timer_active to true when at least one timer is active.
  - id: check_if_timers_active
    then:
      - lambda: |
          const auto timers = id(va).get_timers();
          bool output = false;
          for (auto &iterable_timer : timers) {
            if (iterable_timer.second.is_active) {
              output = true;
              break;
            }
          }
          id(global_is_timer_active) = output;

  # Store the timer with the fewest seconds left (active or not) in
  # global_first_timer; the global is untouched when there are no timers.
  - id: fetch_first_timer
    then:
      - lambda: |
          const auto timers = id(va).get_timers();
          // Guard: begin() must not be dereferenced on an empty container.
          if (timers.empty()) {
            return;
          }
          auto output_timer = timers.begin()->second;
          for (auto &iterable_timer : timers) {
            if (iterable_timer.second.seconds_left <= output_timer.seconds_left) {
              output_timer = iterable_timer.second;
            }
          }
          id(global_first_timer) = output_timer;

  # Set global_is_timer to true when any timer exists.
  - id: check_if_timers
    then:
      - lambda: |
          const auto timers = id(va).get_timers();
          id(global_is_timer) = !timers.empty();

  # Draw a progress strip at y=225 on main_display: active-timer colour if
  # a timer is running, paused-timer colour if timers exist but none is
  # active. The strip is scaled to a width of 320 pixels.
  - id: draw_timer_timeline
    then:
      - lambda: |
          id(check_if_timers_active).execute();
          id(check_if_timers).execute();
          if (id(global_is_timer_active)){
            id(fetch_first_active_timer).execute();
            int active_pixels = round( 320 * id(global_first_active_timer).seconds_left / max(id(global_first_active_timer).total_seconds , static_cast<uint32_t>(1)) );
            if (active_pixels > 0){
              id(main_display).filled_rectangle(0 , 225 , 320 , 15 , Color::WHITE );
              id(main_display).filled_rectangle(0 , 226 , active_pixels , 13 , id(active_timer_color) );
            }
          } else if (id(global_is_timer)){
            id(fetch_first_timer).execute();
            int active_pixels = round( 320 * id(global_first_timer).seconds_left / max(id(global_first_timer).total_seconds , static_cast<uint32_t>(1)));
            if (active_pixels > 0){
              id(main_display).filled_rectangle(0 , 225 , 320 , 15 , Color::WHITE );
              id(main_display).filled_rectangle(0 , 226 , active_pixels , 13 , id(paused_timer_color) );
            }
          }

  # Draw a boxed countdown for the first active timer: HH:MM while at least
  # one hour remains, MM:SS otherwise.
  - id: draw_active_timer_widget
    then:
      - lambda: |
          id(check_if_timers_active).execute();
          if (id(global_is_timer_active)){
            id(main_display).filled_rectangle(80 , 40 , 160 , 50 , Color::WHITE );
            id(main_display).rectangle(80 , 40 , 160 , 50 , Color::BLACK );
            id(fetch_first_active_timer).execute();
            int hours_left = floor(id(global_first_active_timer).seconds_left / 3600);
            int minutes_left = floor((id(global_first_active_timer).seconds_left - hours_left * 3600) / 60);
            int seconds_left = id(global_first_active_timer).seconds_left - hours_left * 3600 - minutes_left * 60 ;
            auto display_hours = (hours_left < 10 ? "0" : "") + std::to_string(hours_left);
            auto display_minute = (minutes_left < 10 ? "0" : "") + std::to_string(minutes_left);
            auto display_seconds = (seconds_left < 10 ? "0" : "") + std::to_string(seconds_left) ;
            std::string display_string = "";
            if (hours_left > 0) {
              display_string = display_hours + ":" + display_minute;
            } else {
              display_string = display_minute + ":" + display_seconds;
            }
            id(main_display).printf(120, 47, id(font_timer), Color::BLACK, "%s", display_string.c_str());
          }

  # Starts either mWW or the streaming wake word, depending on the
  # configured location. Does nothing while the voice assistant is running.
  - id: start_wake_word
    then:
      - if:
          condition:
            and:
              - not:
                  - voice_assistant.is_running:
              - lambda: return id(wake_word_engine_location).state == "On device";
          then:
            - lambda: id(va).set_use_wake_word(false);
            - micro_wake_word.start:
      - if:
          condition:
            and:
              - not:
                  - voice_assistant.is_running:
              - lambda: return id(wake_word_engine_location).state == "In Home Assistant";
          then:
            - lambda: id(va).set_use_wake_word(true);
            - voice_assistant.start_continuous:

  # Stops either mWW or the streaming wake word, depending on the
  # configured location.
  - id: stop_wake_word
    then:
      - if:
          condition:
            lambda: return id(wake_word_engine_location).state == "In Home Assistant";
          then:
            - lambda: id(va).set_use_wake_word(false);
            - voice_assistant.stop:
      - if:
          condition:
            lambda: return id(wake_word_engine_location).state == "On device";
          then:
            - micro_wake_word.stop:

  # Set the voice assistant phase to idle or muted, depending on whether
  # the software mute switch is on.
  - id: set_idle_or_mute_phase
    then:
      - if:
          condition:
            switch.is_off: mute
          then:
            - lambda: id(voice_assistant_phase) = ${voice_assist_idle_phase_id};
          else:
            - lambda: id(voice_assistant_phase) = ${voice_assist_muted_phase_id};
switch:
  # GPIO4 speaker-enable line; hidden in Home Assistant by default.
  - platform: gpio
    name: Speaker Enable
    pin: GPIO4
    restore_mode: RESTORE_DEFAULT_ON
    entity_category: config
    disabled_by_default: true

  # Manually start or stop microphone capture.
  - platform: template
    name: Stream
    optimistic: true
    on_turn_on:
      - microphone.capture:
    on_turn_off:
      - microphone.stop_capture:

  # Software mute: mutes the microphone and switches between the muted and
  # idle phases.
  - platform: template
    name: Mute
    id: mute
    icon: "mdi:microphone-off"
    optimistic: true
    restore_mode: RESTORE_DEFAULT_OFF
    entity_category: config
    on_turn_off:
      - microphone.unmute:
      - lambda: id(voice_assistant_phase) = ${voice_assist_idle_phase_id};
      - script.execute: draw_display
    on_turn_on:
      - microphone.mute:
      - lambda: id(voice_assistant_phase) = ${voice_assist_muted_phase_id};
      - script.execute: draw_display

  # Internal flag turned on by voice_assistant on_timer_finished.
  - platform: template
    id: timer_ringing
    optimistic: true
    internal: true
    restore_mode: ALWAYS_OFF
    # The auto-off belongs on turn-on: the flag clears itself 15 minutes
    # after a timer finishes. Under on_turn_off it could never fire while
    # the switch was on, and only re-issued a no-op turn_off after a manual
    # turn-off.
    on_turn_on:
      - delay: 15min
      - switch.turn_off: timer_ringing
# Chooses where the wake word is detected. A change (outside of
# initialisation) waits until the assistant is idle or muted, stops the
# previously used engine, pauses 500 ms and starts the other one unless muted.
select:
  - platform: template
    id: wake_word_engine_location
    name: Wake word engine location
    icon: "mdi:account-voice"
    entity_category: config
    optimistic: true
    restore_value: true
    options:
      - In Home Assistant
      - On device
    initial_option: On device
    on_value:
      - if:
          condition:
            lambda: return !id(init_in_progress);
          then:
            - wait_until:
                lambda: return id(voice_assistant_phase) == ${voice_assist_muted_phase_id} || id(voice_assistant_phase) == ${voice_assist_idle_phase_id};
            # Switch to streaming wake word detection in Home Assistant.
            - if:
                condition:
                  lambda: return x == "In Home Assistant";
                then:
                  - micro_wake_word.stop:
                  - delay: 500ms
                  - if:
                      condition:
                        switch.is_off: mute
                      then:
                        - lambda: id(va).set_use_wake_word(true);
                        - voice_assistant.start_continuous:
            # Switch to on-device micro wake word detection.
            - if:
                condition:
                  lambda: return x == "On device";
                then:
                  - lambda: id(va).set_use_wake_word(false);
                  - voice_assistant.stop:
                  - delay: 500ms
                  - if:
                      condition:
                        switch.is_off: mute
                      then:
                        - micro_wake_word.start:
# Binary backlight output driven through the I2C device at 0x30.
output:
  - platform: template
    id: backlight_output
    type: binary
    write_action:
      - lambda: |-
          // Sequence: byte 0x10 first, 20 ms pause, then 0x06 for "on" or
          // 0x05 for "off". NOTE(review): byte meanings are not documented
          // here; confirm against the board's documentation.
          static const uint8_t prefix_cmd[] = {0x10};
          static const uint8_t on_cmd[] = {0x06};
          static const uint8_t off_cmd[] = {0x05};
          id(pca9557).write(prefix_cmd, sizeof(prefix_cmd));
          delay(20);
          const uint8_t *level_cmd = state ? on_cmd : off_cmd;
          id(pca9557).write(level_cmd, sizeof(on_cmd));
# Backlight exposed as an on/off light; always on after boot.
light:
  - platform: binary
    id: backlight
    name: "Backlight"
    output: backlight_output
    restore_mode: ALWAYS_ON
# Runtime state shared between automations; none of it is persisted.
globals:
  # True from boot until the API client connects or the 30 s boot
  # timeout clears it.
  - id: init_in_progress
    type: bool
    restore_value: false
    initial_value: "true"
  # Current UI phase; starts as "not ready".
  - id: voice_assistant_phase
    type: int
    restore_value: false
    initial_value: "${voice_assist_not_ready_phase_id}"
  # Results written by the timer helper scripts.
  - id: global_first_active_timer
    type: voice_assistant::Timer
    restore_value: false
  - id: global_is_timer_active
    type: bool
    restore_value: false
  - id: global_first_timer
    type: voice_assistant::Timer
    restore_value: false
  - id: global_is_timer
    type: bool
    restore_value: false
# Images compiled into the firmware. All illustrations are resized to
# 320x240, stored as RGB565 and keep their alpha channel.
image:
  - id: solar_power_icon
    file: mdi:sun-wireless-outline
    resize: 50x50
    type: grayscale

  - id: casita_error
    file: ${error_illustration_file}
    resize: 320x240
    type: RGB565
    transparency: alpha_channel

  - id: casita_idle
    file: ${idle_illustration_file}
    resize: 320x240
    type: RGB565
    transparency: alpha_channel

  - id: casita_listening
    file: ${listening_illustration_file}
    resize: 320x240
    type: RGB565
    transparency: alpha_channel

  - id: casita_thinking
    file: ${thinking_illustration_file}
    resize: 320x240
    type: RGB565
    transparency: alpha_channel

  - id: casita_replying
    file: ${replying_illustration_file}
    resize: 320x240
    type: RGB565
    transparency: alpha_channel

  - id: casita_timer_finished
    file: ${timer_finished_illustration_file}
    resize: 320x240
    type: RGB565
    transparency: alpha_channel

  - id: casita_initializing
    file: ${loading_illustration_file}
    resize: 320x240
    type: RGB565
    transparency: alpha_channel

  - id: error_no_wifi
    file: https://github.com/esphome/wake-word-voice-assistants/raw/main/error_box_illustrations/error-no-wifi.png
    resize: 320x240
    type: RGB565
    transparency: alpha_channel

  - id: error_no_ha
    file: https://github.com/esphome/wake-word-voice-assistants/raw/main/error_box_illustrations/error-no-ha.png
    resize: 320x240
    type: RGB565
    transparency: alpha_channel
font:
  # Local TrueType file in several sizes.
  - id: font_clock
    file: "arialr.ttf"
    size: 120
  - id: font_medium
    file: "arialr.ttf"
    size: 24
  - id: font_large
    file: "arialr.ttf"
    size: 32
  # NOTE(review): size 1 looks like a typo for a larger value; confirm.
  - id: font_small
    file: "arialr.ttf"
    size: 1

  # Google Fonts family from the substitutions: request text (italic),
  # response text and the timer countdown.
  - id: font_request
    file:
      type: gfonts
      family: ${font_family}
      weight: 300
      italic: true
    size: 15
    glyphsets:
      - ${font_glyphsets}
  - id: font_response
    file:
      type: gfonts
      family: ${font_family}
      weight: 300
    size: 15
    glyphsets:
      - ${font_glyphsets}
  - id: font_timer
    file:
      type: gfonts
      family: ${font_family}
      weight: 300
    size: 30
    glyphsets:
      - ${font_glyphsets}
# Last recognised request and last response. States longer than 32
# characters are shortened to 31 characters plus "...".
text_sensor:
  - platform: template
    id: text_request
    on_value:
      lambda: |-
        if (id(text_request).state.length() > 32) {
          const std::string full_text = id(text_request).state.c_str();
          const std::string shortened = esphome::str_truncate(full_text, 31) + "...";
          id(text_request).state = shortened.c_str();
        }

  - platform: template
    id: text_response
    on_value:
      lambda: |-
        if (id(text_response).state.length() > 32) {
          const std::string full_text = id(text_response).state.c_str();
          const std::string shortened = esphome::str_truncate(full_text, 31) + "...";
          id(text_response).state = shortened.c_str();
        }
# Named colours: one background per illustration (from substitutions)
# plus the two timer-bar colours.
color:
  - id: idle_color
    hex: ${idle_illustration_background_color}
  - id: listening_color
    hex: ${listening_illustration_background_color}
  - id: thinking_color
    hex: ${thinking_illustration_background_color}
  - id: replying_color
    hex: ${replying_illustration_background_color}
  - id: loading_color
    hex: ${loading_illustration_background_color}
  - id: error_color
    hex: ${error_illustration_background_color}

  # Used by the draw_timer_timeline script.
  - id: active_timer_color
    hex: "26ed3a"
  - id: paused_timer_color
    hex: "3b89e3"
# 800x480 parallel RGB panel. Never refreshed on a timer and never
# auto-cleared (update_interval: never, auto_clear_enabled: false).
# NOTE(review): the post says the YAML was split, so further display
# options may follow in the next part.
display:
  - platform: rpi_dpi_rgb
    id: main_display
    dimensions:
      width: 800
      height: 480
    color_order: RGB
    invert_colors: true
    update_interval: never
    auto_clear_enabled: false
    # Sync and clock signals.
    de_pin: 42
    hsync_pin: 40
    vsync_pin: 41
    pclk_pin: 39
    pclk_frequency: 20MHz
    hsync_pulse_width: 4
    hsync_front_porch: 8
    hsync_back_porch: 8
    vsync_pulse_width: 4
    vsync_front_porch: 8
    vsync_back_porch: 8
    # Pin order within each colour is significant.
    data_pins:
      red: [7, 17, 18, 3, 46]
      green: [9, 10, 11, 12, 13, 14]
      blue: [21, 47, 48, 45, 38]
I have to split the YAML.