I couldn’t reliably get the wake word to work, so I left it out entirely, but this is what I made based on what I’ve found around the internet about it. The speaker works fine for me: you hold Button A until it detects silence and starts processing, then you can let go, and it will show the response on the display and say it on the speaker. I tried the media player component, but it was slowing things down, so I stuck with the speaker. It’s not very loud, but you can monitor a sensor in HA, pull the TTS file from it when it changes, and play it on something else if you want. I also have code in it to use the device as an RFID scanner with the RFID 2 unit, but that was slowing things down too, so it’s commented out. Honestly, it works fine. I’m not sure whether I just don’t understand the silence detection flag or whether it simply doesn’t turn off when you set it to false (to act like proper PTT), but waiting for it to start processing on its own is fine for now. I also put in a battery bar and a charge detection script, but it’s not very reliable.
# Shared names, expanded elsewhere in this file via $devicename / ${upper_devicename}.
substitutions:
  # Node name used by the esphome: section.
  devicename: "m5stickc-plus2"
  # Prefix for the button and light entity names.
  upper_devicename: "M5StickC PLUS2"
# Core node definition plus the boot and shutdown automations.
esphome:
  name: ${devicename}
  friendly_name: ${upper_devicename}
  platformio_options:
    upload_speed: 115200
  on_boot:
    - priority: 600
      then:
        # Set GPIO4 high to maintain power
        - output.turn_on: hold_pin
        # Start on the home screen, rendered all at once, with the backlight on.
        - select.set:
            id: output_mode_select
            option: "Full Mode"
        - text_sensor.template.publish:
            id: display_text
            state: " Voice Assistant\n\n Button B - Backlight ↖\n\n Button A - Voice →\n\n Button C - Refresh ↘"
        - light.turn_on: display_bl
  on_shutdown:
    - priority: 600
      then:
        # Release the power-hold pin, then refresh the battery reading once more.
        - output.turn_off: hold_pin
        - component.update: battery_percentage
# Station credentials are resolved through !secret; the fallback access
# point reuses the same password secret.
wifi:
  ssid: !secret wifi_ssid
  password: !secret wifi_password
  ap:
    ssid: 'M5Stickc-Plus2 Fallback Hotspot'
    password: !secret wifi_password

# Enabled with no options set.
captive_portal:
# Target board and build framework.
esp32:
  board: m5stick-c
  flash_size: 8MB
  framework:
    type: arduino
# Logging with default settings.
logger:

# Over-the-air updates; the password below is a placeholder from the original post.
ota:
  - platform: esphome
    password: "..."

# Native API with transport encryption; the key below is a placeholder as well.
api:
  encryption:
    key: "..."
sensor:
  # Battery voltage read from the ADC on GPIO38 once per second.
  - platform: adc
    pin: GPIO38
    attenuation: 12db
    update_interval: 1s
    name: "Battery Voltage"
    id: battery_voltage
    filters:
      # NOTE(review): the x2 factor presumably undoes an on-board 1:2 divider - confirm.
      - multiply: 2.0
      - round: 2
      - quantile:
          window_size: 7
          send_every: 4
          send_first_at: 3
          quantile: 0.9

  # Battery percentage derived from the filtered voltage (linear 3.0 V..4.2 V),
  # then smoothed with an exponential moving average.
  - platform: template
    name: "Battery Percentage"
    id: battery_percentage
    update_interval: 1s
    unit_of_measurement: "%"
    lambda: |-
      if (!id(battery_voltage).has_state()) {
        return 0.0;  // Default to 0% if the voltage is not available
      }
      float voltage = id(battery_voltage).state;
      // Adjust voltage ranges if needed
      float min_voltage = 3.0;
      float max_voltage = 4.2;
      // Calculate percentage with a simple linear interpolation
      float percentage = (voltage - min_voltage) / (max_voltage - min_voltage) * 100.0;
      // Apply limits
      percentage = percentage > 100.0 ? 100.0 : (percentage < 0.0 ? 0.0 : percentage);
      // Exponential moving average to smooth changes (adjust alpha as needed)
      static float smoothed_percentage = percentage;
      float alpha = 0.5;  // Smoothing factor between 0 and 1
      smoothed_percentage = alpha * percentage + (1 - alpha) * smoothed_percentage;
      return smoothed_percentage;

  # Backlight inactivity countdown. Each evaluation (once per second) decrements
  # the inactivity_counter global; at zero the backlight is faded out.
  - platform: template
    name: "Inactivity Timer"
    id: inactivity_timer
    update_interval: 1s
    unit_of_measurement: "s"
    internal: true
    lambda: |-
      if (id(inactivity_counter) > 0) {
        id(inactivity_counter)--;
      }
      // Only request the fade-out while the backlight's target state is still
      // "on". Without this check the turn_off call (and its 1 s transition) was
      // re-issued every second for as long as the counter stayed at zero.
      if (id(inactivity_counter) == 0 && id(display_bl).remote_values.is_on()) {
        auto call = id(display_bl).turn_off();
        call.set_transition_length(1000);  // in ms
        call.perform();
      }
      return id(inactivity_counter);
# Voice Assistant Configuration
# Each pipeline trigger below updates the on-screen text (display_text), the
# renderer mode (output_mode_select), the status LED and the backlight.
voice_assistant:
  id: va
  microphone: stickc_microphone
  speaker: media_out
  noise_suppression_level: 2
  auto_gain: 31dBFS
  volume_multiplier: 2.0

  # Show the listening prompt in one piece and wake the backlight.
  on_listening:
    - select.set:
        id: output_mode_select
        option: "Full Mode"
    - text_sensor.template.publish:
        id: display_text
        state: "Listening..."
    - light.turn_on: display_bl

  # LED on while the request is being processed.
  on_stt_vad_end:
    - light.turn_on: display_bl
    - light.turn_on: led1
    - text_sensor.template.publish:
        id: display_text
        state: "Processing..."

  # The trigger's x value is shown word by word and also stored in
  # text_response, which the Button A release handler compares against.
  on_tts_start:
    - light.turn_on: display_bl
    - light.turn_off: led1
    - select.set:
        id: output_mode_select
        option: "Word Mode"
    - text_sensor.template.publish:
        id: display_text
        state: !lambda "return x;"
    - text_sensor.template.publish:
        id: text_response
        state: !lambda "return x;"

  # Restart the backlight countdown and publish this trigger's x value to
  # the voice_response sensor.
  on_tts_end:
    - globals.set:
        id: inactivity_counter
        value: "10"
    - text_sensor.template.publish:
        id: voice_response
        state: !lambda "return x;"

  # Show the error message on screen.
  on_error:
    - light.turn_on: display_bl
    - light.turn_off: led1
    - text_sensor.template.publish:
        id: display_text
        state: !lambda "return message;"

  # Return to the home screen and restart the backlight countdown.
  on_idle:
    - light.turn_off: led1
    - select.set:
        id: output_mode_select
        option: "Full Mode"
    - text_sensor.template.publish:
        id: display_text
        state: " Voice Assistant\n\n Button B - Backlight ↖\n\n Button A - Voice →\n\n Button C - Refresh ↘"
    - globals.set:
        id: inactivity_counter
        value: "10"
# Internal selector read by the display lambda to decide how text is revealed:
# one character per frame, one token per frame, or everything at once.
select:
  - platform: template
    id: output_mode_select
    name: "Output Mode"
    internal: true
    optimistic: true
    initial_option: "Full Mode"
    options:
      - "Character Mode"
      - "Word Mode"
      - "Full Mode"
# Countdown decremented by the "Inactivity Timer" sensor and reset to 10 by
# the automations and the display lambda.
globals:
  - id: inactivity_counter
    type: int
    initial_value: "10"
    restore_value: false
# Backlight brightness control; every change is forwarded to the display_bl light.
number:
  - platform: template
    name: "Brightness"
    optimistic: true
    restore_value: true
    initial_value: 0.8
    min_value: 0.65
    max_value: 1.00
    step: 0.01
    set_action:
      - light.turn_on:
          id: display_bl
          brightness: !lambda 'return x;'
# Buttons A/B/C plus a template sensor that guesses the charging state.
binary_sensor:
  # Button A to activate the microphone and trigger voice assistant
  - platform: gpio
    pin:
      number: GPIO37
      inverted: true
    name: ${upper_devicename} Button A
    disabled_by_default: true
    entity_category: diagnostic
    on_press:
      # Start a new session only if none is running.
      - if:
          condition:
            not:
              - voice_assistant.is_running
          then:
            - output.turn_off: buzzer
            - voice_assistant.start:
                silence_detection: false
            - text_sensor.template.publish:
                id: display_text
                state: "Listening..."
            - light.turn_on: display_bl
    on_release:
      - delay: 300ms
      - voice_assistant.stop:
      # Go back to the home screen unless a response or the "Processing..."
      # notice is currently being shown.
      - if:
          condition:
            and:
              - lambda: |-
                  return id(display_text).state != id(text_response).state;
              - lambda: |-
                  return id(display_text).state != "Processing...";
          then:
            - select.set:
                id: output_mode_select
                option: "Full Mode"
            - text_sensor.template.publish:
                id: display_text
                state: " Voice Assistant\n\n Button B - Backlight ↖\n\n Button A - Voice →\n\n Button C - Refresh ↘"
            - globals.set:
                id: inactivity_counter
                value: "10"

  # Button B: a short click toggles the backlight.
  - platform: gpio
    pin:
      number: GPIO39
      inverted: true
    name: ${upper_devicename} Button B
    disabled_by_default: true
    entity_category: diagnostic
    on_click:
      min_length: 50ms
      max_length: 350ms
      then:
        - light.toggle: display_bl

  # Button C: turns the backlight on; the display lambda also reads its state
  # to restart text rendering.
  - platform: gpio
    pin:
      number: GPIO35
      inverted: true
    name: ${upper_devicename} Button C
    id: button_c
    disabled_by_default: true
    entity_category: diagnostic
    on_click:
      then:
        - light.turn_on: display_bl

  # Charging heuristic: looks at the trend of the last three distinct battery
  # percentage readings. Strictly rising by more than 0.5 % per step means
  # charging, strictly falling by more than 0.1 % per step means discharging,
  # and anything else keeps the previous state.
  - platform: template
    name: "Charging Status"
    id: charging_status
    lambda: |-
      // Retrieve the current battery percentage
      const float current_percentage = id(battery_percentage).state;
      // Without a reading yet, publish nothing. Seeding the history with NaN
      // made every comparison below false, which left is_charging == true.
      if (std::isnan(current_percentage)) {
        return {};
      }
      // Ring buffer of the last three distinct readings. 'index' is the slot
      // written next, which is always the oldest sample.
      static float last_readings[3] = {current_percentage, current_percentage, current_percentage};
      static float last_valid_reading = current_percentage;
      static int index = 0;
      // Define thresholds
      const float charging_threshold = 0.5;     // 0.5% increase
      const float discharging_threshold = 0.1;  // 0.1% decrease
      // Only update the buffer if the percentage has changed
      if (current_percentage != last_valid_reading) {
        last_readings[index] = current_percentage;
        index = (index + 1) % 3;  // Cycle through the last three readings
        last_valid_reading = current_percentage;
      }
      // Evaluate the trend oldest -> newest. Comparing in storage order is
      // wrong once the write position has wrapped around.
      bool is_charging = true;
      bool is_discharging = true;
      for (int i = 1; i < 3; i++) {
        const float previous = last_readings[(index + i - 1) % 3];
        const float current = last_readings[(index + i) % 3];
        if (current <= previous + charging_threshold) {
          is_charging = false;
        }
        if (current >= previous - discharging_threshold) {
          is_discharging = false;
        }
      }
      // Return true if charging trend is detected, false if discharging, otherwise hold the previous state
      return is_charging ? true : (is_discharging ? false : id(charging_status).state);
# Outputs: status LED, buzzer, LCD backlight (all LEDC PWM) and the power-hold GPIO.
output:
  # LED channel, driven through the led1 light.
  - platform: ledc
    id: builtin_led
    pin: GPIO19

  # Buzzer output
  - platform: ledc
    id: buzzer
    pin:
      number: GPIO2
    inverted: false

  # Backlight TFT LCD
  - platform: ledc
    id: backlight
    pin: GPIO27
    inverted: false

  # Turned on at boot (to maintain power) and off at shutdown.
  - platform: gpio
    id: hold_pin
    pin:
      number: GPIO4
      mode: OUTPUT
      inverted: false
# The only I2S bus defined in this file; the microphone references it by id.
i2s_audio:
  - id: mic_adc
    i2s_bclk_pin: GPIO26
    i2s_lrclk_pin:
      number: GPIO0
      ignore_strapping_warning: true

# PDM microphone input on GPIO34, left channel.
microphone:
  - platform: i2s_audio
    id: stickc_microphone
    i2s_audio_id: mic_adc
    adc_type: external
    i2s_din_pin: GPIO34
    pdm: true
    channel: left
# media_player:
# - platform: i2s_audio
# id: media_out
# name: ${upper_devicename} I2S Media Player
# dac_type: external
# i2s_dout_pin: GPIO25
# mode: mono
# Speaker used by voice_assistant (referenced there as media_out).
speaker:
  - platform: i2s_audio
    id: media_out
    dac_type: external
    i2s_dout_pin: GPIO25
text_sensor:
  # Non-internal sensor; receives the payload of voice_assistant's on_tts_end trigger.
  - platform: template
    name: "Voice Assistant Response"
    id: voice_response
    icon: "mdi:message-text-outline"
    update_interval: never
  # - platform: template
  #   name: "NFC Tag Sensor"
  #   id: nfc_tag_sensor
  #   icon: "mdi:nfc" # Optional icon for NFC
  #   update_interval: never # Update only when tag is scanned

  # Internal: the text the display lambda is currently rendering.
  # Named distinctly from voice_response above; the two previously shared
  # the same name, which made log output ambiguous.
  - platform: template
    name: "Voice Assistant Display Text"
    id: display_text
    internal: true
    update_interval: never

  # Internal: copy of the last on_tts_start payload, compared against
  # display_text when Button A is released.
  - platform: template
    name: "Voice Assistant Response Text"
    id: text_response
    internal: true
    update_interval: never
# Light entities layered on the LEDC outputs.
light:
  # Status LED.
  - platform: monochromatic
    id: led1
    name: ${upper_devicename} Led
    output: builtin_led

  # LCD backlight; turning it on restarts the inactivity countdown.
  - platform: monochromatic
    id: display_bl
    name: ${upper_devicename} Backlight
    output: backlight
    restore_mode: RESTORE_DEFAULT_ON
    on_turn_on:
      then:
        - globals.set:
            id: inactivity_counter
            value: "10"
# ST7789V panel. Every 150 ms the lambda redraws the wrapped text from
# display_text plus a vertical battery bar on the right-hand edge.
display:
  - platform: ili9xxx
    model: st7789v
    cs_pin: GPIO5
    dc_pin: GPIO14
    reset_pin: GPIO12
    rotation: 90
    dimensions:
      height: 240
      width: 135
      offset_height: 40
      offset_width: 52
    invert_colors: true
    update_interval: 150ms
    lambda: |-
      // How the text in display_text is revealed:
      //   CHAR_MODE - one character per frame
      //   WORD_MODE - one token (word / whitespace run / newline) per frame
      //   FULL_MODE - everything laid out at once
      enum ProcessingMode {
        CHAR_MODE,
        WORD_MODE,
        FULL_MODE
      };

      // State kept across frames.
      static size_t current_char_index = 0;                // next character (CHAR_MODE)
      static std::deque<std::string> lines;                // finished lines, oldest first
      static std::string text_buffer;                      // text currently being rendered
      static std::string line_buffer;                      // line under construction
      static size_t last_space_index = std::string::npos;  // last space/tab in line_buffer
      static std::vector<std::string> tokens;              // tokenised text (WORD/FULL_MODE)
      static size_t current_token_index = 0;               // next token (WORD/FULL_MODE)
      static ProcessingMode last_process_mode = FULL_MODE;

      // size_t so that comparisons against string/container sizes are unsigned on both sides.
      const size_t max_chars_per_line = 26;
      const size_t max_lines = 8;

      // Appends a finished line, evicting the oldest one once max_lines is reached.
      auto push_line = [&](const std::string &text) {
        if (lines.size() == max_lines) {
          lines.pop_front();
        }
        lines.push_back(text);
      };

      // Performs one layout step for tokens[current_token_index]:
      //   - newline: commit the current line and advance
      //   - token fits: append it and advance
      //   - token does not fit, line not empty: commit the line only; the same
      //     token is retried on the next step
      //   - token does not fit, line empty: hard-split it across lines and advance
      auto place_next_token = [&]() {
        const std::string &token = tokens[current_token_index];
        if (token == "\n") {
          push_line(line_buffer);
          line_buffer.clear();
          current_token_index++;
          return;
        }
        if (line_buffer.length() + token.length() <= max_chars_per_line) {
          line_buffer += token;
          current_token_index++;
          return;
        }
        if (!line_buffer.empty()) {
          push_line(line_buffer);
          line_buffer.clear();
          return;
        }
        // Token is too big to fit on one line, need to split it
        size_t token_pos = 0;
        while (token_pos < token.length()) {
          const size_t remaining_space = max_chars_per_line - line_buffer.length();
          const size_t chars_to_copy = std::min(remaining_space, token.length() - token_pos);
          line_buffer += token.substr(token_pos, chars_to_copy);
          token_pos += chars_to_copy;
          if (line_buffer.length() >= max_chars_per_line) {
            push_line(line_buffer);
            line_buffer.clear();
          }
        }
        current_token_index++;
      };

      // Get the selected mode from the select component (unknown values fall back to FULL_MODE)
      const std::string selected_mode = id(output_mode_select).state;
      ProcessingMode process_mode = FULL_MODE;
      if (selected_mode == "Character Mode") {
        process_mode = CHAR_MODE;
      } else if (selected_mode == "Word Mode") {
        process_mode = WORD_MODE;
      }

      const std::string response = id(display_text).state;
      const bool mode_changed = (process_mode != last_process_mode);
      // Button C held during this frame restarts the rendering from scratch.
      const bool refresh_requested = id(button_c).state;

      // If the response has changed, the mode has changed, or the button has been pressed, reset the tracking
      if (text_buffer != response || mode_changed || refresh_requested) {
        lines.clear();
        text_buffer = response;
        line_buffer.clear();
        last_space_index = std::string::npos;
        current_char_index = 0;
        current_token_index = 0;

        // In word mode or full mode, split the text into tokens:
        // "\n", runs of spaces/tabs, and runs of everything else.
        if (process_mode == WORD_MODE || process_mode == FULL_MODE) {
          tokens.clear();
          size_t pos = 0;
          while (pos < text_buffer.length()) {
            const char current_char = text_buffer[pos];
            if (current_char == '\n') {
              tokens.push_back("\n");
              pos++;
            } else if (current_char == ' ' || current_char == '\t') {
              const size_t start = pos;
              while (pos < text_buffer.length() && (text_buffer[pos] == ' ' || text_buffer[pos] == '\t')) {
                pos++;
              }
              tokens.push_back(text_buffer.substr(start, pos - start));
            } else {
              const size_t start = pos;
              while (pos < text_buffer.length() && text_buffer[pos] != ' ' && text_buffer[pos] != '\t' && text_buffer[pos] != '\n') {
                pos++;
              }
              tokens.push_back(text_buffer.substr(start, pos - start));
            }
          }
        }

        // In FULL_MODE, lay out every token immediately.
        if (process_mode == FULL_MODE) {
          while (current_token_index < tokens.size()) {
            place_next_token();
          }
          // Add any remaining text in line_buffer to lines
          if (!line_buffer.empty()) {
            push_line(line_buffer);
            line_buffer.clear();
          }
        }

        last_process_mode = process_mode;
      }

      // Incremental processing: one step per frame in CHAR_MODE / WORD_MODE.
      // FULL_MODE needs nothing here; its lines were prepared above.
      if (process_mode == CHAR_MODE) {
        if (current_char_index < text_buffer.length()) {
          const char next_char = text_buffer[current_char_index];
          current_char_index++;
          if (next_char == '\n') {
            push_line(line_buffer);
            line_buffer.clear();
            last_space_index = std::string::npos;
          } else {
            line_buffer += next_char;
            if (next_char == ' ' || next_char == '\t') {
              last_space_index = line_buffer.length() - 1;
            }
            if (line_buffer.length() >= max_chars_per_line) {
              std::string line_to_add;
              if (last_space_index != std::string::npos) {
                // Wrap at the last space; the remainder starts the next line.
                line_to_add = line_buffer.substr(0, last_space_index);
                line_buffer = line_buffer.substr(last_space_index + 1);
              } else {
                // No space found, we have to wrap mid-word
                line_to_add = line_buffer.substr(0, max_chars_per_line);
                line_buffer = line_buffer.substr(max_chars_per_line);
              }
              // The remainder lies after the last recorded space/tab, so it
              // cannot contain one itself.
              last_space_index = std::string::npos;
              push_line(line_to_add);
            }
          }
          // Keep the backlight on while text is still being revealed.
          id(inactivity_counter) = 10;
        }
      } else if (process_mode == WORD_MODE) {
        if (current_token_index < tokens.size()) {
          place_next_token();
          // Keep the backlight on while text is still being revealed.
          id(inactivity_counter) = 10;
        }
      }

      // Clear the screen
      it.fill(id(my_black));
      // it.image(0, 0, id(background_image));

      // Temporarily append the line under construction so it is drawn too.
      bool added_line_buffer = false;
      if (!line_buffer.empty()) {
        push_line(line_buffer);
        added_line_buffer = true;
      }
      // Render the lines, 16 px apart
      int y_offset = 0;
      for (const auto &line : lines) {
        it.print(0, y_offset, id(font_id), id(my_green), line.c_str());
        y_offset += 16;
      }
      // After rendering, remove 'line_buffer' if it was added
      if (added_line_buffer) {
        lines.pop_back();
      }

      // Battery bar on the right edge. The sensor has no state (NaN) until its
      // first update, and casting NaN to int is undefined, so fall back to 0
      // and clamp to 0..100 before scaling to the 135 px height.
      float battery_level = id(battery_percentage).state;
      if (std::isnan(battery_level) || battery_level < 0.0f) {
        battery_level = 0.0f;
      } else if (battery_level > 100.0f) {
        battery_level = 100.0f;
      }
      const int bar_height = static_cast<int>(battery_level / 100.0 * 135);
      const int bar_width = 5;
      if (id(charging_status).state) {
        it.filled_rectangle(235, 135 - bar_height, bar_width, bar_height, id(my_blue));  // Blue when charging
      } else {
        it.filled_rectangle(235, 135 - bar_height, bar_width, bar_height, id(my_green));  // Green when not charging
      }
      // Draw red bar to fill the remaining space
      it.filled_rectangle(235, 0, bar_width, 135 - bar_height, id(my_red));
# SPI bus (clock + MOSI only).
spi:
  mosi_pin: GPIO15
  clk_pin: GPIO13
# Font used for all on-screen text. Only the glyphs listed here are available,
# including the arrows used on the home screen.
font:
  - id: font_id
    file: "gfonts://LXGW WenKai Mono TC"
    size: 18
    glyphs: " !\"#$%&'()*+,-—./:;<>?@[\\]^_`{|}~°±×÷=≠≤≥∞√∑∏∫∂∇€£¥¢₩‘’“”0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ︵︶︷︸︹︺︻︼︽︾︿﹀□←↑→↓↔↖↗↘↙╭╮╯╰─━┄┅│┃┆┇┌┍┏┐┑┓└┕┗┘┙┛├┝┠┤┥┨┬┯┳┴┷┻┼┽┿╀╁╂"
# Named colours. The display lambda shown in this file uses my_black,
# my_green, my_blue and my_red; the others are defined but not referenced
# in the visible config.
color:
  - id: my_white
    red: 100%
    green: 100%
    blue: 100%

  - id: my_blue
    red: 0%
    green: 0%
    blue: 100%

  - id: my_red
    red: 100%
    green: 0%
    blue: 0%

  - id: my_green
    red: 0%
    green: 100%
    blue: 0%

  - id: my_yellow
    red: 100%
    green: 100%
    blue: 0%

  - id: my_orange
    red: 100%
    green: 50%
    blue: 0%

  - id: my_black
    red: 0%
    green: 0%
    blue: 0%
# external_components:
# - source:
# type: git
# url: https://github.com/chill-Division/M5Stack-ESPHome/
# ref: main
# components: mfrc522_i2c
# i2c:
# - id: bus_a
# sda: GPIO32
# scl: GPIO33
# scan: true
# mfrc522_i2c:
# - i2c_id: bus_a
# address: 0x28 # I2C adress
# update_interval: 200ms # It seems happiest with 200ms minimum
# on_tag:
# then:
# - light.turn_on: display_bl
# - select.set:
# id: output_mode_select
# option: "Character Mode"
# - text_sensor.template.publish:
# id: display_text
# state: !lambda 'return x;'
# - homeassistant.tag_scanned: !lambda 'return x;'
# - text_sensor.template.publish:
# id: nfc_tag_sensor
# state: !lambda 'return x;'
# - light.turn_on: led1
# - delay: 1s
# - light.turn_off: led1