As soon as I disable the mic, the speaker works as expected.
With the mic enabled, words are sometimes missing or the audio sounds choppy.
It doesn't matter whether AEC is on or off.
The mic itself works fine all the time.
It feels like the device can't handle the mic and the speaker at the same time.
# Core node definition plus the boot-time codec setup sequence.
esphome:
  name: "jc4880p443"
  friendly_name: JC4880P443
  on_boot:
    priority: 600
    then:
      # Turn on the speaker_enable output, then wait 1 s before touching the codec.
      - output.turn_on: speaker_enable
      - delay: 1s
      # Configure ES8311 for digital feedback AEC
      # Register 0x44 bits[6:4] = ADCDAT_SEL: 4 = DACL + ADC
      # This makes ASDOUT output stereo: L=DAC loopback (reference), R=ADC (mic)
      - lambda: |-
          ESP_LOGI("es8311", "Configuring ES8311 register 0x44 for digital feedback...");
          uint8_t reg = 0x44;
          uint8_t current_val = 0;
          // Write the register index, then read one byte back from device 0x18.
          id(i2c_bus).write(0x18, &reg, 1);
          id(i2c_bus).read(0x18, &current_val, 1);
          ESP_LOGI("es8311", "Register 0x44 current value: 0x%02X", current_val);
          // {register, value}: store 0x48 into register 0x44.
          uint8_t data[2] = {0x44, 0x48};
          auto err = id(i2c_bus).write(0x18, data, 2);
          if (err == esphome::i2c::ERROR_OK) {
            ESP_LOGI("es8311", "Wrote register 0x44=0x48 for digital feedback AEC");
            // Read the register again so the log shows the value after the write.
            id(i2c_bus).write(0x18, &reg, 1);
            id(i2c_bus).read(0x18, &current_val, 1);
            ESP_LOGI("es8311", "Register 0x44 after write: 0x%02X", current_val);
          } else {
            ESP_LOGE("es8311", "Failed to write register 0x44: error %d", (int)err);
          }
      # Restore ES8311 volume and sync AEC reference
      - lambda: |-
          // Map the speaker_volume number (percent) onto the 0.15..0.75 range,
          // the same formula used by the speaker_volume set_action.
          float vol = 0.15 + (id(speaker_volume).state / 100.0) * 0.60;
          id(es8311_dac).set_volume(vol);
          id(i2s_duplex).set_aec_reference_volume(vol);
          id(peer_name) = id(intercom).get_current_destination();
# Logging: DEBUG globally, with per-tag overrides listed under `logs`.
logger:
  hardware_uart: USB_SERIAL_JTAG
  level: DEBUG
  logs:
    # Audio / intercom components
    intercom_api: INFO
    i2s_duplex: DEBUG
    # Everything else is capped at INFO
    component: INFO
    wifi: INFO
    api: INFO
    ota: INFO
    mdns: INFO
    sensor: INFO
    switch: INFO
    light: INFO
    display: INFO
    image: INFO
    animation: INFO
    spi: INFO
    i2c: INFO
    esp32: INFO
# Wi-Fi credentials come from the secrets file.
wifi:
  ssid: !secret wifi_ssid
  password: !secret wifi_password
  fast_connect: true
  post_connect_roaming: false
# SNTP time source (Europe/Rome) using three pool servers.
time:
  - platform: sntp
    id: my_time
    timezone: Europe/Rome
    servers:
      - 0.pool.ntp.org
      - 1.pool.ntp.org
      - 2.pool.ntp.org
# Over-the-air updates via the esphome platform.
ota:
  - platform: esphome
# =============================================================================
# CONNECTIVITY
# =============================================================================
# Native API; publishes the intercom entity states once, on the first client connection.
api:
  on_client_connected:
    - lambda: |-
        // One-shot latch: later connections return immediately.
        static bool published = false;
        if (published) {
          return;
        }
        published = true;
        id(intercom).publish_entity_states();
# Montserrat at 28 px, referenced by the display lambda.
font:
  - id: montserrat_28
    file: "gfonts://Montserrat"
    size: 28
# Outputs: LEDC PWM for the backlight and a GPIO for the speaker enable line.
output:
  - platform: ledc
    id: gpio_backlight_pwm
    pin: 23
  - platform: gpio
    id: speaker_enable
    pin: GPIO11
# Backlight exposed as a dimmable light on the PWM output; always on at boot.
light:
  - platform: monochromatic
    id: backlight
    name: Backlight
    output: gpio_backlight_pwm
    restore_mode: ALWAYS_ON
# Connection status entity.
binary_sensor:
  - platform: status
    name: Status
# =============================================================================
# GLOBALS
# =============================================================================
# Global variables; none are persisted (`restore_value: false` throughout).
globals:
  - id: init_in_progress
    type: bool
    restore_value: false
    initial_value: "true"
  - id: text_page_index
    type: int
    restore_value: false
    initial_value: "0"
  - id: text_pages
    type: std::vector<std::vector<std::string>>
    restore_value: false
  - id: global_is_timer_active
    type: bool
    restore_value: false
  - id: global_is_timer
    type: bool
    restore_value: false
  # Ping-pong animation direction
  - id: anim_direction
    type: bool
    restore_value: false
    initial_value: "true"
  # Geometry cache (precomputed for circular display)
  - id: x_metrics
    type: std::vector<int>
    restore_value: false
  - id: y_metrics
    type: std::vector<int>
    restore_value: false
  - id: chord_widths_cache
    type: std::vector<int>
    restore_value: false
  # Mode: 0=VA, 1=Intercom
  - id: current_mode
    type: int
    restore_value: false
    initial_value: "0"
  # Previous mode (for restoring after incoming call)
  - id: previous_mode
    type: int
    restore_value: false
    initial_value: "0"
  # Intercom peer name for display
  - id: peer_name
    type: std::string
    restore_value: false
    initial_value: '"Home Assistant"'
# Target: ESP32-P4 board on the ESP-IDF framework.
esp32:
  board: esp32-p4-evboard
  #cpu_frequency: 360MHz
  flash_size: 16MB
  framework:
    type: esp-idf
    sdkconfig_options:
      # The ESP32-S3-only symbols (CONFIG_ESP32S3_DEFAULT_CPU_FREQ_240,
      # CONFIG_ESP32S3_DATA_CACHE_64KB, CONFIG_ESP32S3_DATA_CACHE_LINE_64B)
      # were dropped: this board is an ESP32-P4.
      CONFIG_FREERTOS_HZ: "1000"
      # NOTE(review): legacy ESP32 symbol name; Wi-Fi on this board goes through
      # esp32_hosted, so confirm this option still has any effect.
      CONFIG_ESP32_WIFI_TASK_PINNED_TO_CORE_1: "y"
      CONFIG_LWIP_MAX_SOCKETS: "16"
      # Use dynamic TLS buffers to reduce peak memory usage for SSL connections
      CONFIG_MBEDTLS_DYNAMIC_BUFFER: "y"
      CONFIG_MBEDTLS_DYNAMIC_FREE_PEER_CERT: "y"
      CONFIG_MBEDTLS_DYNAMIC_FREE_CONFIG_DATA: "y"
      # Smaller TLS buffers (default 16384 is too much with all components active)
      CONFIG_MBEDTLS_SSL_IN_CONTENT_LEN: "8192"
      CONFIG_MBEDTLS_SSL_OUT_CONTENT_LEN: "4096"
# ==============================================================================
# EXTERNAL COMPONENTS
# ==============================================================================
# Custom components loaded from the local `esphome_components` directory.
external_components:
  - source:
      type: local
      path: esphome_components
    components:
      - intercom_api
      - i2s_audio_duplex
      - esp_aec
# ==============================================================================
# AEC (Acoustic Echo Cancellation)
# ==============================================================================
# Echo canceller instance referenced by i2s_audio_duplex via `aec_id`.
esp_aec:
  id: aec_component
  sample_rate: 16000
  # 64ms echo tail (sufficient for integrated codec)
  filter_length: 4
  # Lightest mode, same quality as high_perf on ESP32-S3
  # NOTE(review): this config targets an ESP32-P4 board; confirm the note above still applies.
  mode: voip_low_cost
# Full-duplex I2S bus shared by the microphone and speaker platforms.
i2s_audio_duplex:
  id: i2s_duplex
  # Pin assignment
  i2s_lrclk_pin: GPIO10
  i2s_bclk_pin: GPIO12
  i2s_mclk_pin: GPIO13
  i2s_din_pin: GPIO48
  i2s_dout_pin: GPIO9
  sample_rate: 16000
  aec_id: aec_component
  # ES8311 digital feedback: RX is stereo L=DAC(reference), R=ADC(mic).
  # Sample-accurate reference alignment, no ring buffer delay needed.
  # Requires ES8311 register 0x44 bits[6:4]=4 (configured in on_boot via I2C).
  use_stereo_aec_reference: true
  # Minimal (sample-aligned via stereo feedback)
  aec_reference_delay_ms: 10
# ==============================================================================
# MICROPHONE (via duplex platform)
# ==============================================================================
# Two microphone views of the same duplex bus: mic_aec and mic_raw (`pre_aec: true`).
microphone:
  - platform: i2s_audio_duplex
    id: mic_aec
    i2s_audio_duplex_id: i2s_duplex
  - platform: i2s_audio_duplex
    id: mic_raw
    i2s_audio_duplex_id: i2s_duplex
    pre_aec: true
# =============================================================================
# SPEAKERS (mixer topology: VA + Intercom -> hw_speaker)
# =============================================================================
# hw_speaker is the physical output; the mixer feeds it from two source speakers.
speaker:
  - platform: i2s_audio_duplex
    id: hw_speaker
    i2s_audio_duplex_id: i2s_duplex
  - platform: mixer
    id: audio_mixer
    output_speaker: hw_speaker
    num_channels: 1
    source_speakers:
      # Used by the media player announcement pipeline
      - id: va_speaker
        timeout: 10s
      # Used by intercom_api
      - id: intercom_speaker
        timeout: 10s
# =============================================================================
# MEDIA PLAYER (VA TTS output through va_speaker)
# =============================================================================
# Speaker-based media player; announcements are routed to va_speaker.
media_player:
  - platform: speaker
    name: None
    id: speaker_media_player
    volume_min: 0.0
    volume_max: 0.8
    announcement_pipeline:
      speaker: va_speaker
      format: FLAC
      sample_rate: 16000
      num_channels: 1
    # Sound played by the timer_ringing switch
    files:
      - id: timer_finished_sound
        file: https://github.com/esphome/home-assistant-voice-pe/raw/dev/sounds/timer_finished.flac
# ==============================================================================
# INTERCOM API (TCP-based, port 6054)
# ==============================================================================
# Auto-creates these sensors:
# - text_sensor: intercom_state (Idle/Ringing/Streaming)
# - text_sensor: destination (selected contact) [full mode only]
# - text_sensor: caller (who is calling) [full mode only]
# - text_sensor: contacts (count) [full mode only]
# Intercom component wired to the AEC microphone and the mixer's intercom source.
intercom_api:
  id: intercom
  mode: full
  microphone: mic_aec
  speaker: intercom_speaker
  ringing_timeout: 30s
  # === FSM event callbacks ===
  on_incoming_call:
    - logger.log: "Incoming call"
  on_outgoing_call:
    # Fire HA event when calling "Home Assistant" (for notifications/automations)
    - if:
        condition:
          lambda: 'return id(intercom).get_current_destination() == "Home Assistant";'
        then:
          - homeassistant.event:
              event: esphome.intercom_call
              data:
                caller: !lambda 'return App.get_friendly_name();'
                destination: "Home Assistant"
                type: "doorbell"
  on_answered:
    - logger.log: "Call answered"
  on_streaming:
    # Show the caller when there is one, otherwise the selected destination.
    - lambda: |-
        std::string caller = id(intercom).get_caller();
        if (!caller.empty()) {
          id(peer_name) = caller;
        } else {
          id(peer_name) = id(intercom).get_current_destination();
        }
    #- output.turn_on: speaker_enable
  on_idle:
    - lambda: 'id(peer_name) = id(intercom).get_current_destination();'
    #- output.turn_off: speaker_enable
  # Restore previous mode after call ends
  # NOTE(review): the handler below only logs the hangup reason.
  on_hangup:
    - logger.log:
        format: "Hangup: %s"
        args: ['reason.c_str()']
  on_call_failed:
    - logger.log:
        format: "Call failed: %s"
        args: ['reason.c_str()']
# ==============================================================================
# BUTTONS
# ==============================================================================
# Template buttons that trigger intercom_api actions, plus a restart button.
button:
  # Smart Call button: idle->call, ringing->answer, streaming->hangup
  # The on_outgoing_call callback handles the HA event for doorbell notifications
  - platform: template
    id: call_button
    name: "Call"
    icon: "mdi:phone"
    on_press:
      - intercom_api.call_toggle:
          id: intercom
  # Next contact (full mode)
  - platform: template
    id: next_contact_button
    name: "Next Contact"
    icon: "mdi:arrow-right"
    on_press:
      - intercom_api.next_contact:
          id: intercom
  # Previous contact (full mode)
  - platform: template
    id: prev_contact_button
    name: "Previous Contact"
    icon: "mdi:arrow-left"
    on_press:
      - intercom_api.prev_contact:
          id: intercom
  # Decline incoming call
  - platform: template
    id: decline_button
    name: "Decline"
    icon: "mdi:phone-hangup"
    on_press:
      - intercom_api.decline_call:
          id: intercom
  # Re-send the current ha_active_devices value as the contact list
  - platform: template
    id: refresh_contacts_button
    name: "Refresh Contacts"
    icon: "mdi:refresh"
    entity_category: config
    on_press:
      - intercom_api.set_contacts:
          id: intercom
          contacts_csv: !lambda 'return id(ha_active_devices).state;'
  - platform: restart
    name: "Restart"
    icon: "mdi:restart"
# =============================================================================
# SWITCHES AND SELECTS
# =============================================================================
# Switches: mic mute, internal timer ringer, intercom auto-answer and the AEC toggle.
switch:
  # Mutes/unmutes both microphone views together
  - platform: template
    name: Mute
    id: mute
    icon: "mdi:microphone-off"
    optimistic: true
    restore_mode: RESTORE_DEFAULT_OFF
    entity_category: config
    on_turn_off:
      - microphone.unmute:
          id: mic_aec
      - microphone.unmute:
          id: mic_raw
    on_turn_on:
      - microphone.mute:
          id: mic_aec
      - microphone.mute:
          id: mic_raw
  # Internal switch that loops the timer-finished sound while on
  - platform: template
    id: timer_ringing
    optimistic: true
    internal: true
    restore_mode: ALWAYS_OFF
    on_turn_off:
      # Disable repeat, clear the playlist delay, then stop the announcement
      - lambda: |-
          id(speaker_media_player)
            ->make_call()
            .set_command(media_player::MediaPlayerCommand::MEDIA_PLAYER_COMMAND_REPEAT_OFF)
            .set_announcement(true)
            .perform();
          id(speaker_media_player)->set_playlist_delay_ms(speaker::AudioPipelineType::ANNOUNCEMENT, 0);
      - media_player.stop:
          announcement: true
    on_turn_on:
      # Repeat the sound with a 1000 ms playlist delay; auto-off after 15 minutes
      - lambda: |-
          id(speaker_media_player)
            ->make_call()
            .set_command(media_player::MediaPlayerCommand::MEDIA_PLAYER_COMMAND_REPEAT_ONE)
            .set_announcement(true)
            .perform();
          id(speaker_media_player)->set_playlist_delay_ms(speaker::AudioPipelineType::ANNOUNCEMENT, 1000);
      - media_player.speaker.play_on_device_media_file:
          media_file: timer_finished_sound
          announcement: true
      - delay: 15min
      - switch.turn_off: timer_ringing
  # Intercom switches
  - platform: intercom_api
    intercom_api_id: intercom
    auto_answer:
      id: auto_answer_switch
      name: "Auto Answer"
      restore_mode: RESTORE_DEFAULT_OFF
  - platform: i2s_audio_duplex
    i2s_audio_duplex_id: i2s_duplex
    aec:
      id: aec_switch
      name: "Echo Cancellation"
      restore_mode: RESTORE_DEFAULT_ON
# =============================================================================
# NUMBERS
# =============================================================================
# Numbers: intercom mic gain, speaker volume and screen timeout.
number:
  - platform: intercom_api
    intercom_api_id: intercom
    mic_gain:
      id: mic_gain
      name: "Mic Gain"
  - platform: template
    id: speaker_volume
    name: "Speaker Volume"
    icon: "mdi:volume-high"
    min_value: 0
    max_value: 80
    step: 5
    initial_value: 80
    optimistic: true
    restore_value: true
    unit_of_measurement: "%"
    set_action:
      # Map the percentage onto 0.15..0.75 and apply it to both the DAC and
      # the AEC reference, as the on_boot lambda does.
      - lambda: |-
          float es8311_vol = 0.15 + (x / 100.0) * 0.60;
          id(es8311_dac).set_volume(es8311_vol);
          id(i2s_duplex).set_aec_reference_volume(es8311_vol);
  - platform: template
    name: Screen timeout
    optimistic: true
    id: display_timeout
    unit_of_measurement: "m"
    initial_value: 5  # minutes
    restore_value: true
    min_value: 0  # 0 is no timeout
    max_value: 99
    step: 1
    mode: box
# ==============================================================================
# TEXT SENSORS
# ==============================================================================
# Text sensors: the HA contacts feed and Wi-Fi diagnostics.
text_sensor:
  # Subscribe to HA's centralized contacts sensor
  - platform: homeassistant
    id: ha_active_devices
    entity_id: sensor.intercom_active_devices
    on_value:
      # Forward every new value to the intercom as its contact list
      - intercom_api.set_contacts:
          id: intercom
          contacts_csv: !lambda 'return x;'
  - platform: wifi_info
    ip_address:
      name: IP Address
      entity_category: diagnostic
    ssid:
      name: Connected SSID
      entity_category: diagnostic
    mac_address:
      name: Mac Address
      entity_category: diagnostic
# ==============================================================================
# DIAGNOSTICS
# ==============================================================================
# Diagnostic sensors, each polled every 60 s.
sensor:
  - platform: uptime
    name: "Uptime"
    update_interval: 60s
  - platform: internal_temperature
    name: "CPU Temperature"
    update_interval: 60s
  - platform: wifi_signal
    id: wifi_signal_db
    name: WiFi Signal
    update_interval: 60s
    entity_category: diagnostic
  # Copy of wifi_signal_db converted to a percentage: 2 * (x + 100), clamped to 0..100
  - platform: copy
    id: wifi_signal_strength
    name: WiFi Strength
    source_id: wifi_signal_db
    filters:
      - lambda: return min(max(2 * (x + 100.0), 0.0), 100.0);
    unit_of_measurement: "%"
    entity_category: diagnostic
# MIPI-DSI panel rotated 90 degrees; the lambda draws three test strings.
display:
  - platform: mipi_dsi
    id: device_display
    model: JC4880P443
    byte_order: little_endian
    rotation: 90
    lambda: |-
      // Clear to black, then print green / white / red lines at x=340.
      it.fill(Color::BLACK);
      it.print(340, 100, id(montserrat_28), Color(0,255,0), TextAlign::LEFT, "Hello World1");
      it.print(340, 200, id(montserrat_28), Color::WHITE, TextAlign::LEFT, "Hello World2");
      it.print(340, 300, id(montserrat_28), Color(255,0,0), TextAlign::LEFT, "Hello World3");
# GT911 touch controller on the shared I2C bus; logs the first touch point.
touchscreen:
  platform: gt911
  i2c_id: i2c_bus
  id: device_touchscreen
  reset_pin: GPIO3
  update_interval: 100ms
  # This is for 90 degree display rotation
  transform:
    swap_xy: true
    mirror_x: false
    mirror_y: true
  on_update:
    then:
      - lambda: |-
          // Only the first reported touch is logged.
          if (!touches.empty()) {
            auto touch = touches[0];
            ESP_LOGI("TOUCH", "X=%d Y=%d", touch.x, touch.y);
          }
# On-chip LDO: channel 3 set to 2.5 V.
esp_ldo:
  - channel: 3
    voltage: 2.5V
# External PSRAM in hex mode at 200 MHz.
psram:
  mode: hex
  speed: 200MHz
# Write stored preferences to flash at most every 5 minutes.
preferences:
  flash_write_interval: 5min
# ESP32-C6 co-processor link (esp32_hosted).
esp32_hosted:
  variant: ESP32C6
  active_high: true
  # Control lines
  reset_pin: GPIO54
  cmd_pin: GPIO19
  clk_pin: GPIO18
  # Data lines
  d0_pin: GPIO14
  d1_pin: GPIO15
  d2_pin: GPIO16
  d3_pin: GPIO17
# I2C bus referenced by the touchscreen and by the on_boot codec register lambda.
i2c:
  id: i2c_bus
  sda: 7
  scl: 8
  frequency: 400kHz
  scan: false
# =============================================================================
# AUDIO CODEC (ES8311)
# =============================================================================
# ES8311 codec: 16-bit samples at 16 kHz with 24 dB microphone gain.
audio_dac:
  - platform: es8311
    id: es8311_dac
    bits_per_sample: 16bit
    sample_rate: 16000
    mic_gain: 24DB