Home Assistant hangs with excessive memory usage

I am running Home Assistant on a machine with 16 GB of RAM, and recently I have seen occasional hangs where the system stops responding for several minutes.

Using ssh to access the supervisor I can run top (and sort by memory: ‘Shift-M’). I get the above screenshot, showing that the python3 process is gobbling all the memory.

Running

ha host reboot

fails with:

Post "http://supervisor/host/reboot": context deadline exceeded (Client.Timeout exceeded while awaiting headers)

Sometimes it recovers in time; otherwise, I have to run a reboot command or power cycle the PC to recover.

The logs don’t give any particular insight into the cause, as nothing gets written when the process memory spikes. This has been happening with increasing frequency, so it now occurs every day or two.

I followed these instructions to install py-spy on HAOS; the results of the dump were:

Python v3.11.6 (/usr/local/bin/python3.11)

Thread 66 (active+gil): "MainThread"
    connect (aiohttp/connector.py:520)
    _request (aiohttp/client.py:574)
    __aenter__ (aiohttp/client.py:1187)
    create_request (aiohue/v2/__init__.py:211)
    request (aiohue/v2/__init__.py:167)
    update (aiohue/v2/controllers/base.py:153)
    set_state (aiohue/v2/controllers/lights.py:97)
    async_request_call (homeassistant/components/hue/bridge.py:123)
    async_turn_on (homeassistant/components/hue/v2/light.py:246)
    async_handle_light_on_service (homeassistant/components/light/__init__.py:624)
    _run (asyncio/events.py:80)
    _run_once (asyncio/base_events.py:1925)
    run_forever (asyncio/base_events.py:607)
    run_until_complete (asyncio/base_events.py:640)
    run (homeassistant/runner.py:188)
    main (homeassistant/__main__.py:209)
    <module> (homeassistant/__main__.py:221)
    _run_code (<frozen runpy>:88)
    _run_module_as_main (<frozen runpy>:198)
Thread 87 (idle): "Thread-1 (_monitor)"
    dequeue (logging/handlers.py:1528)
    _monitor (logging/handlers.py:1579)
    run (threading.py:982)
    _bootstrap_inner (threading.py:1045)
    _bootstrap (threading.py:1002)
Thread 89 (idle): "SyncWorker_0"
    _worker (concurrent/futures/thread.py:81)
    run (threading.py:982)
    _bootstrap_inner (threading.py:1045)
    _bootstrap (threading.py:1002)
Thread 91 (idle): "SyncWorker_1"
    _worker (concurrent/futures/thread.py:81)
    run (threading.py:982)
    _bootstrap_inner (threading.py:1045)
    _bootstrap (threading.py:1002)
Thread 93 (idle): "SyncWorker_2"
    _worker (concurrent/futures/thread.py:81)
    run (threading.py:982)
    _bootstrap_inner (threading.py:1045)
    _bootstrap (threading.py:1002)
Thread 94 (idle): "SyncWorker_3"
    _worker (concurrent/futures/thread.py:81)
    run (threading.py:982)
    _bootstrap_inner (threading.py:1045)
    _bootstrap (threading.py:1002)
Thread 97 (idle): "SyncWorker_4"
    _worker (concurrent/futures/thread.py:81)
    run (threading.py:982)
    _bootstrap_inner (threading.py:1045)
    _bootstrap (threading.py:1002)
Thread 98 (idle): "SyncWorker_5"
    _worker (concurrent/futures/thread.py:81)
    run (threading.py:982)
    _bootstrap_inner (threading.py:1045)
    _bootstrap (threading.py:1002)
Thread 101 (idle): "SyncWorker_6"
    _worker (concurrent/futures/thread.py:81)
    run (threading.py:982)
    _bootstrap_inner (threading.py:1045)
    _bootstrap (threading.py:1002)
Thread 102 (idle): "SyncWorker_7"
    _worker (concurrent/futures/thread.py:81)
    run (threading.py:982)
    _bootstrap_inner (threading.py:1045)
    _bootstrap (threading.py:1002)
Thread 103 (idle): "SyncWorker_8"
    _worker (concurrent/futures/thread.py:81)
    run (threading.py:982)
    _bootstrap_inner (threading.py:1045)
    _bootstrap (threading.py:1002)
Thread 104 (idle): "SyncWorker_9"
    _worker (concurrent/futures/thread.py:81)
    run (threading.py:982)
    _bootstrap_inner (threading.py:1045)
    _bootstrap (threading.py:1002)
Thread 105 (idle): "Recorder"
    fetchall_for_returning (sqlalchemy/engine/default.py:1726)
    _deliver_insertmanyvalues_batches (sqlalchemy/engine/default.py:798)
    _exec_insertmany_context (sqlalchemy/engine/base.py:2030)
    _execute_context (sqlalchemy/engine/base.py:1843)
    _execute_clauseelement (sqlalchemy/engine/base.py:1639)
    _execute_on_connection (sqlalchemy/sql/elements.py:516)
    execute (sqlalchemy/engine/base.py:1416)
    _emit_insert_statements (sqlalchemy/orm/persistence.py:1136)
    save_obj (sqlalchemy/orm/persistence.py:93)
    execute (sqlalchemy/orm/unitofwork.py:642)
    execute_aggregate (sqlalchemy/orm/unitofwork.py:567)
    execute (sqlalchemy/orm/unitofwork.py:463)
    _flush (sqlalchemy/orm/session.py:4408)
    flush (sqlalchemy/orm/session.py:4312)
    _prepare_impl (sqlalchemy/orm/session.py:1231)
    _go (sqlalchemy/orm/state_changes.py:139)
    _prepare_impl (<string>:2)
    commit (sqlalchemy/orm/session.py:1256)
    _go (sqlalchemy/orm/state_changes.py:139)
    commit (<string>:2)
    commit (sqlalchemy/orm/session.py:1969)
    _commit_event_session (homeassistant/components/recorder/core.py:1183)
    _commit_event_session_or_retry (homeassistant/components/recorder/core.py:1163)
    run (homeassistant/components/recorder/tasks.py:305)
    _process_one_task_or_recover (homeassistant/components/recorder/core.py:912)
    _guarded_process_one_task_or_recover (homeassistant/components/recorder/core.py:901)
    _run_event_loop (homeassistant/components/recorder/core.py:873)
    _run (homeassistant/components/recorder/core.py:779)
    run (homeassistant/components/recorder/core.py:699)
    _bootstrap_inner (threading.py:1045)
    _bootstrap (threading.py:1002)
Thread 115 (idle): "Thread-2 (_thread_main)"
    _loop (paho/mqtt/client.py:1150)
    loop_forever (paho/mqtt/client.py:1756)
    _thread_main (paho/mqtt/client.py:3591)
    run (threading.py:982)
    _bootstrap_inner (threading.py:1045)
    _bootstrap (threading.py:1002)
Thread 116 (idle): "SyncWorker_10"
    _worker (concurrent/futures/thread.py:81)
    run (threading.py:982)
    _bootstrap_inner (threading.py:1045)
    _bootstrap (threading.py:1002)
Thread 117 (idle): "SyncWorker_11"
    _worker (concurrent/futures/thread.py:81)
    run (threading.py:982)
    _bootstrap_inner (threading.py:1045)
    _bootstrap (threading.py:1002)
Thread 118 (idle): "SyncWorker_12"
    _worker (concurrent/futures/thread.py:81)
    run (threading.py:982)
    _bootstrap_inner (threading.py:1045)
    _bootstrap (threading.py:1002)
Thread 119 (idle): "SyncWorker_13"
    _worker (concurrent/futures/thread.py:81)
    run (threading.py:982)
    _bootstrap_inner (threading.py:1045)
    _bootstrap (threading.py:1002)
Thread 122 (idle): "zeroconf-ServiceBrowser-_googlecast._tcp-122"
    _bootstrap_inner (threading.py:1045)
    _bootstrap (threading.py:1002)
Thread 123 (idle): "Thread-3"
    wait (threading.py:331)
    wait (threading.py:629)
    run (pychromecast/discovery.py:304)
    _bootstrap_inner (threading.py:1045)
    _bootstrap (threading.py:1002)
Thread 124 (idle): "Thread-5"
    run_once (pychromecast/socket_client.py:579)
    run (pychromecast/socket_client.py:540)
    _bootstrap_inner (threading.py:1045)
    _bootstrap (threading.py:1002)
Thread 125 (idle): "Thread-6"
    run_once (pychromecast/socket_client.py:579)
    run (pychromecast/socket_client.py:540)
    _bootstrap_inner (threading.py:1045)
    _bootstrap (threading.py:1002)
Thread 126 (idle): "Thread-7"
    run_once (pychromecast/socket_client.py:579)
    run (pychromecast/socket_client.py:540)
    _bootstrap_inner (threading.py:1045)
    _bootstrap (threading.py:1002)
Thread 127 (idle): "Thread-8"
    run_once (pychromecast/socket_client.py:579)
    run (pychromecast/socket_client.py:540)
    _bootstrap_inner (threading.py:1045)
    _bootstrap (threading.py:1002)
Thread 128 (idle): "Thread-9"
    run_once (pychromecast/socket_client.py:579)
    run (pychromecast/socket_client.py:540)
    _bootstrap_inner (threading.py:1045)
    _bootstrap (threading.py:1002)
Thread 129 (idle): "Thread-10"
    run_once (pychromecast/socket_client.py:579)
    run (pychromecast/socket_client.py:540)
    _bootstrap_inner (threading.py:1045)
    _bootstrap (threading.py:1002)
Thread 144 (idle): "Thread-CallbackRequestDispatcher"
    wait (threading.py:327)
    get (queue.py:171)
    _get_many (pubsub_v1/subscriber/_protocol/helper_threads.py:56)
    __call__ (pubsub_v1/subscriber/_protocol/helper_threads.py:103)
    run (threading.py:982)
    _bootstrap_inner (threading.py:1045)
    _bootstrap (threading.py:1002)
Thread 145 (idle): "Thread-ConsumeBidirectionalStream"
    wait (threading.py:331)
    _wait_once (grpc/_common.py:116)
    wait (grpc/_common.py:156)
    _next (grpc/_channel.py:958)
    __next__ (grpc/_channel.py:541)
    __next__ (api_core/grpc_helpers.py:119)
    _recv (api_core/bidi.py:566)
    _recoverable (api_core/bidi.py:512)
    recv (api_core/bidi.py:569)
    _thread_main (api_core/bidi.py:663)
    run (threading.py:982)
    _bootstrap_inner (threading.py:1045)
    _bootstrap (threading.py:1002)
Thread 146 (idle): "Thread-LeaseMaintainer"
    wait (threading.py:331)
    wait (threading.py:629)
    maintain_leases (pubsub_v1/subscriber/_protocol/leaser.py:234)
    run (threading.py:982)
    _bootstrap_inner (threading.py:1045)
    _bootstrap (threading.py:1002)
Thread 148 (idle): "Thread-Heartbeater"
    wait (threading.py:331)
    wait (threading.py:629)
    heartbeat (pubsub_v1/subscriber/_protocol/heartbeater.py:49)
    run (threading.py:982)
    _bootstrap_inner (threading.py:1045)
    _bootstrap (threading.py:1002)
Thread 149 (idle): "Thread-13 (_run)"
    channel_spin (grpc/_channel.py:1656)
    run (threading.py:982)
    _bootstrap_inner (threading.py:1045)
    _bootstrap (threading.py:1002)
Thread 152 (idle): "DbWorker_0"
    _worker (concurrent/futures/thread.py:81)
    _worker_with_shutdown_hook (homeassistant/components/recorder/executor.py:17)
    run (threading.py:982)
    _bootstrap_inner (threading.py:1045)
    _bootstrap (threading.py:1002)
Thread 153 (idle): "DbWorker_1"
    _worker (concurrent/futures/thread.py:81)
    _worker_with_shutdown_hook (homeassistant/components/recorder/executor.py:17)
    run (threading.py:982)
    _bootstrap_inner (threading.py:1045)
    _bootstrap (threading.py:1002)
Thread 154 (idle): "DHCPWatcher"
    select (scapy/supersocket.py:264)
    _run (scapy/sendrecv.py:1219)
    run (threading.py:982)
    _bootstrap_inner (threading.py:1045)
    _bootstrap (threading.py:1002)
Thread 268 (idle): "Thread-111 (_run)"
    wait (threading.py:331)
    get (queue.py:180)
    __iter__ (api_core/bidi.py:107)
    consume_request_iterator (grpc/_channel.py:274)
    run (threading.py:982)
    _bootstrap_inner (threading.py:1045)
    _bootstrap (threading.py:1002)

Here are the svgs:
Rate 100
Rate 200

This seems to suggest that adaptive lighting or the Hue Bridge is to blame.

Removing the lights that were controlled via the Hue Bridge from adaptive lighting seems to have stopped the issue for now; I will raise this with the author.