Jinja templating: mean/average of every n values in a list

simenopsahl · May 9, 2024, 11:10pm

Hi, I am looking to reduce the number of entries in a list.
Currently I have a pollen forecast for the next 48 hours in list format. I extracted the values I need and changed the epoch timestamp to a more readable local timestamp. However, I don’t really need a value for every hour; one for every 2nd, 3rd, 4th or 6th hour would do fine. But I would rather not completely discard the skipped entries, so each kept entry should summarize the hours it covers.
This is how the list looks after my modifications:

{# Build the hourly forecast list from value_json.
   Annotations use Jinja comment syntax; a bare "# ..." outside a tag would be
   rendered as literal text in front of the resulting list. #}
{% set tl = value_json|map(attribute='time')|list %} {# time list #}
{% set cgl = value_json|map(attribute='grass')|list %} {# count grass particles list #}
{% set ctl = value_json|map(attribute='tree')|list %} {# count tree particles list #}
{% set cwl = value_json|map(attribute='weed')|list %} {# count weed particles list #}
{# A namespace is needed so the list assigned inside the loop survives the loop scope. #}
{% set ns = namespace(forecast=[]) %}
{% for i in range(tl|length) %}
  {# One dict per entry; the epoch timestamp is converted to a local timestamp (default 0). #}
  {% set ns.forecast = ns.forecast + [{'time': (tl[i] | timestamp_local(0) ), 'grass': cgl[i], 'tree': ctl[i], 'weed': cwl[i]}] %}
{% endfor %}
{{ ns.forecast }}

[
  {
    "time": "2024-05-09T22:00:00+02:00",
    "grass": 37,
    "tree": 161,
    "weed": 6
  },
  {
    "time": "2024-05-09T23:00:00+02:00",
    "grass": 28,
    "tree": 163,
    "weed": 12
  },
  {
    "time": "2024-05-10T00:00:00+02:00",
    "grass": 32,
    "tree": 155,
    "weed": 17
  },
  {
    "time": "2024-05-10T01:00:00+02:00",
    "grass": 35,
    "tree": 177,
    "weed": 17
  },
  {
    "time": "2024-05-10T02:00:00+02:00",
    "grass": 19,
    "tree": 92,
    "weed": 10
  },
  {
    "time": "2024-05-10T03:00:00+02:00",
    "grass": 63,
    "tree": 316,
    "weed": 31
  },
  {
    "time": "2024-05-10T04:00:00+02:00",
    "grass": 25,
    "tree": 121,
    "weed": 13
  },
  {
    "time": "2024-05-10T05:00:00+02:00",
    "grass": 34,
    "tree": 194,
    "weed": 14
  },
  {
    "time": "2024-05-10T06:00:00+02:00",
    "grass": 31,
    "tree": 148,
    "weed": 0
  },
  {
    "time": "2024-05-10T07:00:00+02:00",
    "grass": 58,
    "tree": 251,
    "weed": 10
  },
  {
    "time": "2024-05-10T08:00:00+02:00",
    "grass": 41,
    "tree": 347,
    "weed": 2
  },
  {
    "time": "2024-05-10T09:00:00+02:00",
    "grass": 60,
    "tree": 284,
    "weed": 32
  },
  {
    "time": "2024-05-10T10:00:00+02:00",
    "grass": 15,
    "tree": 79,
    "weed": 7
  },
  {
    "time": "2024-05-10T11:00:00+02:00",
    "grass": 21,
    "tree": 100,
    "weed": 11
  },
  {
    "time": "2024-05-10T12:00:00+02:00",
    "grass": 32,
    "tree": 182,
    "weed": 13
  },
  {
    "time": "2024-05-10T13:00:00+02:00",
    "grass": 29,
    "tree": 133,
    "weed": 0
  },
  {
    "time": "2024-05-10T14:00:00+02:00",
    "grass": 45,
    "tree": 196,
    "weed": 8
  },
  {
    "time": "2024-05-10T15:00:00+02:00",
    "grass": 24,
    "tree": 152,
    "weed": 0
  },
  {
    "time": "2024-05-10T16:00:00+02:00",
    "grass": 53,
    "tree": 252,
    "weed": 28
  },
  {
    "time": "2024-05-10T17:00:00+02:00",
    "grass": 29,
    "tree": 147,
    "weed": 14
  },
  {
    "time": "2024-05-10T18:00:00+02:00",
    "grass": 28,
    "tree": 133,
    "weed": 15
  },
  {
    "time": "2024-05-10T19:00:00+02:00",
    "grass": 17,
    "tree": 100,
    "weed": 7
  },
  {
    "time": "2024-05-10T20:00:00+02:00",
    "grass": 23,
    "tree": 99,
    "weed": 4
  },
  {
    "time": "2024-05-10T21:00:00+02:00",
    "grass": 30,
    "tree": 131,
    "weed": 5
  },
  {
    "time": "2024-05-10T22:00:00+02:00",
    "grass": 24,
    "tree": 149,
    "weed": 0
  },
  {
    "time": "2024-05-10T23:00:00+02:00",
    "grass": 23,
    "tree": 112,
    "weed": 12
  },
  {
    "time": "2024-05-11T00:00:00+02:00",
    "grass": 32,
    "tree": 160,
    "weed": 16
  },
  {
    "time": "2024-05-11T01:00:00+02:00",
    "grass": 25,
    "tree": 118,
    "weed": 13
  },
  {
    "time": "2024-05-11T02:00:00+02:00",
    "grass": 23,
    "tree": 118,
    "weed": 11
  },
  {
    "time": "2024-05-11T03:00:00+02:00",
    "grass": 27,
    "tree": 129,
    "weed": 14
  },
  {
    "time": "2024-05-11T04:00:00+02:00",
    "grass": 31,
    "tree": 178,
    "weed": 13
  },
  {
    "time": "2024-05-11T05:00:00+02:00",
    "grass": 31,
    "tree": 151,
    "weed": 0
  },
  {
    "time": "2024-05-11T06:00:00+02:00",
    "grass": 59,
    "tree": 255,
    "weed": 10
  },
  {
    "time": "2024-05-11T07:00:00+02:00",
    "grass": 47,
    "tree": 449,
    "weed": 2
  },
  {
    "time": "2024-05-11T08:00:00+02:00",
    "grass": 76,
    "tree": 361,
    "weed": 40
  },
  {
    "time": "2024-05-11T09:00:00+02:00",
    "grass": 101,
    "tree": 505,
    "weed": 50
  },
  {
    "time": "2024-05-11T10:00:00+02:00",
    "grass": 77,
    "tree": 762,
    "weed": 14
  },
  {
    "time": "2024-05-11T11:00:00+02:00",
    "grass": 71,
    "tree": 402,
    "weed": 29
  },
  {
    "time": "2024-05-11T12:00:00+02:00",
    "grass": 100,
    "tree": 1015,
    "weed": 2
  },
  {
    "time": "2024-05-11T13:00:00+02:00",
    "grass": 118,
    "tree": 508,
    "weed": 20
  },
  {
    "time": "2024-05-11T14:00:00+02:00",
    "grass": 82,
    "tree": 1169,
    "weed": 9
  },
  {
    "time": "2024-05-11T15:00:00+02:00",
    "grass": 28,
    "tree": 134,
    "weed": 15
  },
  {
    "time": "2024-05-11T16:00:00+02:00",
    "grass": 30,
    "tree": 154,
    "weed": 15
  },
  {
    "time": "2024-05-11T17:00:00+02:00",
    "grass": 57,
    "tree": 449,
    "weed": 7
  },
  {
    "time": "2024-05-11T18:00:00+02:00",
    "grass": 54,
    "tree": 307,
    "weed": 22
  },
  {
    "time": "2024-05-11T19:00:00+02:00",
    "grass": 34,
    "tree": 171,
    "weed": 0
  },
  {
    "time": "2024-05-11T20:00:00+02:00",
    "grass": 51,
    "tree": 221,
    "weed": 9
  },
  {
    "time": "2024-05-11T21:00:00+02:00",
    "grass": 46,
    "tree": 435,
    "weed": 2
  }
]

Ideally I would reduce the list to something like:

[
  {
    "time": "2024-05-09T22:00:00+02:00", # starting timestamp
    "grass": 33, # median int of next 8 entries
    "tree": 162, # median int of next 8 entries
    "weed": 13 # median int of next 8 entries
  },
  {
    "time": "2024-05-10T06:00:00+02:00", # 8 or whatever hours later
    "grass": xx, # median int of next 8 entries
    "tree": xxx, # median int of next 8 entries
    "weed": xx # median int of next 8 entries
  },
  {
    "time": "2024-05-10T14:00:00+02:00", # 8 or whatever hours later
    "grass": xx, # median int of next 8 entries
    "tree": xxx, # median int of next 8 entries
    "weed": xx # median int of next 8 entries
  },
  {
    "time": "2024-05-10T22:00:00+02:00", # 8 or whatever hours later
    "grass": xx, # median int of next 8 entries
    "tree": xxx, # median int of next 8 entries
    "weed": xx # median int of next 8 entries
  }
]

But not necessarily every 8th hour; every 3rd would probably make more sense.
Can any of you wizards help me achieve this?
Here's a snippet of the starting JSON I had to work with:

{
    "message": "success",
    "lat": redacted,
    "lng": redacted,
    "data": [
        {
            "Count": {
                "grass_pollen": 59,
                "tree_pollen": 254,
                "weed_pollen": 10
            },
            "Risk": {
                "grass_pollen": "Moderate",
                "tree_pollen": "High",
                "weed_pollen": "Low"
            },
            "Species": {
                "Grass": {
                    "Grass / Poaceae": 59
                },
                "Others": 2,
                "Tree": {
                    "Alder": 2,
                    "Birch": 71,
                    "Cypress": 5,
                    "Elm": 0,
                    "Hazel": 0,
                    "Oak": 81,
                    "Pine": 66,
                    "Plane": 27,
                    "Poplar / Cottonwood": 2
                },
                "Weed": {
                    "Chenopod": 0,
                    "Mugwort": 0,
                    "Nettle": 8,
                    "Ragweed": 0
                }
            },
            "time": 1683799200,
            "updatedAt": "2023-05-11 09:00:00"
        },
    ]
}

It took me many hours to extract what I needed from that (time and counts).

simenopsahl · May 10, 2024, 8:41pm

Because I already have each value in its own list prior to constructing my final list, I am guessing I could go through those lists first, doing something like this:

{# Read the forecast series from the sensor and split it into one list per field. #}
{% set value_json = state_attr('sensor.cl_tadata_pollen_forecast', 'series') %}
{% set tl = value_json|map(attribute='time')|list %}
{% set cgl = value_json|map(attribute='grass')|list %}
{% set ctl = value_json|map(attribute='tree')|list %}
{% set cwl = value_json|map(attribute='weed')|list %}
{% set interval = 6 %} {# window size; should divide the 48 forecast entries evenly #}
{{ median(cgl[:interval]) }} {# median of the first `interval` grass counts #}

But I still need help with writing the loop logic to construct the intermediary list of values. The list of timestamps should be easier, because it just extracts every x-th value.

{# Work-in-progress attempt: intends to collect one median grass count per
   `interval`-sized window of cgl. As written, every iteration appends the
   same value, because the slice inside the loop does not depend on i. #}
{% set value_json = state_attr('sensor.cl_tadata_pollen_forecast', 'series') %}
{% set tl = value_json|map(attribute='time')|list %}
{% set cgl = value_json|map(attribute='grass')|list %}
{% set ctl = value_json|map(attribute='tree')|list %}
{% set cwl = value_json|map(attribute='weed')|list %}
{% set interval = 6 %}
{% set newl = ((tl|length) / interval) | int %} # length of new list
{% set ns = namespace(newcgl=[]) %} # empty new grass count list
{% for i in range(newl) %}
  # this goes 0 to 6 but should really only do so for the first iteration
  {# NOTE(review): cgl[:0-interval] is cgl[:-interval], i.e. every entry except
     the last `interval` ones - not the first `interval` entries. #}
  {% set ns.newcgl = ns.newcgl + [{'grass': (median(cgl[:0-interval]))}] %}
  # some statement to increment the interval
  # i tried incorporating i, but 0-6, 1-7 etc would still be wrong
{% endfor %}
{{ ns.newcgl }}

As you can see I have some understanding, but not enough to make it on my own

Troon · May 10, 2024, 8:52pm

You can take every third item from a list like this:

{{ [1, 2, 3, 4, 5, 6][::3] }}

# returns [1, 4]

simenopsahl · May 10, 2024, 8:54pm

Thank you, that gets me the timestamps

{# Read the 'series' attribute of the forecast sensor and split it into one list per field. #}
{% set value_json = state_attr('sensor.cl_tadata_pollen_forecast', 'series') %}
{% set tl = value_json|map(attribute='time')|list %}
{% set cgl = value_json|map(attribute='grass')|list %}
{% set ctl = value_json|map(attribute='tree')|list %}
{% set cwl = value_json|map(attribute='weed')|list %}
{% set interval = 6 %}
{# Step slice: keeps entries 0, interval, 2*interval, ... of the timestamp list. #}
{% set ntl = tl[::interval] %}

simenopsahl · May 10, 2024, 11:06pm

I think I have it!

{# Read the 'series' attribute of the forecast sensor and split it into one list per field. #}
{% set value_json = state_attr('sensor.cl_tadata_pollen_forecast', 'series') %}
{% set tl = value_json|map(attribute='time')|list %}
{% set cgl = value_json|map(attribute='grass')|list %}
{% set ctl = value_json|map(attribute='tree')|list %}
{% set cwl = value_json|map(attribute='weed')|list %}
{# Dummy list used to check that each window picks up the correct values. #}
{% set test = [1, 1, 1, 1, 1, 1,
               2, 2, 2, 2, 2, 2,
               3, 3, 3, 3, 3, 3,
               4, 4, 4, 4, 4, 4,
               5, 5, 5, 5, 5, 5,
               6, 6, 6, 6, 6, 6,
               7, 7, 7, 7, 7, 7,
               8, 8, 8, 8, 8, 8] %}
{% set ns = namespace() %}
{% set interval = 6 %} {# window size used to step through the lists #}
{% set ns.end = interval %} {# exclusive end index of the first window #}
{# Step by the constant interval rather than by ns.end, which is advanced later. #}
{% set ntl = tl[::interval] %} {# new list of timestamps: the first of each window #}
{{ ntl }}

returns

[1715371200, 1715392800, 1715414400, 1715436000, 1715457600, 1715479200, 1715500800, 1715522400]

To calculate the values

{# Integer median of every `interval`-sized window of the dummy list.
   Relies on ns, ntl, interval and test being set earlier in the same template. #}
{% set ns.ncgl = [] %} {# new grass pollen counts #}
{% for i in range(ntl|length) %} {# one window per kept timestamp #}
  {# Window i covers test[i*interval:(i+1)*interval], e.g. [0:6] for i = 0.
     Deriving the bounds from i avoids maintaining a separate end index. #}
  {% set ns.ncgl = ns.ncgl + [{'grass': (median(test[i*interval:(i+1)*interval], 0) | int)}] %}
{% endfor %}
{{ ns.ncgl }}

returns

[{'grass': 1}, {'grass': 2}, {'grass': 3}, {'grass': 4}, {'grass': 5}, {'grass': 6}, {'grass': 7}, {'grass': 8}]

Now to do this for the actual values and the rest of the counts and combine it into one list

{# Per-field integer medians for consecutive windows of `step` forecast entries,
   emitted as three separate lists of single-key dicts. #}
{% set series = state_attr('sensor.cl_tadata_pollen_forecast', 'series') %}
{% set step = 6 %}
{% set stamps = (series | map(attribute='time') | list)[::step] %}
{% set grass_counts = series | map(attribute='grass') | list %}
{% set tree_counts = series | map(attribute='tree') | list %}
{% set weed_counts = series | map(attribute='weed') | list %}
{% set acc = namespace(grass=[], tree=[], weed=[]) %}
{% for n in range(stamps | length) %}
  {# Window n spans indices [n*step, (n+1)*step). #}
  {% set lo = n * step %}
  {% set hi = lo + step %}
  {% set acc.grass = acc.grass + [{'grass': median(grass_counts[lo:hi], 0) | int}] %}
  {% set acc.tree = acc.tree + [{'tree': median(tree_counts[lo:hi], 0) | int}] %}
  {% set acc.weed = acc.weed + [{'weed': median(weed_counts[lo:hi], 0) | int}] %}
{% endfor %}
{{ acc.grass }}
{{ acc.tree }}
{{ acc.weed }}

returns

[{'grass': 27}, {'grass': 31}, {'grass': 41}, {'grass': 30}, {'grass': 48}, {'grass': 37}, {'grass': 34}, {'grass': 33}]
[{'tree': 165}, {'tree': 155}, {'tree': 247}, {'tree': 157}, {'tree': 249}, {'tree': 184}, {'tree': 170}, {'tree': 159}]
[{'weed': 7}, {'weed': 6}, {'weed': 6}, {'weed': 6}, {'weed': 16}, {'weed': 8}, {'weed': 8}, {'weed': 11}]

Now for the complete code

{# Down-sample the hourly pollen forecast to one entry per `interval` hours.
   Each output entry keeps the first timestamp of its window (converted to a
   local timestamp) and the integer median of the grass/tree/weed counts in
   that window. #}
{# state_attr() yields None when the sensor or attribute is missing; fall back
   to an empty list so the template renders [] instead of raising. #}
{% set value_json = state_attr('sensor.cl_tadata_pollen_forecast', 'series') or [] %}
{% set interval = 6 %}
{% set ns = namespace(forecast=[]) %}
{# batch() splits the series into consecutive windows of `interval` entries;
   the last window is simply shorter when the length is not a multiple. #}
{% for window in value_json | batch(interval) %}
  {% set ns.forecast = ns.forecast + [{
    'time': window[0]['time'] | timestamp_local(0),
    'grass': median(window | map(attribute='grass') | list, 0) | int,
    'tree': median(window | map(attribute='tree') | list, 0) | int,
    'weed': median(window | map(attribute='weed') | list, 0) | int
  }] %}
{% endfor %}
{{ ns.forecast }}

returns

[
  {
    "time": "2024-05-10T22:00:00+02:00",
    "grass": 27,
    "tree": 165,
    "weed": 7
  },
  {
    "time": "2024-05-11T04:00:00+02:00",
    "grass": 31,
    "tree": 155,
    "weed": 6
  },
  {
    "time": "2024-05-11T10:00:00+02:00",
    "grass": 41,
    "tree": 247,
    "weed": 6
  },
  {
    "time": "2024-05-11T16:00:00+02:00",
    "grass": 30,
    "tree": 157,
    "weed": 6
  },
  {
    "time": "2024-05-11T22:00:00+02:00",
    "grass": 48,
    "tree": 249,
    "weed": 16
  },
  {
    "time": "2024-05-12T04:00:00+02:00",
    "grass": 37,
    "tree": 184,
    "weed": 8
  },
  {
    "time": "2024-05-12T10:00:00+02:00",
    "grass": 34,
    "tree": 170,
    "weed": 8
  },
  {
    "time": "2024-05-12T16:00:00+02:00",
    "grass": 33,
    "tree": 159,
    "weed": 11
  }
]

I would still welcome help to improve this. There is probably a better, more efficient way of doing it, especially one that avoids creating all those intermediary lists.