I’ve also updated my Python script. The YAML for the sensors remains the same:
import json
import urllib.request
import paho.mqtt.client as mqtt
from datetime import datetime
import csv

# Connection settings for the MQTT broker the statistics are published to.
# NOTE(review): the credentials are hard-coded; consider loading them from the
# environment or a secrets file before sharing this script.
mqtt_host = "10.1.1.4"
mqtt_port = 1883
mqtt_user = 'homeassistant'
mqtt_pass = 'Pub1i5h.'


def _to_int(text):
    """Convert a CSV count such as ``1,234`` to an int; blank fields become 0."""
    cleaned = text.strip().strip('"').replace(",", "")
    return int(cleaned) if cleaned else 0


# Earlier data source, kept for reference:
#webrequest = urllib.request.urlopen("https://www.canada.ca/en/public-health/services/diseases/2019-novel-coronavirus-infection.html")
# The context manager closes the response even if reading or decoding fails.
with urllib.request.urlopen("https://health-infobase.canada.ca/src/data/covidLive/covid19.csv") as webrequest:
    csv_lines = webrequest.read().decode("utf8").splitlines()

data = dict()

# The CSV contains comma-separated lines for every stat update since the government started
# posting statistics updates. This loop creates a dictionary that only keeps the latest
# updates. Somebody may want the history for something though...
#
# Also re-format the date, since it is not directly comparable as dd-mm-yy.
#
# Skip the first line, since that is just the labels.
#
# csv.reader is used instead of str.split(",") so that quoted fields which
# themselves contain commas (e.g. "1,234") stay in a single column.
for parts in csv.reader(csv_lines[1:]):
    # ID,EnglishName,FrenchName,Date,Confirmed,Presumptive,Deaths,Total,NumToday,PercentToday,NumTested
    if not parts:
        # Blank line in the feed; nothing to record.
        continue
    updated = {
        'id': int(parts[0]),
        'name': parts[1],
        'date': datetime.strptime(parts[3], '%d-%m-%Y').strftime('%Y-%m-%d'),
        'confirmed': _to_int(parts[4]),
        'presumptive': _to_int(parts[5]),
        'deaths': _to_int(parts[6]),
        'current': _to_int(parts[7]),
    }
    row_id = updated['id']
    # Dates are now YYYY-MM-DD strings, so string comparison orders them correctly.
    # "<=" lets a later line with the same date replace an earlier one.
    if row_id not in data or data[row_id]['date'] <= updated['date']:
        data[row_id] = updated

# Re-key the dictionary on the province names to make the JSON for the sensor
# look a little nicer.
provinceData = dict()
for datum in data.values():
    provinceData[datum['name'].lower().replace(' ', '_')] = datum
    print(datum)

# Dump the dictionary out to JSON and publish it to MQTT
jsonData = json.dumps(provinceData)
client = mqtt.Client()
client.username_pw_set(mqtt_user, mqtt_pass)
client.connect(mqtt_host, mqtt_port)
client.publish("coronavirus/canada_ca", jsonData)
import json
from requests import get
from datetime import datetime
import csv


def _to_int(text):
    """Convert a CSV count such as ``1,234`` to an int; blank fields become 0."""
    cleaned = text.strip().strip('"').replace(",", "")
    return int(cleaned) if cleaned else 0


# Download the full statistics history published as CSV.
response = get("https://health-infobase.canada.ca/src/data/covidLive/covid19.csv")
csv_lines = response.text.splitlines()
response.close()

data = dict()

# Create a dictionary containing just the latest updates
# Skip the first line in csv_lines which contains headings
#
# csv.reader is used instead of str.split(",") so that quoted fields which
# themselves contain commas (e.g. "1,234") stay in a single column.
for parts in csv.reader(csv_lines[1:]):
    # ID,EnglishName,FrenchName,Date,Confirmed,Presumptive,Deaths,Total,NumToday,PercentToday,NumTested
    if not parts:
        # Blank line in the feed; nothing to record.
        continue
    updated = {
        'id': int(parts[0]),
        'name': parts[1],
        # Re-format dd-mm-YYYY as YYYY-MM-DD so dates compare correctly as strings.
        'date': datetime.strptime(parts[3], '%d-%m-%Y').strftime('%Y-%m-%d'),
        'confirmed': _to_int(parts[4]),
        'presumptive': _to_int(parts[5]),
        'deaths': _to_int(parts[6]),
        'current': _to_int(parts[7]),
    }
    row_id = updated['id']
    # "<=" lets a later line with the same date replace an earlier one.
    if row_id not in data or data[row_id]['date'] <= updated['date']:
        data[row_id] = updated

# Re-key the dictionary on the province names
provinceData = {
    datum['name'].lower().replace(' ', '_'): datum
    for datum in data.values()
}

# The JSON document is written to stdout.
print(json.dumps(provinceData))
Here’s the resulting sensor:
NOTE
A limitation of the Command Line Sensor is its json_attributes option. An MQTT Sensor offers json_attributes_template, which is more flexible when it comes to extracting attributes.
Some of the “numbers” in this CSV file have started containing commas. I’ve adjusted the pertinent part of the script to strip the quotes and remove the commas:
For anyone in Nova Scotia, the provincial government has a page with the positive vs. negative test numbers that updates faster than the Canadian government one. I’ve created a similar script to parse the numbers from that one as well. It doesn’t contain the same fields, but it’s still useful.
import json
from requests import get
from datetime import datetime
import xml.etree.ElementTree as ET
def string_to_int(str) -> int:
    """Convert a CSV numeric field to an int.

    Surrounding whitespace and double quotes are removed, as are thousands
    separators, so ``"1,234"`` becomes ``1234``. A field that is empty after
    that clean-up (``""``, ``'""'`` or only whitespace) is treated as 0.

    Args:
        str: The raw field text. The name shadows the builtin but is kept so
            existing keyword callers keep working.

    Returns:
        The parsed integer, or 0 for an empty field.

    Raises:
        ValueError: If the cleaned text is non-empty and not an integer.
    """
    # Normalise first, then test for emptiness, so a quoted-empty field does
    # not reach int() and raise.
    cleaned = str.strip().strip('"').replace(",", "").strip()
    return int(cleaned) if cleaned else 0
import csv

# NOTE(review): verify=False turns off TLS certificate verification for this
# request. Confirm it is still needed for this host and remove it if not.
response = get("https://novascotia.ca/coronavirus/data/COVID-19-data.csv", verify=False)
data_lines = response.text.splitlines()
response.close()

# "total" and "deaths" are running sums over every row; the other keys are
# overwritten on each row and so end up holding the last row's values.
data = dict()
data["total"] = 0
data["deaths"] = 0

# The first two lines of the file are skipped (presumably headings -- confirm
# against the feed). csv.reader is used instead of str.split(",") so that
# quoted numbers containing commas stay in a single column.
for col in csv.reader(data_lines[2:]):
    if not col:
        # Blank line in the feed; nothing to record.
        continue
    data["date"] = col[0]
    data["total"] += string_to_int(col[1])
    data["new"] = string_to_int(col[1])
    data["recovered"] = string_to_int(col[3])
    data["hospitalized"] = string_to_int(col[4])
    data["deaths"] += string_to_int(col[6])
    data["new_deaths"] = string_to_int(col[6])

# Derive the current count from the final values. The guard keeps an empty
# feed from raising KeyError on the missing "recovered" key.
if "recovered" in data:
    data["current"] = data["total"] - data["recovered"] - data["deaths"]

json_data = json.dumps(data)
print(json_data)