Show Code
#!/usr/bin/python
# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup
from urllib.request import urlopen, Request
import paho.mqtt.client as mqtt
import time
from threading import Thread
from datetime import datetime
import json
import secrets
# Broker connection settings are read from a module named `secrets`.
# NOTE(review): `secrets` is also the name of a standard-library module; presumably a
# local secrets.py defining these four names is expected to shadow it - confirm.
MQTT_Host = secrets.MQTT_Host
MQTT_Port = secrets.MQTT_Port
MQTT_User = secrets.MQTT_User
MQTT_Password = secrets.MQTT_Password
# Single MQTT client used by every publish call in this script.
# NOTE(review): paho-mqtt 2.x expects a callback API version as the first constructor
# argument; this call form looks like the 1.x API - confirm the installed version.
client = mqtt.Client("HA_Scraper") # must be unique on MQTT network
# Credentials are coerced to str before being handed to the client.
client.username_pw_set(str(MQTT_User),str(MQTT_Password))
client.connect(MQTT_Host, port=MQTT_Port, keepalive=60)
# Start the client's network loop so publishes are sent while the script keeps polling.
client.loop_start()
# HTTP headers attached to every Request built by the scraper functions.
headers = {'User-Agent': 'Mozilla/5.0'}
class Radio:
    """Scrape configuration and last-seen values for one radio station.

    For each of the three fields (picture, artist, track) an instance holds:

    * the last value seen (``pic`` / ``artist`` / ``track``),
    * a CSS selector locating the element on the page (``*_sel``),
    * an attribute name to read from that element (``*_att``).
    """

    # Initializer / Instance Attributes
    def __init__(self, name, url, pic, pic_sel, pic_att, artist, artist_sel, artist_att, track, track_sel, track_att):
        """Store every argument unchanged on the instance under the same name.

        Args:
            name: Station identifier.
            url: Address of the page to scrape.
            pic: Initial picture value.
            pic_sel: CSS selector for the picture element.
            pic_att: Attribute of the picture element to read.
            artist: Initial artist value.
            artist_sel: CSS selector for the artist element.
            artist_att: Attribute of the artist element to read.
            track: Initial track value.
            track_sel: CSS selector for the track element.
            track_att: Attribute of the track element to read.
        """
        self.name = name
        self.url = url
        self.pic = pic
        self.pic_sel = pic_sel
        self.pic_att = pic_att
        self.artist = artist
        self.artist_sel = artist_sel
        self.artist_att = artist_att
        self.track = track
        self.track_sel = track_sel
        self.track_att = track_att

    def __repr__(self):
        """Return a short, debugging-friendly description of the station."""
        return "%s(name=%r, url=%r)" % (type(self).__name__, self.name, self.url)
class Podcast:
    """Scrape configuration and last-seen values for one podcast feed.

    For the episode title (``track``) and the audio link (``mp3_url``) an
    instance holds the last value seen, a CSS selector locating the element
    (``*_sel``) and an attribute name to read from it (``*_att``).
    """

    # Initializer / Instance Attributes
    def __init__(self, name, url, track, track_sel, track_att, mp3_url, mp3_sel, mp3_att):
        """Store every argument unchanged on the instance under the same name.

        Args:
            name: Podcast identifier.
            url: Address of the feed to scrape.
            track: Initial episode-title value.
            track_sel: CSS selector for the title element.
            track_att: Attribute of the title element to read.
            mp3_url: Initial audio-link value.
            mp3_sel: CSS selector for the element carrying the audio link.
            mp3_att: Attribute of that element to read.
        """
        self.name = name
        self.url = url
        self.track = track
        self.track_sel = track_sel
        self.track_att = track_att
        self.mp3_url = mp3_url
        self.mp3_sel = mp3_sel
        self.mp3_att = mp3_att

    def __repr__(self):
        """Return a short, debugging-friendly description of the podcast."""
        return "%s(name=%r, url=%r)" % (type(self).__name__, self.name, self.url)
# Station and podcast definitions.
# The value fields (pic / artist / track / mp3_url) are left blank here; the
# *_sel entries are CSS selectors and the *_att entries name the attribute to
# read from the selected element.

RTL2 = Radio(  # https://timeline.rtl.fr/RTL2/songs
    name="RTL2",
    url="https://www.6play.fr/rtl2/quel-est-ce-titre",
    pic="",
    pic_sel=".ecfper-2",
    pic_att="src",
    artist="",
    artist_sel=".ecfper-6",
    artist_att="",
    track="",
    track_sel=".ecfper-5",
    track_att="",
)

Absolute_Radio_CR = Radio(
    name="Absolute_Radio_CR",
    url="https://planetradio.co.uk/absolute-radio-60s/player/",
    pic="",
    pic_sel=".station-cards.cf > div:nth-child(3) > a .main-img",
    pic_att="style",
    artist="",
    artist_sel=".station-cards.cf > div:nth-child(3) > a > .text-wrapper > div > .artist",
    artist_att="",
    track="",
    track_sel=".station-cards.cf > div:nth-child(3) > a > .text-wrapper > div > .track",
    track_att="",
)

Absolute_Radio_CR2 = Radio(
    name="Absolute_Radio_CR2",
    url="https://planetradio.co.uk/absolute-classic-rock/player/",
    pic="",
    pic_sel=".now-playing-wrapper > .now-playing.cf > .now-playing-block.cf.right.extended-info > .image",
    pic_att="style",
    artist="",
    artist_sel=".now-playing-wrapper > .now-playing.cf > .now-playing-block.cf.right.extended-info > .info-wrapper.fr > div > .title.extended-info",
    artist_att="",
    track="",
    track_sel=".now-playing-wrapper > .now-playing.cf > .now-playing-block.cf.right.extended-info > .info-wrapper.fr > div > .track.extended-info",
    track_att="",
)

Absolute_Radio = Radio(
    name="Absolute_Radio",
    url="https://planetradio.co.uk/absolute-radio-60s/player/",
    pic="",
    pic_sel=".station-cards.cf > div:nth-child(1) > a .main-img",
    pic_att="style",
    artist="",
    artist_sel=".station-cards.cf > div:nth-child(1) > a > .text-wrapper > div > .artist",
    artist_att="",
    track="",
    track_sel=".station-cards.cf > div:nth-child(1) > a > .text-wrapper > div > .track",
    track_att="",
)

Absolute_Radio2 = Radio(
    name="Absolute_Radio2",
    url="https://planetradio.co.uk/absolute-radio/player/",
    pic="",
    pic_sel=".now-playing-wrapper > .now-playing.cf > .now-playing-block.cf.right.extended-info > .image",
    pic_att="style",
    artist="",
    artist_sel=".now-playing-wrapper > .now-playing.cf > .now-playing-block.cf.right.extended-info > .info-wrapper.fr > div > .title.extended-info",
    artist_att="",
    track="",
    track_sel=".now-playing-wrapper > .now-playing.cf > .now-playing-block.cf.right.extended-info > .info-wrapper.fr > div > .track.extended-info",
    track_att="",
)

IoT_Podcast = Podcast(
    name="IoT Podcast",
    url="https://iotpodcast.com/feed/",
    track="",
    track_sel="item:nth-of-type(1) title",
    track_att="",
    mp3_url="",
    mp3_sel="enclosure:nth-of-type(1)",
    mp3_att="url",
)

Hass_Podcast = Podcast(
    name="Hass Podcast",
    url="https://hasspodcast.io/feed/podcast",
    track="",
    track_sel="item:nth-of-type(1) title",
    track_att="",
    mp3_url="",
    mp3_sel="enclosure:nth-of-type(1)",
    mp3_att="url",
)

Chill = Radio(
    name="Chill",
    url="https://www.smoothradio.com/chill/radio/playlist/",
    pic="",
    pic_sel=".js-lazy",
    pic_att="data-src",
    artist="",
    artist_sel=".now-playing__text-content__details__artist",
    artist_att="",
    track="",
    track_sel=".now-playing__text-content__details__track",
    track_att="",
)

Scala = Radio(
    name="Scala",
    url="https://planetradio.co.uk/jazz-fm/player/",
    pic="",
    pic_sel=".station-cards.cf > div:nth-child(1) > a .main-img",
    pic_att="style",
    artist="",
    artist_sel=".station-cards.cf > div:nth-child(1) > a > .text-wrapper > div > .artist",
    artist_att="",
    track="",
    track_sel=".station-cards.cf > div:nth-child(1) > a > .text-wrapper > div > .track",
    track_att="",
)

Scala2 = Radio(
    name="Scala2",
    url="https://planetradio.co.uk/scala-radio/player/",
    pic="",
    pic_sel=".now-playing-wrapper > .now-playing.cf > .now-playing-block.cf.right.extended-info > .image",
    pic_att="style",
    artist="",
    artist_sel=".now-playing-wrapper > .now-playing.cf > .now-playing-block.cf.right.extended-info > .info-wrapper.fr > div > .title.extended-info",
    artist_att="",
    track="",
    track_sel=".now-playing-wrapper > .now-playing.cf > .now-playing-block.cf.right.extended-info > .info-wrapper.fr > div > .track.extended-info",
    track_att="",
)
def bs_get_value_radio(radio, timeout=10):
    """Scrape a station's player page and publish any changed now-playing fields.

    The page at ``radio.url`` is fetched and parsed with BeautifulSoup. For the
    picture, artist and track, the first element matching the configured CSS
    selector is read. When the resulting value differs from the one cached on
    ``radio`` it is published (retained, QoS 0) to
    ``RadioStream/<name>/<field>`` and the cache is updated.

    Args:
        radio: ``Radio`` instance; its ``pic``, ``artist`` and ``track``
            attributes are updated in place.
        timeout: Seconds to wait for the HTTP request, so a stalled server
            cannot hold a polling thread open indefinitely.

    Returns:
        None. Exceptions raised while fetching, parsing, extracting or
        publishing are caught and printed instead of propagating.
    """
    try:
        req = Request(radio.url, headers=headers)
        with urlopen(req, timeout=timeout) as response:
            content = response.read()
        raw_data = BeautifulSoup(content, "html.parser")
    except Exception as e:
        # Without a parsed page there is nothing to extract, so stop here
        # rather than reporting three misleading per-field failures.
        print("%s Unable to get BS from URL" % e)
        return

    def first_match(selector, attribute):
        # Value of the first element matching `selector`: the named attribute
        # when one is configured, otherwise the element text, title-cased and
        # stripped.
        element = raw_data.select(selector)[0]
        if attribute:
            return element[attribute]
        return element.text.title().strip()

    try:
        # Normalise BEFORE comparing: the cached value is the cleaned URL, so
        # comparing it with the raw attribute would republish on every poll.
        pic = raw_data.select(radio.pic_sel)[0][radio.pic_att]
        pic = pic.replace("background-image:url(", "").replace(")", "")
        if pic != radio.pic:
            # Published as str (the MQTT client encodes it as UTF-8) so a
            # non-ASCII URL no longer fails after the cache was updated.
            client.publish("RadioStream/" + radio.name + "/pic_url", pic, qos=0, retain=True)
            radio.pic = pic
    except Exception as e:
        print("Can't get %s Image: %s" % (radio.name, e))

    for field, selector, attribute in (
        ("artist", radio.artist_sel, radio.artist_att),
        ("track", radio.track_sel, radio.track_att),
    ):
        try:
            value = first_match(selector, attribute)
            if value != getattr(radio, field):
                client.publish("RadioStream/" + radio.name + "/" + field, value, qos=0, retain=True)
                # Cache only after a successful publish so a failure is retried.
                setattr(radio, field, value)
        except Exception as e:
            print("Can't get %s %s: %s" % (radio.name, field.title(), e))
def bs_get_value_podcast(podcast, timeout=10):
    """Scrape a podcast feed and publish the audio link and title when changed.

    The feed at ``podcast.url`` is fetched and parsed with BeautifulSoup. For
    the audio link and then the episode title, the first element matching the
    configured CSS selector is read. When the value differs from the one
    cached on ``podcast`` it is published (retained, QoS 0) to
    ``RadioStream/<name>/mp3_url`` or ``RadioStream/<name>/track`` and the
    cache is updated.

    Args:
        podcast: ``Podcast`` instance; its ``mp3_url`` and ``track``
            attributes are updated in place.
        timeout: Seconds to wait for the HTTP request, so a stalled server
            cannot hold a polling thread open indefinitely.

    Returns:
        None. Exceptions raised while fetching, parsing, extracting or
        publishing are caught and printed instead of propagating.
    """
    try:
        req = Request(podcast.url, headers=headers)
        with urlopen(req, timeout=timeout) as response:
            content = response.read()
        raw_data = BeautifulSoup(content, "html.parser")
    except Exception as e:
        # Without a parsed feed there is nothing to extract, so stop here
        # rather than reporting misleading per-field failures.
        print("%s Unable to get BS from URL" % e)
        return

    def first_match(selector, attribute):
        # Value of the first element matching `selector`: the named attribute
        # when one is configured, otherwise the element text, title-cased and
        # stripped. Stripping happens before the caller compares, so
        # surrounding whitespace no longer forces a republish on every poll.
        element = raw_data.select(selector)[0]
        if attribute:
            return element[attribute]
        return element.text.title().strip()

    for field, label, selector, attribute in (
        ("mp3_url", "MP3 URL", podcast.mp3_sel, podcast.mp3_att),
        ("track", "Track", podcast.track_sel, podcast.track_att),
    ):
        try:
            value = first_match(selector, attribute)
            if value != getattr(podcast, field):
                client.publish("RadioStream/" + podcast.name + "/" + field, value, qos=0, retain=True)
                # Cache only after a successful publish so a failure is retried.
                setattr(podcast, field, value)
        except Exception as e:
            print("Can't get %s %s: %s" % (podcast.name, label, e))
def planetradio(name, url, timeout=10):
    """Fetch now-playing JSON for a station and publish picture, artist and track.

    The current local date (``YYYY-MM-DD``), time (``HH:MM``) and ``/1`` are
    appended to ``url``. The response is parsed as JSON and the
    ``nowPlayingImage``, ``nowPlayingArtist`` and ``nowPlayingTrack`` values
    of its first element are published (retained, QoS 0) under
    ``RadioStream/<name>/``.

    Args:
        name: Station identifier used in the MQTT topic.
        url: Base API address, ending with a slash.
        timeout: Seconds to wait for the HTTP request.

    Returns:
        None. Any failure is caught and printed instead of propagating.
    """
    # Take one timestamp so the date and time parts cannot straddle midnight.
    now = datetime.now()
    url = url + now.strftime('%Y-%m-%d') + "/" + now.strftime('%H:%M') + "/1"
    try:
        req = Request(url, headers=headers)
        with urlopen(req, timeout=timeout) as response:
            # Decode the bytes instead of taking their repr: str(bytes) turns
            # non-ASCII text into invalid escapes, and stripping quotes
            # removed apostrophes from artist and track names.
            json_content = json.loads(response.read().decode("utf-8"))
        now_playing = json_content[0]
        client.publish("RadioStream/" + name + "/pic_url", now_playing["nowPlayingImage"], qos=0, retain=True)
        client.publish("RadioStream/" + name + "/artist", now_playing["nowPlayingArtist"], qos=0, retain=True)
        client.publish("RadioStream/" + name + "/track", now_playing["nowPlayingTrack"], qos=0, retain=True)
    except Exception as e:
        # Covers network errors, malformed JSON, an empty list and missing keys.
        print("Can't get %s now-playing data: %s" % (name, e))
def streamguys(url, timeout=10):
    """Fetch stream metadata JSON and publish artist and track for Radio_Fiji_Two.

    The ``StreamTitle`` value of the response is split on ``" - "``. The last
    part is published as the artist and the part before it as the track, or
    ``"N/A"`` when the title has no separator. Both go to
    ``RadioStream/Radio_Fiji_Two/`` (retained, QoS 0).

    NOTE(review): many streams format the title as "Artist - Title", which
    would make the last part the track; the original ordering is kept here -
    confirm against this station's metadata.

    Args:
        url: Address of the metadata endpoint.
        timeout: Seconds to wait for the HTTP request.

    Returns:
        None. Fetch and parse failures are caught and printed.
    """
    try:
        req = Request(url, headers=headers)
        with urlopen(req, timeout=timeout) as response:
            # Decode the bytes instead of taking their repr: str(bytes) turns
            # non-ASCII text into invalid escapes, and stripping quotes
            # removed apostrophes from titles.
            json_content = json.loads(response.read().decode("utf-8"))
        metadata = json_content["StreamTitle"].split(" - ")
    except Exception as e:
        print("Can't get Radio_Fiji_Two metadata: %s" % e)
        return
    # str.split always returns at least one element, so only the
    # second-to-last part can be missing.
    artist = metadata[-1]
    track = metadata[-2] if len(metadata) > 1 else "N/A"
    client.publish("RadioStream/Radio_Fiji_Two/artist", artist, qos=0, retain=True)
    client.publish("RadioStream/Radio_Fiji_Two/track", track, qos=0, retain=True)
# Counts 15-second polling cycles; used to run the Hass podcast check only
# once every 240 cycles (240 x 15 s = roughly one hour).
Refresh_Timer = 0
# NOTE(review): the block structure below was reconstructed from a paste that
# had lost its indentation. It assumes only the Hass_Podcast fetch and the
# counter reset belong to the `if`; confirm against the original script.
while True:
    # Each source is polled in its own thread so a slow site does not delay the others.
    Thread(target=bs_get_value_radio, args=[RTL2]).start()
    Thread(target=planetradio, args=["Absolute_Radio_CR","https://listenapi.bauerradio.com/api9/eventsdadi/absolute-classic-rock/"]).start()
    Thread(target=planetradio, args=["Absolute_Radio","https://listenapi.bauerradio.com/api9/eventsdadi/absolute-radio/"]).start()
    Thread(target=planetradio, args=["Scala","https://listenapi.bauerradio.com/api9/eventsdadi/scala-radio/"]).start()
    Thread(target=streamguys, args=["https://jeta.streamguys.com:8444/8944a2fe68caa986fdee0f2a3c03675d624bab9a/scraper/9cebb028-9f73-4062-830b-478535e516a1/metadata"]).start()
    Thread(target=bs_get_value_podcast, args=[IoT_Podcast]).start()
    if Refresh_Timer == 240:
        # The counter starts at 0, so the first Hass podcast fetch happens
        # about an hour after start-up.
        Thread(target=bs_get_value_podcast, args=[Hass_Podcast]).start()
        Refresh_Timer = 0
    Thread(target=bs_get_value_radio, args=[Chill]).start()
    time.sleep(15)
    Refresh_Timer += 1
It probably needs a bit of cleaning… I originally scraped the content of the HTML player page, but that would fail every now and again. I then found out that some radio stations offer a JSON page with the details I needed (e.g. the Planet Radio stations), so I adjusted the script to use that instead.
I call the script at Home Assistant (HA) startup, and it keeps running from then on…
I hope it makes sense and is helpful to others.