Chromecast Radio with station and player selection

BS4 is Beautiful Soup. It’s the engine that’s used by the Scrape sensor.
But the Scrape sensor only refreshes once every minute, and I find it a bit more complicated than using BS4 directly. In addition, I want a single call per radio, as opposed to one call per attribute, which might end up getting me blacklisted as spam/DDoS.
Here is the python script I’ve written to extract artist, track and album art from the various radios I listen to:

Show Code
#!/usr/bin/python
# -*- coding: utf-8 -*-

from bs4 import BeautifulSoup
from urllib.request import urlopen, Request
import paho.mqtt.client as mqtt
import time
from threading import Thread
from datetime import datetime
import json

import secrets

# Broker location and credentials are read from the local ``secrets`` module
# so that they stay out of this script.
MQTT_Host, MQTT_Port = secrets.MQTT_Host, secrets.MQTT_Port
MQTT_User, MQTT_Password = secrets.MQTT_User, secrets.MQTT_Password

# Single shared MQTT client used by every scraper function below.
# The client id must be unique on the MQTT network.
client = mqtt.Client("HA_Scraper")
client.username_pw_set(
    str(MQTT_User),
    str(MQTT_Password),
)
client.connect(
    MQTT_Host,
    port=MQTT_Port,
    keepalive=60,
)
# Run the MQTT network loop in a background thread.
client.loop_start()

# HTTP headers sent with every scrape request.
headers = {
    'User-Agent': 'Mozilla/5.0',
}



class Radio:
    """Scrape configuration and last scraped values for one radio station.

    ``name`` is used in the MQTT topic and ``url`` is the page that gets
    scraped. For each of ``pic``, ``artist`` and ``track`` there are three
    attributes: the last scraped value (start it as an empty string), the CSS
    selector locating the element (``*_sel``) and the element attribute to
    read (``*_att``).
    """

    def __init__(self, name, url, pic, pic_sel, pic_att, artist, artist_sel, artist_att, track, track_sel, track_att):
        # Station identity and the page it is scraped from.
        self.name, self.url = name, url
        # Each group below is (cached value, CSS selector, attribute name).
        self.pic, self.pic_sel, self.pic_att = pic, pic_sel, pic_att
        self.artist, self.artist_sel, self.artist_att = artist, artist_sel, artist_att
        self.track, self.track_sel, self.track_att = track, track_sel, track_att



class Podcast:
    """Scrape configuration and last scraped values for one podcast feed.

    ``name`` is used in the MQTT topic and ``url`` is the feed that gets
    scraped. For each of ``track`` and ``mp3_url`` there is the last scraped
    value (start it as an empty string), the CSS selector locating the
    element (``*_sel``) and the element attribute to read (``*_att``).
    """

    def __init__(self, name, url, track, track_sel, track_att, mp3_url, mp3_sel, mp3_att):
        # Podcast identity and the feed it is scraped from.
        self.name, self.url = name, url
        # Each group below is (cached value, CSS selector, attribute name).
        self.track, self.track_sel, self.track_att = track, track_sel, track_att
        self.mp3_url, self.mp3_sel, self.mp3_att = mp3_url, mp3_sel, mp3_att


# Station and podcast definitions.
# The value fields (pic / artist / track / mp3_url) start empty; the scraper
# functions fill them in with the last value they scraped.
# An empty *_att means "use the element's text" for artist / track / mp3_url.

RTL2 = Radio(  # https://timeline.rtl.fr/RTL2/songs
    name="RTL2",
    url="https://www.6play.fr/rtl2/quel-est-ce-titre",
    pic="",
    pic_sel=".ecfper-2",
    pic_att="src",
    artist="",
    artist_sel=".ecfper-6",
    artist_att="",
    track="",
    track_sel=".ecfper-5",
    track_att="",
)

Absolute_Radio_CR = Radio(
    name="Absolute_Radio_CR",
    url="https://planetradio.co.uk/absolute-radio-60s/player/",
    pic="",
    pic_sel=".station-cards.cf > div:nth-child(3) > a .main-img",
    pic_att="style",
    artist="",
    artist_sel=".station-cards.cf > div:nth-child(3) > a > .text-wrapper > div > .artist",
    artist_att="",
    track="",
    track_sel=".station-cards.cf > div:nth-child(3) > a > .text-wrapper > div > .track",
    track_att="",
)

Absolute_Radio_CR2 = Radio(
    name="Absolute_Radio_CR2",
    url="https://planetradio.co.uk/absolute-classic-rock/player/",
    pic="",
    pic_sel=".now-playing-wrapper > .now-playing.cf > .now-playing-block.cf.right.extended-info > .image",
    pic_att="style",
    artist="",
    artist_sel=".now-playing-wrapper > .now-playing.cf > .now-playing-block.cf.right.extended-info > .info-wrapper.fr > div > .title.extended-info",
    artist_att="",
    track="",
    track_sel=".now-playing-wrapper > .now-playing.cf > .now-playing-block.cf.right.extended-info > .info-wrapper.fr > div > .track.extended-info",
    track_att="",
)

Absolute_Radio = Radio(
    name="Absolute_Radio",
    url="https://planetradio.co.uk/absolute-radio-60s/player/",
    pic="",
    pic_sel=".station-cards.cf > div:nth-child(1) > a .main-img",
    pic_att="style",
    artist="",
    artist_sel=".station-cards.cf > div:nth-child(1) > a > .text-wrapper > div > .artist",
    artist_att="",
    track="",
    track_sel=".station-cards.cf > div:nth-child(1) > a > .text-wrapper > div > .track",
    track_att="",
)

Absolute_Radio2 = Radio(
    name="Absolute_Radio2",
    url="https://planetradio.co.uk/absolute-radio/player/",
    pic="",
    pic_sel=".now-playing-wrapper > .now-playing.cf > .now-playing-block.cf.right.extended-info > .image",
    pic_att="style",
    artist="",
    artist_sel=".now-playing-wrapper > .now-playing.cf > .now-playing-block.cf.right.extended-info > .info-wrapper.fr > div > .title.extended-info",
    artist_att="",
    track="",
    track_sel=".now-playing-wrapper > .now-playing.cf > .now-playing-block.cf.right.extended-info > .info-wrapper.fr > div > .track.extended-info",
    track_att="",
)

IoT_Podcast = Podcast(
    name="IoT Podcast",
    url="https://iotpodcast.com/feed/",
    track="",
    track_sel="item:nth-of-type(1) title",
    track_att="",
    mp3_url="",
    mp3_sel="enclosure:nth-of-type(1)",
    mp3_att="url",
)

Hass_Podcast = Podcast(
    name="Hass Podcast",
    url="https://hasspodcast.io/feed/podcast",
    track="",
    track_sel="item:nth-of-type(1) title",
    track_att="",
    mp3_url="",
    mp3_sel="enclosure:nth-of-type(1)",
    mp3_att="url",
)

Chill = Radio(
    name="Chill",
    url="https://www.smoothradio.com/chill/radio/playlist/",
    pic="",
    pic_sel=".js-lazy",
    pic_att="data-src",
    artist="",
    artist_sel=".now-playing__text-content__details__artist",
    artist_att="",
    track="",
    track_sel=".now-playing__text-content__details__track",
    track_att="",
)

Scala = Radio(
    name="Scala",
    url="https://planetradio.co.uk/jazz-fm/player/",
    pic="",
    pic_sel=".station-cards.cf > div:nth-child(1) > a .main-img",
    pic_att="style",
    artist="",
    artist_sel=".station-cards.cf > div:nth-child(1) > a > .text-wrapper > div > .artist",
    artist_att="",
    track="",
    track_sel=".station-cards.cf > div:nth-child(1) > a > .text-wrapper > div > .track",
    track_att="",
)

Scala2 = Radio(
    name="Scala2",
    url="https://planetradio.co.uk/scala-radio/player/",
    pic="",
    pic_sel=".now-playing-wrapper > .now-playing.cf > .now-playing-block.cf.right.extended-info > .image",
    pic_att="style",
    artist="",
    artist_sel=".now-playing-wrapper > .now-playing.cf > .now-playing-block.cf.right.extended-info > .info-wrapper.fr > div > .title.extended-info",
    artist_att="",
    track="",
    track_sel=".now-playing-wrapper > .now-playing.cf > .now-playing-block.cf.right.extended-info > .info-wrapper.fr > div > .track.extended-info",
    track_att="",
)



def _fetch_soup(url):
    """Download *url* and return it parsed by BeautifulSoup, or None on failure.

    Any download or parse error is reported on stdout instead of being raised,
    so a flaky site cannot kill the calling worker thread.
    """
    try:
        # A bounded wait keeps hung requests from piling up, since a new
        # worker thread is started for every poll.
        with urlopen(Request(url, headers=headers), timeout=10) as response:
            content = response.read()
        return BeautifulSoup(content, "html.parser")
    except Exception as e:
        print("%s Unable to get BS from URL" % e)
        return None


def _scraped_value(soup, selector, attribute):
    """Return the value of the first element matching *selector*.

    With a non-empty *attribute* the raw attribute value is returned;
    otherwise the element text, title-cased and stripped.

    Raises IndexError when nothing matches and KeyError when the attribute is
    missing on the matched element.
    """
    element = soup.select(selector)[0]
    if attribute:
        return element[attribute]
    return element.text.title().strip()


def _publish_if_changed(source, field, topic, value):
    """Publish *value* to RadioStream/<source.name>/<topic> when it changed.

    ``source.<field>`` holds the last published value. It is compared against
    the fully normalised *value* and only updated after the publish call, so
    a failed publish is retried on the next poll.
    """
    if getattr(source, field) == value:
        return
    client.publish("RadioStream/" + source.name + "/" + topic, value, qos=0, retain=True)
    setattr(source, field, value)


def _update_field(source, soup, field, prefix, label):
    """Scrape one text field of *source* from *soup* and publish it if changed.

    *field* is both the attribute caching the value and the MQTT sub-topic,
    *prefix* is the stem of the ``<prefix>_sel`` / ``<prefix>_att``
    attributes, and *label* is only used in the failure message.
    """
    try:
        value = _scraped_value(
            soup,
            getattr(source, prefix + "_sel"),
            getattr(source, prefix + "_att"),
        )
        _publish_if_changed(source, field, field, value)
    except Exception as e:
        # Best effort: a page layout change must not stop the other fields.
        print("Can't get %s %s (%r)" % (source.name, label, e))


def bs_get_value_radio(radio):
    """Scrape picture, artist and track for *radio* and publish any changes.

    Values are published (retained) to ``RadioStream/<name>/pic_url``,
    ``.../artist`` and ``.../track``. Each field is handled independently; a
    failure is printed and the remaining fields are still processed. If the
    page cannot be downloaded or parsed, nothing is published.
    """
    soup = _fetch_soup(radio.url)
    if soup is None:
        return

    try:
        pic = soup.select(radio.pic_sel)[0][radio.pic_att]
        # Inline-style pictures arrive as "background-image:url(...)".
        pic = pic.replace("background-image:url(", "").replace(")", "")
        _publish_if_changed(radio, "pic", "pic_url", pic)
    except Exception as e:
        print("Can't get %s Image (%r)" % (radio.name, e))

    _update_field(radio, soup, "artist", "artist", "Artist")
    _update_field(radio, soup, "track", "track", "Track")


def bs_get_value_podcast(podcast):
    """Scrape the MP3 URL and title for *podcast* and publish any changes.

    Values are published (retained) to ``RadioStream/<name>/mp3_url`` and
    ``.../track``. Each field is handled independently; a failure is printed
    and the remaining field is still processed. If the feed cannot be
    downloaded or parsed, nothing is published.
    """
    soup = _fetch_soup(podcast.url)
    if soup is None:
        return

    _update_field(podcast, soup, "mp3_url", "mp3", "MP3 URL")
    _update_field(podcast, soup, "track", "track", "Track")

def planetradio(name, url):
    """Publish the now-playing picture, artist and track for one station.

    *url* is the base of the station's JSON events endpoint; the current
    local date and time plus ``/1`` are appended to it. The first entry of
    the returned JSON list is published (retained) to
    ``RadioStream/<name>/pic_url``, ``.../artist`` and ``.../track``.

    Download, decoding and missing-key errors are printed rather than raised,
    and in that case nothing is published.
    """
    # Take a single timestamp so the date and time parts cannot disagree
    # when the call happens right at a minute or day boundary.
    url = url + datetime.now().strftime('%Y-%m-%d/%H:%M') + "/1"
    req = Request(url, headers=headers)
    try:
        with urlopen(req, timeout=10) as response:
            # Decode the bytes properly; going through str(bytes) corrupts
            # apostrophes and non-ASCII characters in the JSON.
            now_playing = json.loads(response.read().decode("utf-8"))[0]
        pic = now_playing["nowPlayingImage"]
        artist = now_playing["nowPlayingArtist"]
        track = now_playing["nowPlayingTrack"]
    except (OSError, ValueError, LookupError, TypeError) as e:
        print("Can't get %s now-playing data (%r)" % (name, e))
        return

    client.publish("RadioStream/"+name+"/pic_url", pic, qos=0, retain=True)
    client.publish("RadioStream/"+name+"/artist", artist, qos=0, retain=True)
    client.publish("RadioStream/"+name+"/track", track, qos=0, retain=True)


def streamguys(url):
    """Publish artist and track for Radio Fiji Two from a metadata URL.

    The endpoint returns JSON with a ``StreamTitle`` string that is split on
    " - ": the last part is published as the artist and the part before it
    as the track, to ``RadioStream/Radio_Fiji_Two/artist`` and ``.../track``
    (retained). When the title has no " - " separator the track is published
    as "N/A".

    Download, decoding and missing-key errors are printed rather than raised,
    and in that case nothing is published.
    """
    # Example url: "https://jeta.streamguys.com:8444/8944a2fe68caa986fdee0f2a3c03675d624bab9a/scraper/9cebb028-9f73-4062-830b-478535e516a1/metadata"
    req = Request(url, headers=headers)
    try:
        with urlopen(req, timeout=10) as response:
            # Decode the bytes properly; going through str(bytes) corrupts
            # apostrophes and non-ASCII characters in the JSON.
            json_content = json.loads(response.read().decode("utf-8"))
        metadata = json_content["StreamTitle"].split(" - ")
    except (OSError, ValueError, LookupError, TypeError, AttributeError) as e:
        print("Can't get Radio_Fiji_Two metadata (%r)" % (e,))
        return

    # str.split always yields at least one part, so only the track can be missing.
    artist = metadata[-1]
    track = metadata[-2] if len(metadata) >= 2 else "N/A"
    client.publish("RadioStream/Radio_Fiji_Two/artist", artist, qos=0, retain=True)
    client.publish("RadioStream/Radio_Fiji_Two/track", track, qos=0, retain=True)

# Main polling loop: every 15 seconds start one worker thread per source so a
# slow site does not delay the others.
# Refresh_Timer counts passes; the Hass podcast is fetched on the first pass
# and then once every 240 passes (240 * 15 s = 1 hour). Previously it was
# first fetched only after a full hour had elapsed.
Refresh_Timer = 0
while True:

    Thread(target=bs_get_value_radio, args=[RTL2]).start()
    Thread(target=planetradio, args=["Absolute_Radio_CR","https://listenapi.bauerradio.com/api9/eventsdadi/absolute-classic-rock/"]).start()
    Thread(target=planetradio, args=["Absolute_Radio","https://listenapi.bauerradio.com/api9/eventsdadi/absolute-radio/"]).start()
    Thread(target=planetradio, args=["Scala","https://listenapi.bauerradio.com/api9/eventsdadi/scala-radio/"]).start()
    Thread(target=streamguys, args=["https://jeta.streamguys.com:8444/8944a2fe68caa986fdee0f2a3c03675d624bab9a/scraper/9cebb028-9f73-4062-830b-478535e516a1/metadata"]).start()
    Thread(target=bs_get_value_podcast, args=[IoT_Podcast]).start()
    if Refresh_Timer == 0:
        Thread(target=bs_get_value_podcast, args=[Hass_Podcast]).start()
    Thread(target=bs_get_value_radio, args=[Chill]).start()
    time.sleep(15)
    # Wrap around so the hourly refresh fires again after 240 passes.
    Refresh_Timer = (Refresh_Timer + 1) % 240

It probably needs a bit of cleaning… I originally scraped the content of the HTML player page, but that would fail every now and again. I then found out that some radios offer a JSON page with the details I needed (e.g. Planet Radio stations), so I adjusted.
I then call the script at HA startup and it keeps running…
Hope it makes sense / is helpful to others.

2 Likes