Scrape scanner attributes?

I am using scrape sensor to get fuel prices like so:

- platform: scrape
  resource: https://www.neste.lv/lv/content/degvielas-cenas
  name: Neste Futura 95
  select: "table tr:nth-of-type(2) td:nth-of-type(2) p strong"
  value_template: '{{ value }}'
  unit_of_measurement: "EUR/L"
  scan_interval: 3600

The fuel price is set as the sensor state, but that is not sufficient - the list of stations is also important, and I imagine the best way would be to add a custom attribute to the sensor.


AFAIK scraper sensor does not have such functionality.

Maybe someone has written a custom component that supports this, or should I use some other type of sensor?
What would be the best way to get this done?

1 Like

Feel free to open a PR with additional functionality.

I would love to, but unfortunately I have very limited knowledge of Python.

A quick work-around would be to use a second scrape sensor to get the address.

Or much simpler, use a custom component. Something like this should work:

"""Support for getting data from neste.lv."""
import asyncio
from datetime import timedelta
import logging

import aiohttp
import async_timeout
import voluptuous as vol

from homeassistant.components.sensor import PLATFORM_SCHEMA
from homeassistant.const import ATTR_ATTRIBUTION, CONF_NAME
from homeassistant.helpers.aiohttp_client import async_get_clientsession
import homeassistant.helpers.config_validation as cv
from homeassistant.helpers.entity import Entity

# Third-party package this platform needs; bs4 is imported inside
# NesteSensor.async_update.
REQUIREMENTS = ['beautifulsoup4==4.6.3']

_LOGGER = logging.getLogger(__name__)

# Attribution text exposed through the sensor's state attributes.
CONF_ATTRIBUTION = "Data provided by neste.lv."

# Sensor name used when the configuration does not supply one.
DEFAULT_NAME = 'Neste'

# Page that holds the fuel price table.
RESOURCE = 'https://www.neste.lv/lv/content/degvielas-cenas'

# NOTE(review): presumably picked up by Home Assistant as the polling
# interval for this platform - confirm against the platform loader.
SCAN_INTERVAL = timedelta(minutes=5)

# The only platform-specific option is an optional sensor name.
PLATFORM_SCHEMA = PLATFORM_SCHEMA.extend({
    vol.Optional(CONF_NAME, default=DEFAULT_NAME): cv.string,
})


async def async_setup_platform(
        hass, config, async_add_entities, discovery_info=None):
    """Create the single Neste.lv sensor and register it.

    The sensor reuses the HTTP client session obtained for ``hass`` and is
    added with an immediate first update requested.
    """
    sensor = NesteSensor(
        async_get_clientsession(hass),
        config.get(CONF_NAME),
    )
    async_add_entities([sensor], True)


class NesteSensor(Entity):
    """Representation of a Neste.lv sensor.

    The state is the text of the ``<strong>`` element in the second cell of
    the second table row on the Neste.lv price page; the text of the third
    cell of that row is exposed as the ``station`` state attribute.
    """

    def __init__(self, session, name):
        """Initialize a Neste.lv sensor.

        ``session`` is the HTTP client session used to fetch the page and
        ``name`` is the name reported for the sensor.
        """
        self._name = name
        self._state = None
        self._session = session
        self._unit_of_measurement = 'EUR/l'
        self._attrs = {ATTR_ATTRIBUTION: CONF_ATTRIBUTION}

    @property
    def name(self):
        """Return the name of the sensor."""
        return self._name

    @property
    def unit_of_measurement(self):
        """Return the unit the value is expressed in."""
        return self._unit_of_measurement

    @property
    def state(self):
        """Return the state of the device."""
        return self._state

    @property
    def device_state_attributes(self):
        """Return the device state attributes."""
        return self._attrs

    async def async_update(self):
        """Fetch the price page and refresh the state and attributes.

        On a timeout, a client error, or when the expected table cells are
        missing, an error is logged and the previous state is kept.
        """
        from bs4 import BeautifulSoup

        try:
            # Keep the body download inside the timeout as well; previously
            # only the request itself was guarded, so a stalled body read
            # was not bounded by the 10 second limit. The ``async with``
            # form without an explicit loop replaces the legacy sync
            # ``with ... loop=`` usage.
            async with async_timeout.timeout(10):
                response = await self._session.get(RESOURCE)
                _LOGGER.debug(
                    "Response from Neste.lv: %s", response.status)
                data = await response.text()
        except (asyncio.TimeoutError, aiohttp.ClientError):
            _LOGGER.error("Can not load data from Neste.lv")
            return

        _LOGGER.debug(data)

        raw_data = BeautifulSoup(data, 'html.parser')

        try:
            # Resolve both cells before touching any instance state so a
            # missing cell leaves the previous state and attribute intact.
            value = raw_data.select(
                "table tr:nth-of-type(2) td:nth-of-type(2) p strong")[0].text
            station = raw_data.select(
                "table tr:nth-of-type(2) td:nth-of-type(3) p")[0].text
        except IndexError:
            _LOGGER.error("Unable to extract data from HTML")
            return

        self._attrs['station'] = station
        self._state = value

[Screenshot from 2018-10-20 12-37-13]

1 Like

Thanks. Will try that out

It appears I haven’t updated the topic with the final solution.
So here it goes.

I slightly modified the component so that it can create several sensors, and put it in
<config_dir>/custom_components/sensor/neste_price_scraper.py:

"""Support for getting data from neste.lv."""
import asyncio
from datetime import timedelta
import logging

import aiohttp
import async_timeout
import voluptuous as vol

from homeassistant.components.sensor import PLATFORM_SCHEMA
from homeassistant.const import ATTR_ATTRIBUTION, CONF_NAME
from homeassistant.helpers.aiohttp_client import async_get_clientsession
import homeassistant.helpers.config_validation as cv
from homeassistant.helpers.entity import Entity

# Third-party package this platform needs; bs4 is imported inside
# NesteSensor.async_update.
REQUIREMENTS = ['beautifulsoup4==4.6.3']

_LOGGER = logging.getLogger(__name__)

# Attribution text exposed through the sensor's state attributes.
CONF_ATTRIBUTION = "Data provided by neste.lv."

# Configuration key naming the table row to read; its value is substituted
# into the ``tr:nth-of-type(...)`` part of the CSS selectors.
CONF_TABLE_ROW = "table_row"

# Page that holds the fuel price table.
RESOURCE = 'https://www.neste.lv/lv/content/degvielas-cenas'

# NOTE(review): presumably picked up by Home Assistant as the polling
# interval for this platform - confirm against the platform loader.
SCAN_INTERVAL = timedelta(minutes=60)

# Both the sensor name and the table row are mandatory; the row is
# validated as a string even though it is written as a number in YAML.
PLATFORM_SCHEMA = PLATFORM_SCHEMA.extend({
    vol.Required(CONF_NAME): cv.string,
    vol.Required(CONF_TABLE_ROW): cv.string,
})


async def async_setup_platform(
        hass, config, async_add_entities, discovery_info=None):
    """Create one Neste.lv fuel price sensor from a platform entry.

    The configured name and table row are handed to the sensor together
    with the HTTP client session obtained for ``hass``; the sensor is added
    with an immediate first update requested.
    """
    sensor = NesteSensor(
        async_get_clientsession(hass),
        config.get(CONF_NAME),
        config.get(CONF_TABLE_ROW),
    )
    async_add_entities([sensor], True)


class NesteSensor(Entity):
    """Representation of a Neste.lv sensor.

    The state is the text of the ``<strong>`` element in the second cell of
    the configured table row on the Neste.lv price page; the text of the
    third cell of that row is exposed as the ``station`` state attribute.
    """

    def __init__(self, session, name, table_row):
        """Initialize a Neste.lv sensor.

        ``session`` is the HTTP client session used to fetch the page,
        ``name`` is the name reported for the sensor and ``table_row`` is
        the value substituted into ``tr:nth-of-type(...)`` when selecting
        the row to read.
        """
        self._name = name
        self._table_row = table_row
        self._state = None
        self._session = session
        self._unit_of_measurement = 'EUR/l'
        self._attrs = {ATTR_ATTRIBUTION: CONF_ATTRIBUTION}

    @property
    def name(self):
        """Return the name of the sensor."""
        return self._name

    @property
    def unit_of_measurement(self):
        """Return the unit the value is expressed in."""
        return self._unit_of_measurement

    @property
    def state(self):
        """Return the state of the device."""
        return self._state

    @property
    def device_state_attributes(self):
        """Return the device state attributes."""
        return self._attrs

    async def async_update(self):
        """Fetch the price page and refresh the state and attributes.

        On a timeout, a client error, or when the expected table cells are
        missing, an error is logged and the previous state is kept.
        """
        from bs4 import BeautifulSoup

        try:
            # Keep the body download inside the timeout as well; previously
            # only the request itself was guarded, so a stalled body read
            # was not bounded by the 10 second limit. The ``async with``
            # form without an explicit loop replaces the legacy sync
            # ``with ... loop=`` usage.
            async with async_timeout.timeout(10):
                response = await self._session.get(RESOURCE)
                _LOGGER.debug(
                    "Response from Neste.lv: %s", response.status)
                data = await response.text()
        except (asyncio.TimeoutError, aiohttp.ClientError):
            _LOGGER.error("Can not load data from Neste.lv")
            return

        _LOGGER.debug(data)

        raw_data = BeautifulSoup(data, 'html.parser')

        # Both selectors address the same configured row and differ only in
        # the cell they pick.
        price_selector = (
            "table tr:nth-of-type(%s) td:nth-of-type(2) p strong"
            % self._table_row)
        station_selector = (
            "table tr:nth-of-type(%s) td:nth-of-type(3) p"
            % self._table_row)

        try:
            # Resolve both cells before touching any instance state so a
            # missing cell leaves the previous state and attribute intact.
            value = raw_data.select(price_selector)[0].text
            station = raw_data.select(station_selector)[0].text
        except IndexError:
            _LOGGER.error("Unable to extract data from HTML")
            return

        self._attrs['station'] = station
        self._state = value

Then I replaced the existing scrape sensors in the HA configuration with the following:

sensor: 
    - platform: neste_price_scraper
      name: Neste Futura 95
      table_row: 2

    - platform: neste_price_scraper
      name: Neste Futura D
      table_row: 4

Now I am able to dynamically include station addresses in Telegram notifications about fuel price changes.

Huge thanks, @fabaff