Web page Scraping - MQTT message structure

Hi guys,
I’ve been working on my first flow as a demo to scrape data from a web page and present it as a sensor in Home Assistant.

In this flow I’m pulling cruise ship data from a table, converting each row to an array object, finding the record where the date equals today, and then transmitting that data to my MQTT server as 4 sensors - cruise ship name, arrival date, arrival time and departure time.

I’ll then extend it to handle the cases of no ship in port and the next ship due in port.

I’m currently stuck on getting the structure right for the MQTT messages. My understanding is that I have to send some messages to set up the message structure (shown as “Message Config” below) and then a message sending the actual contents.

Can anyone see what’s wrong?

I’m not sure about the value_template: "{{ value_json… stuff, as I don’t think I’m using JSON, but the demo code I stole had it.

Message Config
var config1 = {
payload: {
name: “Name”,
state_topic: “homeassistant/sensor/cruiseship_current_name/state”,
value_template: “{{ value_json.Name }}”
},
topic: “homeassistant/sensor/cruiseship_current_name/config”
};
var config2 = {
payload: {
name: “Cruiseship Arrival Date”,
state_topic: “homeassistant/sensor/cruiseship_current_arrivedate/state”,
value_template: “{{ value_json.arrivedate }}”
},
topic: “homeassistant/sensor/cruiseship_current_arrivedate/config”
};
var config3 = {
payload: {
name: “Cruiseship Arrival Time”,
state_topic: “homeassistant/sensor/cruiseship_current_arrivetime/state”,
value_template: “{{ value_json.arrivetime }}”
},
topic: “homeassistant/sensor/cruiseship_current_arrivetime/config”
};
var config4 = {
payload: {
name: “Cruiseship Departure Time”,
state_topic: “homeassistant/sensor/cruiseship_current_departtime/state”,
value_template: “{{ value_json.departtime }}”
},
topic: “homeassistant/sensor/cruiseship_current_departtime/config”
};
return [config1, config2, config3, config4];

Message

//Convert text to date
var msec = Date.parse(msg.payload[4]);
var d = new Date(msec);
var Currdate = new Date(Date.now());

//compare ship date to todays date
if (d.toDateString() == Currdate.toDateString())
{
    //if it matches then build and send this message
    var CurrentCruiseship = { 
        topic: "homeassistant/sensor/cruiseship_current/state",
        payload: {cruiseship_current_name:msg.payload[0], cruiseship_current_arrivedate: msg.payload[4], cruiseship_current_arrivetime:msg.payload[5], cruiseship_current_departtime:msg.payload[6]}
};
    
return CurrentCruiseship;
}
else 
{
return null;
}

Here is my flow. You will need to connect it to your MQTT server, but otherwise it should just work as intended.

[
    {
        "id": "e83007b1.58cd58",
        "type": "tab",
        "label": "CruiseShipInDock",
        "disabled": false,
        "info": ""
    },
    {
        "id": "38f97bd5.4d8e54",
        "type": "inject",
        "z": "e83007b1.58cd58",
        "name": "Run every 6 hours",
        "topic": "",
        "payload": "",
        "payloadType": "date",
        "repeat": "21600",
        "crontab": "",
        "once": false,
        "onceDelay": 0.1,
        "x": 310,
        "y": 120,
        "wires": [
            [
                "bb53b1fc.2d7be"
            ]
        ]
    },
    {
        "id": "bb53b1fc.2d7be",
        "type": "http request",
        "z": "e83007b1.58cd58",
        "name": "Get From port list",
        "method": "GET",
        "ret": "txt",
        "url": "http://www.port-tauranga.co.nz/cargo-and-shipping/cruise-schedules/",
        "tls": "",
        "x": 430,
        "y": 240,
        "wires": [
            [
                "566023fb.ce5fcc"
            ]
        ]
    },
    {
        "id": "ed663881.877508",
        "type": "html",
        "z": "e83007b1.58cd58",
        "name": "",
        "property": "payload",
        "outproperty": "payload",
        "tag": "tr",
        "ret": "html",
        "as": "multi",
        "x": 930,
        "y": 240,
        "wires": [
            [
                "42433032.ec8ba"
            ]
        ]
    },
    {
        "id": "566023fb.ce5fcc",
        "type": "html",
        "z": "e83007b1.58cd58",
        "name": "",
        "property": "payload",
        "outproperty": "payload",
        "tag": "table[class=cruise]",
        "ret": "html",
        "as": "multi",
        "x": 690,
        "y": 240,
        "wires": [
            [
                "ed663881.877508"
            ]
        ]
    },
    {
        "id": "42433032.ec8ba",
        "type": "html",
        "z": "e83007b1.58cd58",
        "name": "",
        "property": "payload",
        "outproperty": "payload",
        "tag": "td",
        "ret": "html",
        "as": "single",
        "x": 1090,
        "y": 240,
        "wires": [
            [
                "8141bb5c.3b0bf8",
                "2e2bd612.75ae7a"
            ]
        ]
    },
    {
        "id": "8141bb5c.3b0bf8",
        "type": "function",
        "z": "e83007b1.58cd58",
        "name": "Find Next Cruiseship",
        "func": "\nvar msec = Date.parse(msg.payload[4]);\nvar d = new Date(msec);\nvar Currdate = new Date(Date.now());\n\n\nif (d.toDateString() == Currdate.toDateString())\n{\n    var CurrentCruiseship = { \n        topic: \"homeassistant/sensor/cruiseship_current/state\",\n        payload: {cruiseship_current_name:msg.payload[0], cruiseship_current_arrivedate: msg.payload[4], cruiseship_current_arrivetime:msg.payload[5], cruiseship_current_departtime:msg.payload[6]}\n};\n    \nreturn CurrentCruiseship;\n}\nelse \n{\nreturn null;\n}\n",
        "outputs": 1,
        "noerr": 0,
        "x": 1080,
        "y": 400,
        "wires": [
            [
                "86a5ca5e.8e9898",
                "73312c35.357034"
            ]
        ]
    },
    {
        "id": "73312c35.357034",
        "type": "debug",
        "z": "e83007b1.58cd58",
        "name": "match",
        "active": true,
        "tosidebar": true,
        "console": false,
        "tostatus": false,
        "complete": "payload",
        "x": 1550,
        "y": 160,
        "wires": []
    },
    {
        "id": "a5283b8.cf878c8",
        "type": "mqtt out",
        "z": "e83007b1.58cd58",
        "name": "",
        "topic": "",
        "qos": "",
        "retain": "",
        "broker": "3353dd4a.a2d0d2",
        "x": 1550,
        "y": 240,
        "wires": []
    },
    {
        "id": "2e2bd612.75ae7a",
        "type": "function",
        "z": "e83007b1.58cd58",
        "name": "Format Messages",
        "func": "var config1 = {\n    payload: {\n        name: \"Name\",\n        state_topic: \"homeassistant/sensor/cruiseship_current_name/state\",\n        value_template: \"{{ value_json.Name }}\"\n    },\n    topic: \"homeassistant/sensor/cruiseship_current_name/config\"\n};\nvar config2 = {\n    payload: {\n        name: \"Cruiseship Arrival Date\",\n        state_topic: \"homeassistant/sensor/cruiseship_current_arrivedate/state\",\n        value_template: \"{{ value_json.arrivedate }}\"\n    },\n    topic: \"homeassistant/sensor/cruiseship_current_arrivedate/config\"\n};\nvar config3 = {\n    payload: {\n        name: \"Cruiseship Arrival Time\",\n        state_topic: \"homeassistant/sensor/cruiseship_current_arrivetime/state\",\n        value_template: \"{{ value_json.arrivetime }}\"\n    },\n    topic: \"homeassistant/sensor/cruiseship_current_arrivetime/config\"\n};\nvar config4 = {\n    payload: {\n        name: \"Cruiseship Departure Time\",\n        state_topic: \"homeassistant/sensor/cruiseship_current_departtime/state\",\n        value_template: \"{{ value_json.departtime }}\"\n    },\n    topic: \"homeassistant/sensor/cruiseship_current_departtime/config\"\n};\nreturn [config1, config2, config3, config4];",
        "outputs": 4,
        "noerr": 0,
        "x": 1270,
        "y": 120,
        "wires": [
            [
                "a5283b8.cf878c8"
            ],
            [
                "a5283b8.cf878c8"
            ],
            [
                "a5283b8.cf878c8"
            ],
            [
                "a5283b8.cf878c8"
            ]
        ]
    },
    {
        "id": "86a5ca5e.8e9898",
        "type": "delay",
        "z": "e83007b1.58cd58",
        "name": "",
        "pauseType": "delay",
        "timeout": "1",
        "timeoutUnits": "seconds",
        "rate": "1",
        "nbRateUnits": "1",
        "rateUnits": "second",
        "randomFirst": "1",
        "randomLast": "5",
        "randomUnits": "seconds",
        "drop": false,
        "x": 1380,
        "y": 400,
        "wires": [
            [
                "a5283b8.cf878c8"
            ]
        ]
    },
    {
        "id": "3353dd4a.a2d0d2",
        "type": "mqtt-broker",
        "z": "",
        "name": "Home Broker",
        "broker": "",
        "port": "1883",
        "clientid": "Nred",
        "usetls": false,
        "compatmode": true,
        "keepalive": "60",
        "cleansession": true,
        "birthTopic": "",
        "birthQos": "0",
        "birthPayload": "",
        "closeTopic": "",
        "closePayload": "",
        "willTopic": "",
        "willQos": "0",
        "willPayload": ""
    }
]

First send the message structure? May I ask where you read this?

I’m unfamiliar with this requirement … and have never needed to do it. The flow just publishes a message to a topic and the sensor is subscribed to the topic. The onus is on the sensor to interpret the message. The message can be a simple value or in JSON format so that one message can contain multiple values.

For example I can choose to publish simple messages to four topics:

ship/name
ship/arrival/date
ship/arrival/time
ship/departure/time

Each sensor subscribes to its associated topic.

Alternately, I could publish the data in JSON format to one topic:
ship
Where the message format would be:
{"name":"Sea Wind", "arrival_date": "2019-01-01", "arrival_time":"08:35", "departure_time":"21:15"}
All sensors subscribe to the one topic and use an appropriate value_template to extract the desired property.

1 Like

Ah OK, I was trying to use the MQTT auto-discovery feature, and that is what I thought the homeassistant/sensor/cruiseship_current_****/config topic was for - although I could be wrong, as I am just learning.

The guide I used is the one below. I tried to convert it to my needs, but I got a bit stuck because their request response is JSON and mine is a TR/TD HTML table.
https://webworxshop.com/2018/05/08/home-assistant-mqtt-discovery-sensors-in-node-red

I’m going to try your recommendation of having one topic and subscribing the 4 sensors to it. A change of tack will be good, as I have no idea how to get the auto-discovery working.

I went with the logic that you really only care about the top row in the table. From that, you can check whether the returned arrival day is the current day: if it is, you know the ship is in port; if not, it is the next ship due in port. You can add that logic into the function node.

# Four MQTT sensors that all subscribe to the same state topic. The message on
# that topic is a JSON object; each sensor's value_template picks one property
# out of it (name, arrivedate, arrivetime, departtime).
# NOTE(review): these entries presumably belong under the `sensor:` key of the
# Home Assistant configuration - confirm against your setup.

# Ship name
- platform:            mqtt
  state_topic:         'homeassistant/sensor/cruiseship/state'
  name:                'Cruiseship Name'
  value_template:      '{{ value_json.name }}'
# Arrival date
- platform:            mqtt
  state_topic:         'homeassistant/sensor/cruiseship/state'
  name:                'Cruiseship Arrive Date'
  value_template:      '{{ value_json.arrivedate }}'
# Arrival time
- platform:            mqtt
  state_topic:         'homeassistant/sensor/cruiseship/state'
  name:                'Cruiseship Arrive Time'
  value_template:      '{{ value_json.arrivetime }}'
# Departure time
- platform:            mqtt
  state_topic:         'homeassistant/sensor/cruiseship/state'
  name:                'Cruiseship Depart Time'
  value_template:      '{{ value_json.departtime }}'
[{"id":"c977d80c.abe9d8","type":"inject","z":"b502711a.cd8a4","name":"Run every 6 hours","topic":"","payload":"","payloadType":"date","repeat":"21600","crontab":"","once":false,"onceDelay":0.1,"x":138,"y":208,"wires":[["e01c3001.59498"]]},{"id":"e01c3001.59498","type":"http request","z":"b502711a.cd8a4","name":"Get From port list","method":"GET","ret":"txt","url":"http://www.port-tauranga.co.nz/cargo-and-shipping/cruise-schedules/","tls":"","x":346,"y":208,"wires":[["86a1db4c.655628"]]},{"id":"86a1db4c.655628","type":"html","z":"b502711a.cd8a4","name":"","property":"payload","outproperty":"payload","tag":"table[class=cruise]>tr:first-of-type>td","ret":"text","as":"single","x":614,"y":208,"wires":[["7e8dbb75.16aa54"]]},{"id":"7bb9a9c5.3e1bc8","type":"mqtt out","z":"b502711a.cd8a4","name":"publish","topic":"homeassistant/sensor/cruiseship/state","qos":"","retain":"true","broker":"ec7a0501.233bf8","x":972,"y":208,"wires":[]},{"id":"7e8dbb75.16aa54","type":"function","z":"b502711a.cd8a4","name":"","func":"msg.payload = {\n    \"name\": msg.payload[0].trim(),\n    \"arrivedate\": msg.payload[4].trim(),\n    \"arrivetime\": msg.payload[5].trim(),\n    \"departtime\": msg.payload[6].trim()\n};\nreturn msg;","outputs":1,"noerr":0,"x":834,"y":208,"wires":[["7bb9a9c5.3e1bc8","65612c01.af2244"]]},{"id":"65612c01.af2244","type":"debug","z":"b502711a.cd8a4","name":"","active":true,"tosidebar":true,"console":false,"tostatus":false,"complete":"false","x":982,"y":256,"wires":[]}]
1 Like

Far out, I spent 8 hours working on a solution and you’ve done it in 7 lines, that is cool! I’m really blown away by the power of Node-RED and I’m quite keen to learn it.

How did you know what to put into the HTML node? Is there a wiki somewhere? I really struggled to find relevant demos. “table[class=cruise]>tr:first-of-type>td” - it took me ages to understand how to handle the HTML, and you managed to condense 3 nodes into one.

Just tested it out and sure enough it’s all there. Thanks for that; I’m going to use this as a template to scrape some tide times for HA next.

Thank you both heaps for your help.