For other people finding this with a similar problem:
lgvalle posted a helpful gist showing how to scrape websites in Cloud Functions:
const rp = require('request-promise');
const cheerio = require('cheerio');
const functions = require('firebase-functions');
const admin = require('firebase-admin');
// Initialize the default Firebase Admin app (no args: uses the runtime's default credentials).
admin.initializeApp();
// Firestore handle — unused in the snippet below; presumably kept for persisting scraped data. TODO confirm.
const db = admin.firestore();
exports.allyPallyFarmersMarket = functions.https.onRequest((request, response) => {
const topic = "allyPallyFarmersMarket"
const url = 'https://weareccfm.com/city-country-farmers-markets/market-profiles/alexandra-palace-market/'
const options = {
uri: url,
headers: { 'User-Agent': 'test' },
transform: (body) => cheerio.load(body)
}
rp(options)
.then(($) => {
const scrap = $('strong').text()
const [location, date, address] = scrap.split("–")
//EDIT BY neogucky:
//Here you can access scrapped vars: location, date, address
})
.catch((err) => response.status(400).send(err))
});
https://gist.github.com/lgvalle/df2a0a7ee10266ca8056fa15654307d8
Add the needed dependencies; the "dependencies" section of your package.json should look like this:
"dependencies": {
"firebase-admin": "~6.0.0",
"firebase-functions": "^2.0.3",
"request-promise": "~4.2.2",
"cheerio": "~0.22.0"
},
If you send JSON data {website: 'https://myurl.org'} in the request body, you can access it with:
request.body.website