page-scraper
v2.0.5
Published
Web page scraper with a jQuery-like syntax for Node.
Downloads
60
Readme
Page Scraper
Web page scraper with a jQuery-like syntax for Node. Powered by got and cheerio.
Installation
$ npm install page-scraper
Usage
const scrape = require('page-scraper');
(async () => {
const $ = await scrape('https://example.com');
// Extract data from the page with jQuery-like syntax.
console.log({
title: $('title').text(),
heading: $('h1').text(),
paragraphs: $('p').map((index, el) => $(el).text()).get(),
link: $('p > a').attr('href')
});
})();
Check the cheerio documentation for a complete guide on how to scrape the page using jQuery-like syntax.
Recipes
Handling Errors
const scrape = require('page-scraper');
(async () => {
try {
const $ = await scrape('https://httpbin.org/status/400');
} catch(error) {
// The error message.
console.error(error.message);
if (error.hasOwnProperty('response')) {
// The HTTP status code.
console.error(error.response.statusCode);
}
if (error.hasOwnProperty('$')) {
// The HTML document.
console.error(error.$.html());
}
}
})();
Note that scrape also throws an error if the page is not an HTML document.
const scrape = require('page-scraper');
(async () => {
try {
const $ = await scrape('https://httpbin.org/json');
} catch(error) {
console.error(error.message);
if (error.hasOwnProperty('response')) {
// The response body.
console.error(error.response.body);
}
}
})();
Scraping Multiple Pages
const scrape = require('page-scraper');
(async () => {
const $ = await Promise.all([
scrape('https://example.com'),
scrape('https://httpbin.org/html')
]);
console.log({
heading_1: $[0]('h1').text(),
heading_2: $[1]('h1').text()
});
})();