@web-master/node-web-scraper
v0.10.0
Published
Scrape the web as easily as possible
Downloads
147
Maintainers
Readme
Description
It scrapes the specified page :)
Installation
$ npm install --save @web-master/node-web-scraper
Usage
Basic
import scrape from '@web-master/node-web-scraper';
const data = await scrape({
target: 'http://example.com',
fetch: {
title: 'h1',
info: {
selector: 'p > a',
attr: 'href',
},
},
});
console.log(data);
// {
// title: 'Example Domain',
// info: 'http://www.iana.org/domains/example'
// }
Waitable (by using puppeteer)
import scrape from '@web-master/node-web-scraper';
const data = await scrape({
target: 'https://news.ycombinator.com/item?id=20821022',
waitFor: 3 * 1000, // wait for the content to load (e.g. single-page apps)
fetch: {
title: '.title > a',
},
});
console.log(data);
// {
// title: 'How we reduced deployment times by 95%'
// }
TypeScript Support
import scrape from '@web-master/node-web-scraper';
interface WikiSite {
url: string;
}
interface Wikipedia {
sites: WikiSite[];
}
const wiki: Wikipedia = await scrape({
target: 'https://www.wikipedia.org',
fetch: {
sites: {
listItem: '.central-featured a.link-box',
data: {
url: {
attr: 'href',
convert: (x: string) => `https:${x}`,
},
},
},
},
});
console.log(wiki.sites);
// [
// { url: 'https://en.wikipedia.org/' },
// { url: 'https://ja.wikipedia.org/' },
// ...
// ...
// { url: 'https://de.wikipedia.org/' }
// ]