@web-master/node-web-crawler
v0.10.0
Published
Crawl the web as easily as possible
Downloads
136
Maintainers
Readme
Description
It crawls the target page, collects links and scrapes data on each page :)
Installation
$ npm install --save @web-master/node-web-crawler
Usage
Basic
import crawl from '@web-master/node-web-crawler';
// crawl data on each link
const data = await crawl({
target: {
url: 'https://news.ycombinator.com',
iterator: {
selector: 'span.age > a',
convert: (x) => `https://news.ycombinator.com/${x}`,
},
},
fetch: () => ({
title: '.title > a',
}),
});
console.log(data);
// [
// { title: 'An easiest crawling and scraping module for NestJS' },
// { title: 'A minimalistic boilerplate on top of Webpack, Babel, TypeScript and React' },
// ...
// ...
// { title: '[Experimental] React SSR as a view template engine' }
// ]
Waitable (by using puppeteer)
import crawl from '@web-master/node-web-crawler';
// crawl data on each link
const data = await crawl({
target: {
url: 'https://news.ycombinator.com',
iterator: {
selector: 'span.age > a',
convert: (x) => `https://news.ycombinator.com/${x}`,
},
},
waitFor: 3 * 1000, // wait for the content to load (e.g. for single-page apps)
fetch: () => ({
title: '.title > a',
}),
});
console.log(data);
// [
// { title: 'An easiest crawling and scraping module for NestJS' },
// { title: 'A minimalistic boilerplate on top of Webpack, Babel, TypeScript and React' },
// ...
// ...
// { title: '[Experimental] React SSR as a view template engine' }
// ]
TypeScript Support
import crawl from '@web-master/node-web-crawler';
interface HackerNewsPage {
title: string;
}
const pages: HackerNewsPage[] = await crawl({
target: {
url: 'https://news.ycombinator.com',
iterator: {
selector: 'span.age > a',
convert: (x) => `https://news.ycombinator.com/${x}`,
},
},
fetch: () => ({
title: '.title > a',
}),
});
console.log(pages);
// [
// { title: 'An easiest crawling and scraping module for NestJS' },
// { title: 'A minimalistic boilerplate on top of Webpack, Babel, TypeScript and React' },
// ...
// ...
// { title: '[Experimental] React SSR as a view template engine' }
// ]