site-crawler
v1.0.4
Published
site crawler for node.
Downloads
3
Readme
Crawler
A simple site crawler for Node.js.
Install
npm install site-crawler
Example Code
// Usage example: crawl one site, logging every accepted URL and page title.
const Crawler = require('site-crawler');

const site = 'https://techcrunch.com';

// `concurrency` is optional; per the package README it defaults to 10.
const crawler = new Crawler({ concurrency: 10 });

/**
 * 'found' handler: decides whether a URL should be crawled.
 * Pass the URL (it may be modified) to `next` to accept it, or `null` to
 * reject it.
 */
function onFound(url, next) {
  // Plain string-prefix test against `site`; anything else is rejected.
  if (!url.startsWith(site)) {
    next(null);
    return;
  }
  console.error('found:', url);
  next(url);
}

/**
 * 'crawl' handler: logs the <title> text of the page.
 * Per the package README, `res` is the response object of the request module
 * and `$` is a cheerio object for the page.
 */
function onCrawl(url, res, $, next) {
  const title = $('title').text();
  console.error('\tcrawl:', title);
  next();
}

/** 'error' handler: logs the URL together with `err.statusCode`. */
function onError(url, err) {
  console.error('\terror:', url, ':', err.statusCode);
}

/** 'complete' handler: prints a final marker on stdout. */
function onComplete() {
  console.log('done.');
}

crawler
  .on('found', onFound)
  .on('crawl', onCrawl)
  .on('error', onError)
  .on('complete', onComplete);

crawler.start(site);
Tests
cd crawler
npm test
Licence
MIT