jcrawler
v1.3.0
Published
Asynchronous control flow wrapper to crawl websites
Downloads
18
Maintainers
Readme
jcrawler
Asynchronous control flow wrapper to crawl websites
How to Install
npm install jcrawler
Usage
// Usage example: run an async task for every item in a list through jcrawler,
// driving a puppeteer page for each item.
const jcrawler = require('jcrawler');
const puppeteer = require('puppeteer');

// NOTE: the statement above must end with a semicolon. Without it, the
// parenthesized async function below would be parsed as an argument list
// applied to the result of require('puppeteer').
(async () => {
  const crawler = jcrawler({
    puppeteer,
    concurrency: 2,
    rateLimit: 1000, // 1 second
    retries: 5,
    retryInterval: 1000, // 1 second
    backoff: 2, // multiplies the retryInterval for each retry
    log: true
  });

  // events: data, error and end
  crawler
    .on('data', data => console.log(data))
    .on('error', err => console.error(err))
    .on('end', (data, results) => console.log(results.timer.time));

  const fruits = ['apple', 'banana', 'orange'];

  // The callback receives (browser, page, fruit) for each entry of `fruits`.
  await crawler.each(fruits, async (browser, page, fruit) => {
    // using puppeteer
    await page.goto('http://google.com');
    await page.type("input[title='Search']", fruit);
    await page.click("input[value=\"I'm Feeling Lucky\"]");
    // One screenshot per item, named after the item.
    await page.screenshot({ path: `${fruit}.png` });
  });
})().catch(err => {
  // Surface failures that escape the crawler instead of leaving the
  // promise returned by the async IIFE unhandled.
  console.error(err);
  process.exitCode = 1;
});