bquery
v0.4.0
Published
bquery is a Node.js module that fetches a web page and uses CSS selectors to extract and structure the content of the HTML page.
Downloads
24
Maintainers
Readme
bquery
A quick, simple, and elegant way to fetch a web document and structure it.
Installation
Latest release:
$ npm install bquery
// Load the module under the same name the example uses below. (The module
// name and the variable name were previously misspelled, so `bquery` was
// never defined.)
var bquery = require("bquery");

// Fetch GitHub's home page and build one record per header navigation item.
bquery.query({
  "url": "https://github.com/",
  "selector": "ul.header-nav.left>li",
  "extract": {
    // Empty spec; the sample output below shows it yields the item's text.
    "title": {},
    // Read the "href" attribute of the element matched by "a".
    "url": {
      "selector": "a",
      "extract": "href"
    }
  }
}).then(function (docs) {
  console.log(docs);
  //=> {"results":[{"result":[{"title":"Explore","url":"https://github.com/explore"},{"title":"Features","url":"https://github.com/features"},{"title":"Enterprise","url":"https://enterprise.github.com/"},{"title":"Blog","url":"https://github.com/blog"}]}]}
});
Options
bquery can automatically detect the web document's charset, but in special circumstances you can also set the document's charset explicitly.
// Load the module under the same name the example uses below. (The module
// name and the variable name were previously misspelled, so `bquery` was
// never defined.)
var bquery = require("bquery");

bquery.query({
  "url": "https://github.com/",
  "selector": "ul.header-nav.left>li",
  // Set the document charset explicitly instead of relying on detection.
  "charset": "utf-8",
  "extract": {
    "title": {},
    // Read the "href" attribute of the element matched by "a".
    "url": {
      "selector": "a",
      "extract": "href"
    }
  }
}).then(function (docs) {
  console.log(docs);
});
You can also set the timeout period for the request.
// Request options, including a timeout for the page request.
// NOTE(review): the unit of "timeout" is presumably milliseconds; confirm
// against the bquery implementation.
var timeoutOptions = {
  "url": "https://github.com/",
  "selector": "ul.header-nav.left>li>a",
  "timeout": 3000
};

bquery.query(timeoutOptions);
Sometimes you need to modify the page content, such as CSS, JavaScript, or other elements, before you extract the document content. For this, you can use the "preSelect" option.
// Remove the "Explore" navigation item from the page before extraction runs.
bquery.query({
  "url": "https://github.com/",
  "selector": "ul.header-nav.left>li",
  // `$` is a cheerio object, so any cheerio-based operation can be used here.
  "preSelect": function ($) {
    $("ul.header-nav.left>li").each(function (i, elem) {
      // Strict comparison: the link text is compared against a string literal.
      if ($("a", elem).text() === "Explore") {
        $(elem).remove();
      }
    });
  },
  "extract": {
    "title": {},
    // Read the "href" attribute of the element matched by "a".
    "url": {
      "selector": "a",
      "extract": "href"
    }
  }
}).then(function (docs) {
  console.log(docs);
  //=>[
  //=>  { title: 'Features', url: 'https://github.com/features' },
  //=>  { title: 'Enterprise', url: 'https://enterprise.github.com/' },
  //=>  { title: 'Blog', url: 'https://github.com/blog' }
  //=>]
});
You can also use a callback to modify the selected attribute:
// Options fragment in the same shape as the objects passed to bquery.query()
// in the other examples; it is not a complete statement on its own.
{
"url": "https://github.com/",
"selector": "ul.header-nav.left>li",
"extract": {
"title":{
// Extract the text, then let the callback transform it.
"extract": "text",
// The callback receives the extracted value and returns the replacement;
// here it prefixes the value with "foo_".
"callback": function(txt){
return "foo_" + txt;
}
},
"url": {
"selector": "a",
"extract": "href"
}
}
}