mycrawl
v0.1.19
Published
Crawl a specific website
Downloads
5
Readme
mycrawl
Developed for yiqicha.net to crawl company registration information in Shanghai from the website http://www.sgs.gov.cn/shaic/ .
Questions are welcome; contact [email protected]
This crawler can retrieve company registration information and company certification process information. You must provide search parameters to the crawler when using this module.
How to use?
- use npm to get the latest package
npm install mycrawl
- create your crawler
var Crawler = require('mycrawl').Crawler;
var crawler = new Crawler();
API
searchCompanyInformation
registrationOptions = {
homeRefererUrl: 'http://www.sgs.gov.cn/lz/etpsInfo.do?method=index', // The referer url
registrationResultsUrl: 'http://www.sgs.gov.cn/lz/etpsInfo.do?method=doSearch', // results for keywords url
registrationDetailUrl: 'http://www.sgs.gov.cn/lz/etpsInfo.do?method=viewDetail' // url for keywords detail
}
var keywords = '上海东风';
crawler.searchCompanyInformation(registrationOptions, keywords, function(err, registrationResults) {
//you handle...
});
getMoreRegistrations
crawler.getMoreRegistrations(registrationOptions, keywords, allpageNo, pageNo, function(err, moreRegistrations) {
callback(err, moreRegistrations)
})
searchCompanyNameStatus
var companyStatusOptions = {
targetUrl: 'http://www.sgs.gov.cn/shaic/workonline/appStat!toNameAppList.action'
};
var keyword = '美孕国际';
crawler.searchCompanyNameStatus(companyStatusOptions, keyword, function(err, companyNameStatusInfo) {
log(keyword, err, companyNameStatusInfo)
})
searchRegistrationStatus
var registrationStatusOption = {
targetUrl : 'http://www.sgs.gov.cn/shaic/workonline/appStat!toEtpsAppList.action'
}
var keyword = '上海顺风';
crawler.searchRegistrationStatus(registrationStatusOption, keyword, function(err, registrationStatusInfo) {
log(keyword, err, registrationStatusInfo)
})
getRegistrationDisclosure
registration disclosure information
var options = {
keyword: '310114000233023' // unified social credit code / registration number (统一社会信用代码/注册号)
}
getCompanyDisclosure
This is the same as the getRegistrationDisclosure API, but it returns company disclosure information.