lucene-tokenizers
v5.3.3
Published
A JavaScript port of the tokenizers from Apache Lucene.
Downloads
428
Readme
Install:
npm install lucene-tokenizers
Example:
// Usage example: run the StandardTokenizer over a sample string and print
// every produced token as pretty-printed JSON.
var lt = require('lucene-tokenizers');

var ts = new lt.StandardTokenizer();
ts.setReader(new lt.StringReader("Hello World!"));

// This many leading characters are cut from each own property name of a
// token; judging by the sample output, the keys carry this prefix.
var prefixLength = '_$esjava$'.length;

var res = [];
// incrementToken() is called until it returns null.
for (var token = ts.incrementToken(); token !== null; token = ts.incrementToken()) {
  var pretty_token = {};
  Object.keys(token).forEach(function (key) {
    pretty_token[key.substring(prefixLength)] = token[key];
  });
  res.push(pretty_token);
}

console.log(JSON.stringify(res, null, 2));
Output:
[
{
"text": "Hello",
"type": "<ALPHANUM>",
"start": 0,
"positionIncrement": 1
},
{
"text": "World",
"type": "<ALPHANUM>",
"start": 6,
"positionIncrement": 1
}
]