ngrams-search
v0.0.4
Published
search by Ngram similarity. An imitation of the python NGram module
Downloads
8
Maintainers
Readme
ngrams
nodejs module for searching by Ngram similarity of characters. An imitation of the python NGram module
basic usage
var NGrams = require('ngrams-search');
var n = new NGrams() //default N=3 (size of ngram) w=1 (warp, use greater than 1 to increase the similarity of shorter string pairs)
n.add("spam"); //add single items
n.add(["span", "eg"]); //or an array of items
console.log(n.search("spa")); // second argument is optional - threshold - return only items with similarity greater than threshold. default is 0
/*
will output an array of items with similarity greater than threshold ordered by similarity
//[{
item: "spam",
similarity: 0.375
}, {
item: "span",
similarity: 0.375
}]
*/
n.getMaxNgram("spam");
/*
returns the item with the maximum ngram similarity or undefined if none
{
item: "spam",
similarity: 1.0
}
*/
more usage examples
var n = new NGrams(2); //create ngrams of size 2
n.pad("word"); //returns " word " padding is of size N-1
n.split("ab");
/*
returns the ngrams of the item "ab" after padding
[
[' ', 'a'],
['a', 'b'],
['b', ' ']
]
*/
n.getSharedNgrams("abe", "abc");
/*
returns all the ngrams that both items share:
[
[' ', 'a'],
['a', 'b']
]
*/
n.getCountSharedNgrams("abe", "abc"); // returns 2
n.getStatsSharedNgrams("abe", "abc");
/*
returns
{
all: 8, //count of all ngrams in both items
same: 2, //ngrams shared by both items
distinct: 6, //count of distinct ngrams in total
diff: 4 //count of unique ngrams - which do not appear in both items
}
*/
n.compare("abe","abc"); //third argument is warp - optional, default is 1
/*
returns 0.3333333333333333
formula is: ((distinct ^ warp)-(diff ^ warp))/(distinct^warp)
*/
for more use cases look at test.js