dedupe-elasticsearch
v1.0.2
Published
Utility to search and destroy duplicate records
Downloads
4
Maintainers
Readme
dedupe-elasticsearch
A JavaScript port of https://github.com/alexander-marquardt/deduplicate-elasticsearch
Use WITH EXTREME CAUTION. Example usage:
// Usage example for dedupe-elasticsearch.
// NOTE(review): `Client` must already be in scope — presumably the Elasticsearch
// JS client class (e.g. from @elastic/elasticsearch); confirm for your setup.
const {
  getAllDuplicates,
  deleteAllDuplicates,
  findDeleteDuplicates,
} = require('dedupe-elasticsearch');

const localClient = new Client({ node: 'http://localhost:9200' });
const indexName = 'node-test5';

// Two-step flow: inspect the duplicates first, then delete them.
// Returns map of {hashKey: [esId1, esId2, esId3], hashKey2: [esId4, esId5]}
const duplicates = await getAllDuplicates(localClient, indexName, ['character', 'quote']);

// Returns an esClient.bulk response
const deleteResponse = await deleteAllDuplicates(localClient, indexName, duplicates);

// One-step flow: find and delete with a single call.
// Returns an esClient.bulk response
const findDeleteResponse = await findDeleteDuplicates(localClient, indexName, ['character', 'quote']);

// Pass an optional query to delete duplicates in a time range.
// `start` and `stop` are supplied by the caller; they must provide
// toISOString() (e.g. Date instances).
const timeRangeQuery = {
  query: {
    bool: {
      must: [
        {
          range: {
            '@timestamp': {
              format: 'strict_date_optional_time',
              gte: start.toISOString(),
              lte: stop.toISOString(),
            },
          },
        },
      ],
    },
  },
};

// Distinct name: redeclaring `findDeleteResponse` with const in the same scope is a SyntaxError.
const timeRangeDeleteResponse = await findDeleteDuplicates(
  localClient,
  indexName,
  ['character', 'quote'],
  timeRangeQuery,
);