scrubby
v0.0.4
Published
A simple CSV scrubber.
Downloads
3
Readme
Scrubby
A simple CSV scrubber.
Example Usage
Command line
We read from STDIN and write to STDOUT
./lead-activities.js < in/leadActivities.csv > out/lead_task.csv
lead-activities.js
Here is a JavaScript file that uses scrubby.
#!/usr/bin/env node
'use strict';
const scrub = require('scrubby');
const moment = require('moment');
const oppLookup = new scrub.Lookup();
const users = require('./in/userMap');
function doDate(s) {
if (s) {
let d = moment(s, 'M/D/YYYY');
if (d.isValid()) {
return d.format('YYYY-MM-DD');
}
}
return '';
}
oppLookup.build('./in/leadMap.csv', 'oldId', 'newId', (error, leadMap) => {
const header = [
'WhoId',
'Subject',
'OwnerId',
'TaskSubtype',
'ActivityDate',
'Priority',
'Status',
'Description'
];
const w = new scrub.Writer(header);
w.transform = (oldRow) => {
if (!oldRow['Lead ID']) {
// We don't want these rows. So they will be removed from the output.
return null;
}
const newRow = {};
newRow.WhoId = leadMap[oldRow['Lead ID']];
newRow.Subject = oldRow.Subject;
newRow.OwnerId = users[oldRow.Assigned];
newRow.TaskSubtype = oldRow['Task Subtype'];
newRow.ActivityDate = doDate(oldRow.Date);
newRow.Priority = oldRow.Priority;
newRow.Status = oldRow.Status;
newRow.Description = oldRow['Full Comments'];
return newRow;
};
(new scrub.Reader(w)).read();
});
API DOCS
// This is the guy that reads your CSV file and turns each row into JSON
// to make your life easy. We always read from STDIN.
class Reader {
// I need at least one writer to do anything. You can pass me a single
// writer or an array of writers. Multiple writers let you output a bunch
// of files from one input file.
constructor(writers) {
}
// Call my read method and away we go!
read() {
}
}
// So I take the row from the reader that has nicely been turned into JSON
// and transform it into an output row (or maybe I remove a row, or maybe
// I aggregate rows). Like gumby, I am very flexible.
class Writer {
// I need a header which is just an array of your output CSV file column
// names.
// file is optional. If you don't pass me a file name/path, I just write to
// STDOUT.
constructor(header, file) {
}
// For the most simple case where you just need to transform the input row
// to a single output row (and possibly remove some rows). Just
// implement this method.
// If you return null, we will remove the row from the output.
transform(inRow) {
}
// If you have more complex requirements than you can accomplish with
// just overriding transform, you can override this method and do
// whatever you want. Whatever you stuff into outRows gets output on flush.
onFlush() {
}
// Template method. Implement this if you have custom logic that aggregates
// multiple input rows. You will also need to implement onFlush as well.
shouldFlush(row) {
}
// I get called by my buddy the Reader. You don't need to worry about me
// I know what I am doing.
// If final is true we have processed all the rows and are calling flush
// for the last time.
flush(final) {
}
}
// Build a key value lookup from a CSV file.
//
// Loads the whole thing in memory.
//
// myfile.csv
//
// Name,Rank,Serial
// Bill,Private,1234
// Bob,General,7890
//
// keyColName is required. This is the column that will be the key.
//
// valColName is optional.
//
// If you don't pass a valColName, you will receive the entire CSV row
// as a JSON as the value.
//
// Example: { Bill: { Name: 'Bill', Rank: 'Private', Serial: '1234' } }
//
// If you pass valColName, it can be either a single column name or
// an array of column names.
//
// If you pass a single valColName you will receive just a key value.
//
// Example
//
// build('myfile.csv', 'Name', 'Rank', cb);
//
// { Bill: 'Private' }
//
// You can also pass an array of column names for valColName.
//
// Example
//
// build('myfile.csv', 'Name', ['Rank','Serial'], cb);
//
// { Bill: { Rank: 'Private', Serial: '1234' } }
class Lookup {
// I will call your cb with the map you need.
build(file, keyColName, valColName, cb) {
}
// Or you can call my get method. Whichever you prefer.
get(key) {
}
}
Multiple Lookups with Async
If you have to do a bunch of lookups, it's probably nicer to use async series.
#!/usr/bin/env node
'use strict';
const scrub = require('scrubby');
const moment = require('moment');
const series = require('async').series;
const oppLookup = new scrub.Lookup();
const dadLookup = new scrub.Lookup();
const bookLookup = new scrub.Lookup();
const users = require('./in/userMap');
function doDate(s) {
if (s) {
let d = moment(s, 'M/D/YYYY');
if (d.isValid()) {
return d.format('YYYY-MM-DD');
}
}
return '';
}
const header = [
'WhatId',
'DadName',
'OwnerId',
'TaskSubtype',
'ActivityDate',
'BookTitle',
'Status',
'Description'
];
const w = new scrub.Writer(header);
w.transform = (oldRow) => {
const newRow = {};
newRow.WhatId = oppLookup.get(oldRow['Organization ID']);
newRow.DadName = dadLookup.get(oldRow['Dad ID']);
newRow.OwnerId = users[oldRow.Assigned];
newRow.TaskSubtype = oldRow['Task Subtype'];
newRow.ActivityDate = doDate(oldRow.Date);
newRow.BookTitle = bookLookup.get(oldRow.ISBN);
newRow.Status = oldRow.Status;
newRow.Description = oldRow['Full Comments'];
return newRow;
};
series(
[
oppLookup.build.bind(oppLookup, './in/accountMap.csv', 'oldId', 'newId'),
dadLookup.build.bind(dadLookup, './in/dadMap.csv', 'id', 'name'),
bookLookup.build.bind(bookLookup, './in/bookMap.csv', 'ISBN', 'title'),
() => { (new scrub.Reader(w)).read(); }
]
);