kiwis
v0.0.8
Published
A Pandas-inspired data wrangling toolkit in JavaScript
Downloads
1
Maintainers
Readme
Kiwis 🥝
A Pandas-inspired data wrangling toolkit in JavaScript
Installation
npm install kiwis
Getting started
const kw = require('kiwis');
const h2g2Characters = kw.DataFrame([
{
name: 'Marvin',
surname: '',
occupation: 'Paranoid Android'
},
{
name: 'Zaphod',
surname: 'Beeblebrox',
occupation: 'President of the Galaxy'
},
{
name: 'Arthur',
surname: 'Dent',
occupation: null
}
]);
h2g2Characters.show();
/*
| name | surname | occupation
=================================================
0 | Marvin | N/A | Paranoid Android
1 | Zaphod | Beeblebrox | President of the Galaxy
2 | Arthur | Dent | N/A
[3 rows × 3 columns]
Columns: name, surname, occupation
*/
console.log(h2g2Characters.get(1));
/*
{
name: 'Zaphod',
surname: 'Beeblebrox',
occupation: 'President of the Galaxy'
}
*/
h2g2Characters.name.show();
/*
0 | Marvin
1 | Zaphod
2 | Arthur
Length: 3
*/
Documentation
Table of Contents
- Kiwis
- DataFrame
- Series
- PivotTable
Kiwis
DataFrame
Returns a new DataFrame from the given data
Parameters
Examples
const kw = require('kiwis');
const df = kw.DataFrame([
{
name: 'Marvin',
surname: '',
occupation: 'Paranoid Android'
},
{
name: 'Zaphod',
surname: 'Beeblebrox',
occupation: 'President of the Galaxy'
},
{
name: 'Arthur',
surname: 'Dent',
occupation: null
}
]);
console.log(df.length) // 3
console.log(df.columns) // ['name', 'surname', 'occupation']
console.log(df.empty) // false
Returns DataFrame
Series
Returns a new Series from the given data
Parameters
data
Array<any> An array of values
Examples
const kw = require('kiwis');
const series = kw.Series([1, 1, 2, 3, 5, 8, 13, 21, 34]);
console.log(series.length) // 9
console.log(series.empty) // false
Returns Series
loadCSV
Loads a CSV file into a DataFrame
Parameters
Examples
const kw = require('kiwis');
// Loads a CSV file
const df = kw.loadCSV('myAwesomeData.csv');
// Loads a TSV file and prettify the columns in camelCase
const df = kw.loadCSV('myAwesomeData.tsv', { delimiter: '\t', prettify: 'camelCase' });
Returns DataFrame
parseCSV
Parses a CSV string into a DataFrame
Parameters
csv
string CSV string to parse
options
Object? Options (optional, default {})
options.delimiter
string Delimiter of the file (optional, default ',')
options.prettify
("none" | "camelCase" | "snake_case") Prettify column names (optional, default 'none')
Examples
const kw = require('kiwis');
// Parses a CSV string
const df = kw.parseCSV(`
name,surname,occupation\n
Marvin,,Paranoid Android\n
Zaphod,Beeblebrox,President of the Galaxy\n
Arthur,Dent,\n
`);
Returns DataFrame
isNA
Determines whether a value is N/A or not
Parameters
value
any
options
Object? Options (optional, default {})
options.keep
Array<any> Array of falsy values not considered N/A (optional, default [0, false])
Examples
Kiwis.isNA('kiwi'); // false
Kiwis.isNA(''); // true
Kiwis.isNA('', { keep: [0, false, ''] }); // false
Returns boolean
DataFrame
Properties
length
number The number of rows in the DataFrame
empty
boolean Whether the DataFrame contains any row or not
columns
Array<string> The columns of the DataFrame
toArray
Returns the DataFrame as an array
clone
Clones the DataFrame
Returns DataFrame
get
Returns any row of the DataFrame
Parameters
index
number
Examples
// Returns the row at index 4
df.get(4);
Returns Object
first
Returns the first row of the DataFrame
Returns Object
last
Returns the last row of the DataFrame
Returns Object
find
Returns a specific row in the DataFrame
Parameters
condition
callback The returned row is the first one that matches this condition
Examples
// Returns the row where the 'name' is 'Marvin'
df.find(row => row.name === 'Marvin');
Returns Object
set
Sets the content of a cell in the DataFrame
Parameters
Examples
// Sets the value for 'name' on the 42nd row to 'Slartibartfast'
df.set(42, 'name', 'Slartibartfast');
head
Returns a new DataFrame containing the first N rows of the DataFrame
Parameters
n
number Number of rows to select (optional, default5
)
Examples
// Returns a new DataFrame with the first 10 rows
df.head(10);
Returns DataFrame
tail
Returns a new DataFrame containing the last N rows of the DataFrame
Parameters
n
number Number of rows to select (optional, default5
)
Examples
// Returns a new DataFrame with the last 5 rows
df.tail();
Returns DataFrame
slice
Returns a new DataFrame with a slice of the original rows
Parameters
start
number Zero-based index at which to start extractionend
number Zero-based index before which to end extraction (optional, defaultDataFrame.length
)
Examples
// Returns a new DataFrame with rows starting at index 10
df.slice(10);
// Returns a new DataFrame with rows between index 24 (included) and 42 (excluded)
df.slice(24, 42);
Returns DataFrame
rows
Returns the rows of the DataFrame as an iterable
Examples
for (let row of df.rows()) {
console.log(row);
}
Returns Iterable<Object>
items
Returns an array of index/row pairs as an iterable
Examples
for (let [index, row] of df.items()) {
console.log(index, row);
}
Returns Iterable<Array<number, Object>>
forEach
Applies a callback function to each row of the DataFrame
Parameters
callback
callback
Examples
// Displays each element in the 'name' column of the DataFrame
df.forEach(row => console.log(row.name));
map
Returns a new Series populated with the results of a callback function applied on each row of the DataFrame
Parameters
callback
callback
Examples
// Returns a Series of full names by joining the name and surname for each row of the DataFrame
df.map(row => [row.name, row.surname].join(' '));
Returns Series
replace
Replaces all occurrences of the given value in the DataFrame by another value
Parameters
oldValue
anynewValue
anyoptions
Object? (optional, default{}
)
Examples
// Replaces all occurrences of 'panda' with 'kiwi' in the column 'animal'
df.replace('panda', 'kiwi', { inPlace: true, columns: 'animal' });
Returns DataFrame
append
Appends new rows to a DataFrame
Parameters
rows
(Object | Array<Object>) Row or array of rows to append to the DataFrameoptions
Object? (optional, default{}
)options.extend
boolean Adds new columns to the DataFrame if they do not already exist (optional, defaultfalse
)
Examples
const rows = [
{
name: 'Marvin',
occupation: 'Robot'
},
{
name: 'Zaphod Beeblebrox',
occupation: 'President of the Galaxy'
}
];
df.append(rows, { extend: true });
Returns DataFrame
insert
Inserts new rows into a DataFrame
Parameters
rows
(Object | Array<Object>) Row or array of rows to insert into the DataFrameindex
number Index to insert the rows at (optional, default0
)options
Object? (optional, default{}
)options.extend
boolean Adds new columns to the DataFrame if they do not already exist (optional, defaultfalse
)
Examples
// Inserts a new row at index 2 in the DataFrame
df.insert({ name: 'Trillian', species: 'human' }, 2, { extend: true });
Returns DataFrame
concat
Concatenates another DataFrame to the DataFrame
Parameters
Examples
// Concatenates df1 and df2, adding columns from df2 into df1 if they do not exist
df1.concat(df2, { inPlace: true, extend: true });
Returns DataFrame
join
Performs a join of two DataFrames on a given column
Parameters
other
DataFrame
column
string Column to join the DataFrames on
options
Object? (optional, default {})
options.how
("inner" | "outer" | "left" | "right") How the DataFrames should be joined: 'inner' only keeps the intersection of the rows, 'outer' keeps the union of the rows, 'left' only keeps rows from the current DataFrame, and 'right' only keeps rows from the other DataFrame (optional, default 'inner')
options.inPlace
boolean Changes the current DataFrame instead of returning a new one (optional, default false)
Examples
// Joins DataFrames df1 and df2 along their column 'id', keeping only the rows from df1
df1.join(df2, 'id', { inPlace: true, how: 'left' });
Returns DataFrame
addColumn
Adds a new column to the DataFrame
Parameters
name
string Name of the new column
column
(any | Array<any> | Series) Content of the new column as an array, a Series or any value (to be set on every row)
options
Object? (optional, default {})
Examples
// Adds a new column 'fullName' by applying a function on the DataFrame
df.addColumn(
'fullName',
df.map(row => [row.name, row.surname].join(' ')),
{ inPlace: true }
);
// Adds a new column 'species', with 'human' on every row
df.addColumn('species', 'human', { inPlace: true });
Returns DataFrame
rename
Rename columns of the DataFrame
Parameters
map
Object<key, string> Map of the columns to rename to their new namesoptions
Object? (optional, default{}
)options.inPlace
boolean Changes the current DataFrame instead of returning a new one (optional, defaultfalse
)
Examples
// Renames column 'occupation' into 'job'
df.rename({ occupation: 'job' }, { inPlace: true });
Returns DataFrame
reorder
Reorder the columns of the DataFrame
Parameters
names
Array<string> Array containing the new order of the columnsoptions
Object? (optional, default{}
)options.inPlace
boolean Changes the current DataFrame instead of returning a new one (optional, defaultfalse
)
Examples
console.log(df.columns) // ['occupation', 'species', 'name']
df.reorder(['name', 'occupation', 'species'], { inPlace: true });
console.log(df.columns) // ['name', 'occupation', 'species']
Returns DataFrame
dropNA
Drops N/A values from the DataFrame
Parameters
options
Object? (optional, default{}
)options.axis
("rows"
|"columns"
) Determines whether rows or columns should be dropped (optional, default'rows'
)options.keep
Array<any> Array of falsy values to keep in the DataFrame (optional, default[0,false]
)options.inPlace
boolean Changes the current DataFrame instead of returning a new one (optional, defaultfalse
)
Examples
// Drops all rows containing N/A values
df.dropNA({ inPlace: true });
// Drops all columns containing N/A values (but keeps empty strings as well as 0 and false)
df.dropNA({ axis: 'columns', keep: [0, false, ''], inPlace: true });
Returns DataFrame
dropDuplicates
Drops duplicate rows from the DataFrame
Parameters
options
Object? (optional, default{}
)
Examples
// Drops duplicate rows with similar values for 'name'
df.dropDuplicates({ columns: 'name', inPlace: true });
Returns DataFrame
filter
Filters columns or rows of the DataFrame
Parameters
filter
(callback | Array<string>) Can be a callback (applied to rows or columns) or an array of column names to keepoptions
Object? (optional, default{}
)options.axis
("rows"
|"columns"
) Determines whether the callback should apply to rows or columns (optional, default'rows'
)options.inPlace
boolean Changes the current DataFrame instead of returning a new one (optional, defaultfalse
)
Examples
// Only keeps the 'date' and 'url' columns
df.filter(['date', 'url'], { inPlace: true });
// Only keeps rows whose date is 4/20/20
df.filter(row => row.date === '2020-04-20', { inPlace: true });
// Only keeps columns whose name contains 'data'
df.filter(column => column.includes('data'), { axis: 'columns', inPlace: true });
Returns DataFrame
drop
Drops columns or rows from the DataFrame
Parameters
filter
(callback | Array<string>) Can be a callback (applied to rows or columns) or an array of column names to dropoptions
Object? (optional, default{}
)options.axis
("rows"
|"columns"
) Determines whether the callback should apply to rows or columns (optional, default'rows'
)options.inPlace
boolean Changes the current DataFrame instead of returning a new one (optional, defaultfalse
)
Examples
// Removes the 'date' and 'url' columns
df.drop(['date', 'url'], { inPlace: true });
// Removes all rows whose date is 4/20/20
df.drop(row => row.date === '2020-04-20', { inPlace: true });
// Removes columns whose name contains 'data'
df.drop(column => column.includes('data'), { axis: 'columns', inPlace: true });
Returns DataFrame
sort
Sorts the DataFrame
Parameters
by
(string | Array<string>) Key or array of keys to sort the DataFrame byoptions
Object? (optional, default{}
)
Examples
// Sorts the DataFrame alphabetically by 'name'
df.sort('name', { inPlace: true });
// Sorts the DataFrame in descending order by 'age'
df.sort('age', { reverse: true, inPlace: true });
Returns DataFrame
shuffle
Shuffles the rows or columns of a DataFrame
Parameters
options
Object? (optional, default{}
)options.axis
("rows"
|"columns"
) Determines whether rows or columns should be shuffled (optional, default'rows'
)options.inPlace
boolean Changes the current DataFrame instead of returning a new one (optional, defaultfalse
)
Examples
// Shuffles the columns of the DataFrame
df.shuffle({ axis: 'columns', inPlace: true });
Returns DataFrame
pivot
Returns a PivotTable along the given columns
Parameters
Examples
// Returns a PivotTable along columns 'sector' and 'date'
df.pivot(['sector', 'date']);
Returns PivotTable
toString
Formats the DataFrame for display
Returns string
show
Displays the DataFrame
toCSV
Exports the DataFrame as CSV
Parameters
path
string Path of the file to save (optional, defaultnull
)options
Object? (optional, default{}
)options.delimiter
string Delimiter to use (optional, default','
)
Examples
df.toCSV('myAwesomeData.csv'); // to CSV
df.toCSV('myAwesomeData.tsv', { delimiter: '\t' }); // to TSV
Returns (string | undefined) A CSV string if path
is not set
toJSON
Exports the DataFrame as JSON
Parameters
path
string Path of the file to save (optional, defaultnull
)options
Object? (optional, default{}
)options.prettify
boolean Prettify JSON output (optional, defaulttrue
)
Examples
df.toJSON('myAwesomeData.json');
Returns (string | undefined) A JSON string if path
is not set
Series
Properties
length
number The number of values in the Series
empty
boolean Whether the Series contains any value or not
toArray
Returns the Series as an array
Returns Array<any>
clone
Clones the Series
Returns Series
get
Returns any value of the Series
Parameters
index
number
Examples
// Returns the value at index 4
series.get(4);
Returns any
first
Returns the first value of the Series
Returns any
last
Returns the last value of the Series
Returns any
find
Returns a specific value in the Series
Parameters
condition
callback The returned value is the first one that matches this condition
Examples
// Returns the value that contains 'fast'
series.find(value => value.includes('fast'));
Returns any
set
Sets a value in the Series
Parameters
index
numbervalue
any
Examples
// Sets the 42nd value of the Series to 'Slartibartfast'
series.set(42, 'Slartibartfast');
head
Returns a new Series containing the first N values of the Series
Parameters
n
number Number of values to select (optional, default5
)
Examples
// Returns a new Series with the first 10 values
series.head(10);
Returns Series
tail
Returns a new Series containing the last N values of the Series
Parameters
n
number Number of values to select (optional, default5
)
Examples
// Returns a new Series with the last 5 values
series.tail();
Returns Series
slice
Returns a new Series with a slice of the original values
Parameters
start
number Zero-based index at which to start extractionend
number Zero-based index before which to end extraction (optional, defaultSeries.length
)
Examples
// Returns a new Series with values starting at index 10
series.slice(10);
// Returns a new Series with values between index 24 (included) and 42 (excluded)
series.slice(24, 42);
Returns Series
values
Returns the values of the Series as an iterable
Examples
for (let value of series.values()) {
console.log(value);
}
Returns Iterable<any>
items
Returns an array of index/value pairs as an iterable
Examples
for (let [index, value] of series.items()) {
console.log(index, value);
}
Returns Iterable<Array<number, any>>
forEach
Applies a callback function to each value of the Series
Parameters
callback
callback
Examples
// Displays each value of the Series
series.forEach(value => console.log(value));
map
Returns a new Series populated with the results of a callback function applied on the Series
Parameters
callback
callback
Examples
// Double each value in the Series
series.map(value => value * 2);
Returns Series
append
Appends new values to a Series
Parameters
Examples
series.append([42, 101]);
Returns Series
insert
Inserts new values into a Series
Parameters
values
(any | Array<any>) Value or array of values to insert into the Seriesindex
number Index to insert the values at (optional, default0
)
Examples
// Inserts value 42 at index 2 in the Series
series.insert(42, 2);
Returns Series
concat
Concats another Series to the Series
Parameters
other
Seriesoptions
Object? (optional, default{}
)options.inPlace
boolean Changes the current Series instead of returning a new one (optional, defaultfalse
)
Examples
// Concatenates series1 and series2
series1.concat(series2, { inPlace: true });
Returns Series
dropNA
Drops N/A values from the Series
Parameters
options
Object? (optional, default{}
)
Examples
// Drop all N/A values from the Series
series.dropNA({ inPlace: true });
// Drop all N/A values but keep empty strings
series.dropNA({ keep: [''], inPlace: true });
Returns Series
dropDuplicates
Drops duplicate values from the Series
Parameters
options
Object? (optional, default{}
)options.inPlace
boolean Changes the current Series instead of returning a new one (optional, defaultfalse
)
Examples
series.dropDuplicates({ inPlace: true });
Returns Series
any
Returns true if any value of the series satisfies the given condition
Parameters
condition
callback (optional, default!Kiwis.isNA
)
Examples
// Returns true if any value is not N/A
series.any();
// Returns true if any value is greater than 42
series.any(value => value > 42);
Returns boolean
all
Returns true if all values of the series satisfy the given condition
Parameters
condition
callback (optional, default!Kiwis.isNA
)
Examples
// Returns true if all values are not N/A
series.all();
// Returns true if all values are greater than 42
series.all(value => value > 42);
Returns boolean
filter
Filters values of the Series
Parameters
filter
callback Callback to applyoptions
Object? (optional, default{}
)options.inPlace
boolean Changes the current Series instead of returning a new one (optional, defaultfalse
)
Examples
// Only keeps values greater than 42
series.filter(value => value > 42, { inPlace: true });
Returns Series
drop
Drops values from the Series
Parameters
filter
callback Callback to applyoptions
Object? (optional, default{}
)options.inPlace
boolean Changes the current Series instead of returning a new one (optional, defaultfalse
)
Examples
// Only drops values greater than 42
series.drop(value => value > 42, { inPlace: true });
Returns Series
sort
Sorts the Series
Parameters
options
Object? (optional, default{}
)
Examples
// Sorts the Series in descending order
series.sort({ reverse: true, inPlace: true });
Returns Series
shuffle
Shuffles the values of a Series
Parameters
options
Object? (optional, default{}
)options.inPlace
boolean Changes the current Series instead of returning a new one (optional, defaultfalse
)
Examples
series.shuffle({ inPlace: true });
Returns Series
unique
Returns the unique values in the Series as an array
Returns Array<any>
counts
Returns the number of occurrences for each value in the Series
Parameters
options
Object? (optional, default{}
)
Examples
// Returns the number of occurrences for each value in the Series, in ascending order
series.counts({ reverse: false });
Returns Array<[any, number]> Counts as an array of value/count pairs
frequencies
Returns the frequency for each value in the Series
Parameters
options
Object? (optional, default{}
)
Examples
// Returns the frequency for each value in the Series, in ascending order
series.frequencies({ reverse: false });
Returns Array<[any, number]> Frequencies as an array of value/frequency pairs
round
Round the values in the Series
Parameters
digits
number Number of digits for rounding (optional, default0
)options
Object? (optional, default{}
)options.inPlace
boolean Changes the current Series instead of returning a new one (optional, defaultfalse
)
Examples
// Rounds all values in the Series to 2 decimal places
series.round(2, { inPlace: true });
// Truncates all values in the Series
series.round(0, { inPlace: true });
Returns Series
reduce
Returns a single reduced value after applying the given callback to the values of the Series
Parameters
callback
callbackinitial
any Value to use as the first argument to the first call of the callback (optional, defaultSeries.first()
)
Examples
// Returns the sum of all values in the Series
series.reduce((acc, value) => acc + value); // Equivalent to series.sum()
// Returns the product of all values in the Series
series.reduce((acc, value) => acc * value, 1);
Returns any
sum
Returns the sum of the values in the Series
Returns number
min
Returns the minimum value in the Series
Returns number
max
Returns the maximum value in the Series
Returns number
extent
Returns the extent of the Series
mean
Returns the mean of the values in the Series
Returns number
median
Returns the median of the values in the Series
Returns number
std
Returns the standard deviation of the values in the Series
Returns number
toString
Formats the Series for display
Returns string
show
Displays the Series
toCSV
Exports the Series as CSV
Parameters
path
string Path of the file to save (optional, defaultnull
)options
Object? (optional, default{}
)options.name
string Column name to use (optional, default'series'
)
Examples
series.toCSV('myAwesomeData.csv', { name: 'awesome' });
Returns (string | undefined) A CSV string if path is not set
toJSON
Exports the Series as a JSON file
Parameters
Examples
series.toJSON('myAwesomeData.json', { name: 'awesome' });
Returns (string | undefined) A JSON string if path
is not set
PivotTable
Properties
length
number The number of rows in the PivotTable
empty
boolean Whether the PivotTable contains any row or not
columns
Array<string> The columns of the PivotTable, starting with the pivots
rollup
Applies the given callback function on the leaves of the PivotTable, returning a DataFrame
Parameters
callback
callbackoptions
Object? (optional, default{}
)options.name
string Name to use for the column in the output DataFrame (optional, default'data'
)
Examples
// For each leaf, computes the number of rows where 'score' is greater than 3
df.pivot(['sector', 'date']).rollup(
leaf => leaf.filter(row => row.score > 3).length,
{ name: 'nbHighScores' }
);
Returns DataFrame
count
Counts the number of leaves for each branch of the PivotTable
Examples
df.pivot(['sector', 'date']).count();
Returns DataFrame
sum
Computes the sum of a given column of the PivotTable
Parameters
column
Examples
// For each leaf, computes the sum of the column 'score'
df.pivot(['sector', 'date']).sum('score');
Returns DataFrame
min
Computes the minimum value of a given column of the PivotTable
Parameters
column
Examples
// For each leaf, computes the minimum of the column 'score'
df.pivot(['sector', 'date']).min('score');
Returns DataFrame
max
Computes the maximum value of a given column of the PivotTable
Parameters
column
Examples
// For each leaf, computes the maximum of the column 'score'
df.pivot(['sector', 'date']).max('score');
Returns DataFrame
mean
Computes the mean of a given column of the PivotTable
Parameters
column
Examples
// For each leaf, computes the mean of the column 'score'
df.pivot(['sector', 'date']).mean('score');
Returns DataFrame
median
Computes the median of a given column of the PivotTable
Parameters
column
Examples
// For each leaf, computes the median of the column 'score'
df.pivot(['sector', 'date']).median('score');
Returns DataFrame
std
Computes the standard deviation of a given column of the PivotTable
Parameters
column
Examples
// For each leaf, computes the standard deviation of the column 'score'
df.pivot(['sector', 'date']).std('score');
Returns DataFrame
toString
Formats the PivotTable for display
Returns string
show
Displays the PivotTable
toJSON
Exports the PivotTable as JSON
Parameters
path
string Path of the file to save (optional, defaultnull
)options
Object? (optional, default{}
)options.prettify
boolean Prettify JSON output (optional, defaulttrue
)
Examples
pivotTable.toJSON('myPivotTable.json');
Returns string