dqtokenizer
Tokenizes strings containing double quotes, including embedded, backslash-escaped double quotes.
What is this?
Tokenizes an input string containing double-quoted and single-quoted segments, with a rich set of options. Handles escaped quotes inside each kind of quote. Supports single-character tokens and additional word-boundary characters.
Installation
npm i dqtokenizer --save
Example
Code
const dqtokenizer = require('dqtokenizer');
const str = 'eval hie "SUBSET(?x, allComponents(SQ_Server_MTM), Leftstr(?x.partNum, 2) == \\"90\\")" print nohia attributes "partNum"';
const tokens = dqtokenizer.tokenize(str, {
  includeDoubleQuote: false,
  removeBackslashOfInternalDoubleQuote: true
});
console.log(`str: ${str}`);
console.log(`tokens: ${tokens}`);
Output
str: eval hie "SUBSET(?x, allComponents(SQ_Server_MTM), Leftstr(?x.partNum, 2) == \"90\")" print nohia attributes "partNum"
tokens: eval,hie,SUBSET(?x, allComponents(SQ_Server_MTM), Leftstr(?x.partNum, 2) == "90"),print,nohia,attributes,partNum
More Examples
Code
const dqtokenizer = require('dqtokenizer');
const testTokenize = (str, options) => {
  const tokens = dqtokenizer.tokenize(str, options);
  console.log();
  console.log(`str: ${str}`);
  console.log(`tokens:`);
  tokens.forEach((token, index) => console.log(`\t${index}: ${token}`));
};
// Example 1
testTokenize('eval hie "SUBSET(?x, allComponents(SQ_Server_MTM), Leftstr(?x.partNum, 2) == \\"90\\")" print nohia attributes "partNum"');
// Example 2
testTokenize('eval hie "SUBSET(?x, allComponents(SQ_Server_MTM), Leftstr(?x.partNum, 2) == \\"90\\")" print nohia attributes "partNum"', {
  includeDoubleQuote: false,
  removeBackslashOfInternalDoubleQuote: true,
});
// Example 3
testTokenize(`{"letters" : '321"}{}"'}{'{}{{}"': "stack{}}{"}`, {
additionalBoundaryChars: [],
singleCharTokens: ['(', ')', '{', '}', '[', ']', ':'],
});
Output
str: eval hie "SUBSET(?x, allComponents(SQ_Server_MTM), Leftstr(?x.partNum, 2) == \"90\")" print nohia attributes "partNum"
tokens:
0: eval
1: hie
2: "SUBSET(?x, allComponents(SQ_Server_MTM), Leftstr(?x.partNum, 2) == \"90\")"
3: print
4: nohia
5: attributes
6: "partNum"
str: eval hie "SUBSET(?x, allComponents(SQ_Server_MTM), Leftstr(?x.partNum, 2) == \"90\")" print nohia attributes "partNum"
tokens:
0: eval
1: hie
2: SUBSET(?x, allComponents(SQ_Server_MTM), Leftstr(?x.partNum, 2) == "90")
3: print
4: nohia
5: attributes
6: partNum
str: {"letters" : '321"}{}"'}{'{}{{}"': "stack{}}{"}
tokens:
0: {
1: "letters"
2: :
3: '321"}{}"'
4: }
5: {
6: '{}{{}"'
7: :
8: "stack{}}{"
9: }
Options
dqtokenizer's tokenize function accepts an optional second parameter, an options object with the following properties:
- additionalBoundaryChars - array of chars (Defaults to [',', ';'])
- singleCharTokens - array of chars (Defaults to ['(', ')', '{', '}', '[', ']', ':'])
- includeDoubleQuote - boolean (Defaults to true)
- includeSingleQuote - boolean (Defaults to true)
- removeBackslashOfInternalDoubleQuote - boolean (Defaults to false)
- removeBackslashOfInternalSingleQuote - boolean (Defaults to false)
- fillMissingLastDoubleQuote - boolean (Defaults to true)
- fillMissingLastSingleQuote - boolean (Defaults to true)
- debug - boolean (Defaults to false)
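As a reference for the shape of the options object, the sketch below calls tokenize with every option spelled out at its documented default. The input string and the comments about expected behavior are illustrative assumptions based on the option names, not documented output.
// A minimal sketch: every option set to its documented default.
const dqtokenizer = require('dqtokenizer');

const defaults = {
  additionalBoundaryChars: [',', ';'],
  singleCharTokens: ['(', ')', '{', '}', '[', ']', ':'],
  includeDoubleQuote: true,
  includeSingleQuote: true,
  removeBackslashOfInternalDoubleQuote: false,
  removeBackslashOfInternalSingleQuote: false,
  fillMissingLastDoubleQuote: true,
  fillMissingLastSingleQuote: true,
  debug: false,
};

// The closing double quote is missing here; fillMissingLastDoubleQuote is
// presumably what lets the last quoted token still be produced (assumption).
const tokens = dqtokenizer.tokenize('print "unterminated value', defaults);
console.log(tokens);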