dqtokenizer
v1.0.6
Published
Tokenize strings containing double-quoted segments, including embedded backslash-escaped double quotes
Downloads
21
Maintainers
Readme
What is this?
Tokenizes an input string containing double-quoted and single-quoted segments, with a rich set of options. Handles escaped quotes within each. Supports single-char tokens and additional word boundaries.
Installation
npm i dqtokenizer --save
Example
Code
const dqtokenizer = require('dqtokenizer');
// Sample command line. Inside this single-quoted literal, \\" produces a
// backslash followed by a double quote (\") in the actual string value.
const str = 'eval hie "SUBSET(?x, allComponents(SQ_Server_MTM), Leftstr(?x.partNum, 2) == \\"90\\")" print nohia attributes "partNum"';

// Return quoted tokens without their surrounding double quotes and without
// the backslash in front of embedded double quotes.
const tokenizeOptions = {
  includeDoubleQuote: false,
  removeBackslashOfInternalDoubleQuote: true,
};
const tokens = dqtokenizer.tokenize(str, tokenizeOptions);

console.log(`str: ${str}`);
// Interpolating the token list renders it comma-separated.
console.log(`tokens: ${tokens}`);
Output
str: eval hie "SUBSET(?x, allComponents(SQ_Server_MTM), Leftstr(?x.partNum, 2) == \"90\")" print nohia attributes "partNum"
tokens: eval,hie,SUBSET(?x, allComponents(SQ_Server_MTM), Leftstr(?x.partNum, 2) == "90"),print,nohia,attributes,partNum
More Examples
Code
const dqtokenizer = require('dqtokenizer');
/**
 * Tokenizes `str` with dqtokenizer and prints a blank line, the input
 * string, and then one tab-indented `index: token` line per token.
 *
 * @param {string} str - Input string to tokenize.
 * @param {object} [options] - Passed through unchanged to `dqtokenizer.tokenize`.
 */
const testTokenize = (str, options) => {
  const tokenList = dqtokenizer.tokenize(str, options);

  console.log();
  console.log(`str: ${str}`);
  console.log(`tokens:`);

  for (const [position, token] of tokenList.entries()) {
    console.log(`\t${position}: ${token}`);
  }
};
// Input shared by examples 1 and 2: a command line whose quoted argument
// itself contains backslash-escaped double quotes.
const evalCommand = 'eval hie "SUBSET(?x, allComponents(SQ_Server_MTM), Leftstr(?x.partNum, 2) == \\"90\\")" print nohia attributes "partNum"';

// Example 1: no options passed, so quoted tokens keep their surrounding
// quotes and their internal backslashes.
testTokenize(evalCommand);

// Example 2: strip the surrounding double quotes and the backslashes in
// front of embedded double quotes.
const unquoteOptions = {
  includeDoubleQuote: false,
  removeBackslashOfInternalDoubleQuote: true,
};
testTokenize(evalCommand, unquoteOptions);

// Example 3: brackets and colons become standalone tokens, while the same
// characters inside single- or double-quoted strings are left untouched.
const bracketOptions = {
  additionalBoundaryChars: [],
  singleCharTokens: ['(', ')', '{', '}', '[', ']', ':'],
};
testTokenize(`{"letters" : '321"}{}"'}{'{}{{}"': "stack{}}{"}`, bracketOptions);
Output:
str: eval hie "SUBSET(?x, allComponents(SQ_Server_MTM), Leftstr(?x.partNum, 2) == \"90\")" print nohia attributes "partNum"
tokens:
0: eval
1: hie
2: "SUBSET(?x, allComponents(SQ_Server_MTM), Leftstr(?x.partNum, 2) == \"90\")"
3: print
4: nohia
5: attributes
6: "partNum"
str: eval hie "SUBSET(?x, allComponents(SQ_Server_MTM), Leftstr(?x.partNum, 2) == \"90\")" print nohia attributes "partNum"
tokens:
0: eval
1: hie
2: SUBSET(?x, allComponents(SQ_Server_MTM), Leftstr(?x.partNum, 2) == "90")
3: print
4: nohia
5: attributes
6: partNum
str: {"letters" : '321"}{}"'}{'{}{{}"': "stack{}}{"}
tokens:
0: {
1: "letters"
2: :
3: '321"}{}"'
4: }
5: {
6: '{}{{}"'
7: :
8: "stack{}}{"
9: }
Options
dqtokenizer.tokenize accepts an optional second parameter: an options object. It supports the following options:
- additionalBoundaryChars - array of chars (Defaults to [',', ';'])
- singleCharTokens - array of chars (Defaults to ['(', ')', '{', '}', '[', ']', ':'])
- includeDoubleQuote - boolean (Defaults to true)
- includeSingleQuote - boolean (Defaults to true)
- removeBackslashOfInternalDoubleQuote - boolean (Defaults to false)
- removeBackslashOfInternalSingleQuote - boolean (Defaults to false)
- fillMissingLastDoubleQuote - boolean (Defaults to true)
- fillMissingLastSingleQuote - boolean (Defaults to true)
- debug - boolean (Defaults to false)