@multila/multila-lexer
v0.9.4
Published
Lexical Analyzer written in TypeScript
Downloads
7
Readme
multila-lexer
Lexical Analyzer for the Web and Node.js written in TypeScript.
Copyright 2022 by Andreas Schwenk
Licensed under GPLv3
Multila Website: https://www.multila.org
Personal Website: https://www.arts-and-sciences.com
Mail: [email protected]
Installation
npm install @multila/multila-lexer
Example
The following example program implements an LL(1) top-down parser for a simple language with the following grammar, specified in EBNF.
It uses multila-lexer to fetch tokens.
program = { assignment };
assignment = ID ":=" add ";";
add = mul { "+" mul };
mul = unary { "*" unary };
unary = ID | INT | "(" add ")";
A valid program is, for example:
# comment
x := 3 * (4+5);
Example code:
// import multila-lexer
const lex = require('@multila/multila-lexer');
/**
 * Parses the given source text as a program of the example grammar.
 *
 * Creates and configures a lexer, pushes the source text into it and hands
 * the lexer over to `parseProgram`.
 *
 * @param {string} src - Source code to be parsed.
 */
function parse(src) {
  // operators with two or more chars must be added explicitly
  const multiCharTerminals = [':='];

  const tokenizer = new lex.Lexer();
  // '#' is configured as the single-line comment marker
  tokenizer.configureSingleLineComments('#');
  tokenizer.setTerminals(multiCharTerminals);
  // first argument is presumably an identifier for the pushed source
  tokenizer.pushSource('mySource', src);

  parseProgram(tokenizer);
}
//G program = { assignment };
/**
 * Parses a whole program: one assignment after another, for as long as
 * `lexer.isNotEND()` holds.
 *
 * @param {object} lexer - Lexer instance the tokens are fetched from.
 */
function parseProgram(lexer) {
  for (;;) {
    if (!lexer.isNotEND()) {
      return;
    }
    parseAssignment(lexer);
  }
}
//G assignment = ID ":=" add ";";
/**
 * Parses a single assignment statement.
 *
 * Logs the target identifier, parses the right-hand side via `parseAdd`
 * and finally logs 'assign'.
 *
 * @param {object} lexer - Lexer instance the tokens are fetched from.
 */
function parseAssignment(lexer) {
  // left-hand side: the identifier that is assigned to
  console.log(lexer.ID());
  lexer.TER(':=');

  // right-hand side expression, closed by ';'
  parseAdd(lexer);
  lexer.TER(';');

  console.log('assign');
}
//G add = mul { "+" mul };
/**
 * Parses a sum: one `mul` operand followed by any number of "+" `mul`
 * pairs. Logs 'add' after each right-hand operand has been parsed.
 *
 * @param {object} lexer - Lexer instance the tokens are fetched from.
 */
function parseAdd(lexer) {
  parseMul(lexer);
  for (;;) {
    if (!lexer.isTER('+')) {
      return;
    }
    // step past the "+" before parsing the next operand
    lexer.next();
    parseMul(lexer);
    console.log('add');
  }
}
//G mul = unary { "*" unary };
/**
 * Parses a product: one `unary` operand followed by any number of "*"
 * `unary` pairs. Logs 'mul' after each right-hand operand has been parsed.
 *
 * @param {object} lexer - Lexer instance the tokens are fetched from.
 */
function parseMul(lexer) {
  parseUnary(lexer);
  for (;;) {
    if (!lexer.isTER('*')) {
      return;
    }
    // step past the "*" before parsing the next operand
    lexer.next();
    parseUnary(lexer);
    console.log('mul');
  }
}
//G unary = ID | INT | "(" add ")";
/**
 * Parses a unary operand: an identifier, an integer constant, or a
 * parenthesized `add` expression.
 *
 * Identifiers and integers are logged. If none of the three alternatives
 * matches, `lexer.error` is called with a message that names all of them.
 *
 * @param {object} lexer - Lexer instance the tokens are fetched from.
 */
function parseUnary(lexer) {
  if (lexer.isID()) {
    const id = lexer.ID();
    console.log(id);
  } else if (lexer.isINT()) {
    const value = lexer.INT();
    console.log(value);
  } else if (lexer.isTER('(')) {
    // step past "(", parse the nested expression, then require ")"
    lexer.next();
    parseAdd(lexer);
    lexer.TER(')');
  } else {
    // "(" is a valid start of a unary as well, so it belongs in the message
    lexer.error('expected ID, INT or "("');
  }
}
// run
// example input: a comment line followed by a single assignment
const src = ['# comment', 'x := 3 * (4+5);'].join('\n');
parse(src);
// the output is:
// x 3 4 5 add mul assign
Methods
Configuration
enableEmitNewlines(value: boolean)
Enables emitting newline (\n) tokens that can be tested by lexer.isNEWLINE() and consumed by lexer.NEWLINE(). Otherwise, newline characters are treated as white space.
enableEmitHex(value: boolean)
Enables emitting hexadecimal tokens that can be tested by lexer.isHEX() and consumed by lexer.HEX().
enableEmitInt(value: boolean)
Enables emitting integer tokens that can be tested by lexer.isINT() and consumed by lexer.INT().
enableEmitReal(value: boolean)
Enables emitting real-valued tokens that can be tested by lexer.isREAL() and consumed by lexer.REAL().
enableEmitBigint(value: boolean)
Enables emitting big integer tokens that can be tested by lexer.isBIGINT() and consumed by lexer.BIGINT().
enableEmitSingleQuotes(value: boolean)
Enables emitting single-quoted string tokens that can be tested by lexer.isSTR() and consumed by lexer.STR().
enableEmitDoubleQuotes(value: boolean)
Enables emitting double-quoted string tokens that can be tested by lexer.isSTR() and consumed by lexer.STR().
enableEmitIndentation(value: boolean)
Enables emitting indentation tokens: an indentation begin can be tested by lexer.isINDENT() and consumed by lexer.INDENT(), and an indentation end can be tested by lexer.isOUTDENT() and consumed by lexer.OUTDENT().
enableBackslashLineBreaks(value: boolean)
If enabled, a backslash (\) right before a newline concatenates the next line, ignoring indentation.
Input Files
TODO
Parsing
TODO
Error Handling
TODO
Tokens
ID
identifier
INT
integer constant
REAL
real valued constant
EOS
end of statement (usually ;)
STR
string constant in double quotes (") or single quotes (')
TER
terminal
INDENT
indentation begin
OUTDENT
indentation end
NEWLINE
newline (\n)
EOS
end of statement (; or \n)
END
end of input