a
a
eva;
import java.util.ArrayList;
/**
* Constructor
* Pass the string to be tokenized in the constructor
* and get the tokens with the method getTokens()
* @param input String to be tokenized
*/
public Lexer(String input) {
    // Both cursors start at the beginning of the text.
    startPos = 0;
    currentPos = 0;
    this.input = input;
    // Lexing is eager: the whole input is scanned during construction.
    tokenize();
}
/**
* Tokenize the input string and append each token to the token list.
* Three cases: is a digit, is an identifier or is a symbol
* a) A digit is 0 - 9 and the DOT character
* b) An identifier is a variable name comprised of letters
* c) A symbol is a character that is not a digit or a letter,
* like operators, parentheses, etc.
* The method returns nothing; the tokens are stored in the lexer.
*/
private void tokenize() {
    // Scan the input left to right; each helper advances currentPos past
    // the token it produces, so the loop only needs to dispatch.
    while (currentPos < input.length()) {
        char c = input.charAt(currentPos);
        // isSpaceChar alone is false for tab and line terminators, which would
        // then be reported as invalid characters. isWhitespace covers those;
        // isSpaceChar is kept so non-breaking spaces are still skipped.
        if (Character.isWhitespace(c) || Character.isSpaceChar(c)) {
            currentPos++;
            continue;
        }
        Token token;
        if (Character.isDigit(c)) {
            token = getNextDigit();
        } else if (Character.isLetter(c)) {
            token = getNextIdentifier();
        } else {
            token = getNextSymbol();
        }
        this.tokens.add(token);
    }
}
/**
* Get the next number, can be integer or float if there is a DOT
* @return Token
*/
private Token getNextDigit() {
// Remember where the numeric literal begins.
startPos = currentPos;
// Set as soon as a '.' is consumed. Every '.' is accepted by the loop below,
// so a run such as "1.2.3" is consumed as a single literal.
boolean floatNumber = false;
while(Character.isDigit(input.charAt(currentPos)) ||
input.charAt(currentPos) == '.'){
if(input.charAt(currentPos) == '.' ) floatNumber = true;
currentPos++;
// Stop at end of input before the loop condition calls charAt again.
if(currentPos==input.length()) break;
}
// NOTE(review): the method appears truncated at this point in this view; the
// statement that builds and returns the Token and the closing brace are not
// visible. Restore them from the original source before compiling.
/**
* Get the next identifier, a variable name
* There is a special case for function/method call, in that case all the
* remaining chars are parsed between parenthesis returning the whole call,
* i.e. the lexer returns a token
* like TOKEN_CALL callFunction(a,b)
* @return Token
*/
private Token getNextIdentifier() {
    startPos = currentPos;
    // Consume the run of letters that forms the name.
    while (currentPos < input.length() && Character.isLetter(input.charAt(currentPos))) {
        currentPos++;
    }
    // A name that is not immediately followed by '(' is a plain identifier.
    if (currentPos == input.length() || input.charAt(currentPos) != '(') {
        return new Token(TOKEN_IDENTIFIER, input.substring(startPos, currentPos), startPos);
    }
    // Function/method call: swallow everything up to the parenthesis that
    // closes the call. The depth counter keeps nested calls such as
    // f(g(a),b) in one token instead of stopping at the first ')'.
    // If the call is never closed, the rest of the input is consumed.
    int depth = 0;
    while (currentPos < input.length()) {
        char c = input.charAt(currentPos++);
        if (c == '(') {
            depth++;
        } else if (c == ')' && --depth == 0) {
            break;
        }
    }
    return new Token(TOKEN_CALL, input.substring(startPos, currentPos), startPos);
}
/**
* Get the next symbol, like operators, parenthesis, etc.
* There are special cases for operators that can be composed of two characters
* which are usually the comparison operators
*
* There is a special case for expression reference, which is a number preceded
* by %, meaning the expression %n is going to be replaced by the n-th expression
* that appears in the context.
* ie: print(%7) will print the 7-th expression where %7 == 2+3
*
* @return Token
*/
private Token getNextSymbol() {
char c = input.charAt(currentPos);
Token token = null;
switch(c) {
case '+' -> token = new Token(TOKEN_ADDITION, "+", currentPos);
case '-' -> token = new Token(TOKEN_SUBTRACTION, "-", currentPos);
case '*' -> token = new Token(TOKEN_MULTIPLY, "*", currentPos);
case '/' -> token = new Token(TOKEN_DIVISION, "/", currentPos);
case '(' -> token = new Token(TOKEN_LEFT_PARENTHESIS, "(", currentPos);
case ')' -> token = new Token(TOKEN_RIGHT_PARENTHESIS, ")",
currentPos);
case '{' -> token = new Token(TOKEN_LEFT_BRACE, "{", currentPos);
case '}' -> token = new Token(TOKEN_RIGHT_BRACE, "}", currentPos);
case ',' -> token = new Token(TOKEN_COMMA, ",", currentPos);
case '^' -> token = new Token(TOKEN_POWER, "^", currentPos);
case '='->{
//Next token can be ==, =<, =>,
switch (lookAhead()){
case '=' -> {token = new Token(TOKEN_EQUAL_EQUAL, "==",
currentPos); currentPos++;}
case '<' -> {token = new Token(TOKEn_EQUAL_MINOR, "=<",
currentPos); currentPos++;}
case '>' -> {token = new Token(TOKEN_EQUAL_MAJOR, "=>",
currentPos); currentPos++;}
default -> token = new Token(TOKEN_EQUAL, "=", currentPos);
}
}
case '<'->{
//Next token can be ==, =<, =>,
switch (lookAhead()){
case '=' -> {token = new Token(TOKEn_LESS_EQUAL, "<=",
currentPos); currentPos++;}
case '>' -> {token = new Token(TOKEN_NOT_EQUAL, "<>",
currentPos); currentPos++;}
default -> token = new Token(TOKEN_MINOR, "<", currentPos);
}
}
case '>'->{
//Next token can be ==, =<, =>,
if (lookAhead()=='='){
token = new Token(TOKEN_GREATER_EQUAl, "<=", currentPos);
currentPos++;
} else token = new Token(TOKEN_GREATER, ">", currentPos);
}
case '%'->{
boolean isExprReference = false;
while(Character.isDigit(lookAhead()) || lookAhead()=='.'){
isExprReference = true;
currentPos++;
}
if(isExprReference) token = new Token(TOKEN_EXPRESSION_REFERENCE,
input.substring(startPos, currentPos+1), startPos);
}
default -> {
this.error = true;
currentPos++;
return new Token(TOKEN_EXPRESSION_ERROR, "Error, invalid character
" +c, currentPos);
}
}
currentPos++;
return token;
}
/**
* Helper method to look ahead in the input string
* @return char
*/
private char lookAhead() {
    int next = currentPos + 1;
    // '\0' signals "no next character". The >= comparison also covers a
    // cursor that already sits at or past the end of the input, where an
    // exact == test would fall through to charAt and throw.
    return next >= input.length() ? '\0' : input.charAt(next);
}
/**
* Helper method to print the tokenized tokens to standard output,
* one per line
*/
public void printTokens() {
    // Each token is written on its own line using its string form.
    tokens.forEach(System.out::println);
}