a

The Lexer class is designed to tokenize an input string into various token types such as digits, identifiers, and symbols. It processes the input character by character, identifying and categorizing each token while handling special cases like function calls and multi-character operators. The class also includes methods for printing the generated tokens and managing errors during tokenization.

Uploaded by

txenet

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as TXT or PDF, or read online on Scribd

0% found this document useful (0 votes)

3 views4 pages

a

Uploaded by

txenet

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as TXT or PDF, or read online on Scribd

You are on page 1/ 4

package com.egenoves.

eva;

import java.util.ArrayList;

import static com.egenoves.eva.TokenType.*;

public class Lexer {

// Index into input of the next character to be examined.
int currentPos;
// Start index of the token being scanned; assigned at the start of number and identifier scanning.
int startPos;
// Set to true once an invalid character has been encountered.
boolean error = false;
// The string being tokenized.
final String input;
// Tokens produced by tokenize(), appended in the order they are scanned.
final ArrayList<Token> tokens = new ArrayList<>();

/**
 * Creates a lexer for the given text and tokenizes it right away.
 * Both scan positions start at the beginning of the input, and the
 * resulting tokens are stored in {@code tokens} by the time the
 * constructor returns.
 *
 * @param input String to be tokenized
 */
public Lexer(String input) {
    currentPos = 0;
    startPos = 0;
    this.input = input;
    tokenize();
}

/**
 * Tokenizes the whole input string, appending each token to {@code tokens}.
 * Whitespace between tokens is skipped. Every other character starts one of
 * three kinds of token, chosen from its first character:
 * <ul>
 *   <li>a digit starts a number (digits 0 - 9 and the DOT character),</li>
 *   <li>a letter starts an identifier or a function call,</li>
 *   <li>anything else is handled as a symbol (operators, parenthesis, etc).</li>
 * </ul>
 */
private void tokenize() {
    while (currentPos < input.length()) {
        final char c = input.charAt(currentPos);

        // Character.isSpaceChar alone does not match tab, CR or LF, which would
        // then be reported as invalid characters. isWhitespace covers those;
        // isSpaceChar is kept so non-breaking spaces are still skipped as before.
        if (Character.isWhitespace(c) || Character.isSpaceChar(c)) {
            currentPos++;
            continue;
        }

        final Token token;
        if (Character.isDigit(c)) {
            token = getNextDigit();
        } else if (Character.isLetter(c)) {
            token = getNextIdentifier();
        } else {
            token = getNextSymbol();
        }

        this.tokens.add(token);
    }
}

/**
 * Scans the number that starts at the current position.
 * The number is the longest run of digits and DOT characters; it is reported
 * as TOKEN_FLOAT when at least one DOT was seen and as TOKEN_INTEGER otherwise.
 * Note that the run is not validated further, so several DOTs end up in a
 * single TOKEN_FLOAT lexeme.
 *
 * @return the number token, positioned at the index of its first character
 */
private Token getNextDigit() {
    startPos = currentPos;
    boolean sawDot = false;

    char ch = input.charAt(currentPos);
    while (Character.isDigit(ch) || ch == '.') {
        sawDot |= (ch == '.');
        // Stop before reading past the last character of the input.
        if (++currentPos == input.length()) {
            break;
        }
        ch = input.charAt(currentPos);
    }

    final String lexeme = input.substring(startPos, currentPos);
    return new Token(sawDot ? TOKEN_FLOAT : TOKEN_INTEGER, lexeme, startPos);
}

/**
 * Scans the identifier (a run of letters) that starts at the current position.
 * <p>
 * Special case for function/method calls: when the identifier is immediately
 * followed by '(', everything up to and including the matching ')' is consumed
 * and the whole call is returned as a single token, e.g.
 * {@code TOKEN_CALL callFunction(a,b)}. Parentheses are balanced, so nested
 * calls such as {@code f(g(x),y)} are returned whole. If the input ends before
 * the call is closed, the token extends to the end of the input.
 *
 * @return a TOKEN_CALL or TOKEN_IDENTIFIER token positioned at the first letter
 */
private Token getNextIdentifier() {
    startPos = currentPos;
    final int length = input.length();

    while (currentPos < length && Character.isLetter(input.charAt(currentPos))) {
        currentPos++;
    }

    // Special case for function/method call.
    if (currentPos < length && input.charAt(currentPos) == '(') {
        int depth = 0;
        while (currentPos < length) {
            final char ch = input.charAt(currentPos++);
            if (ch == '(') {
                depth++;
            } else if (ch == ')' && --depth == 0) {
                // Matching close of the call's own parenthesis: already consumed.
                break;
            }
        }
        return new Token(TOKEN_CALL, input.substring(startPos, currentPos), startPos);
    }

    return new Token(TOKEN_IDENTIFIER, input.substring(startPos, currentPos), startPos);
}

/**
* Get the next symbol, like operators, parenthesis, etc.
* There are special cases for operators that can be composed of two characters
* which are usually the comparison operators
*
* There is a special case for expression reference, which is a number preceded
by %
* meaning the expression %n is going to be replaced by the n-th expression
that appears
* in the context. ie: print(%7) will print the 7-th expression where %7 == 2+3
*
* @return Token
*/
private Token getNextSymbol() {
char c = input.charAt(currentPos);
Token token = null;
switch(c) {
case '+' -> token = new Token(TOKEN_ADDITION, "+", currentPos);
case '-' -> token = new Token(TOKEN_SUBTRACTION, "-", currentPos);
case '*' -> token = new Token(TOKEN_MULTIPLY, "*", currentPos);
case '/' -> token = new Token(TOKEN_DIVISION, "/", currentPos);
case '(' -> token = new Token(TOKEN_LEFT_PARENTHESIS, "(", currentPos);
case ')' -> token = new Token(TOKEN_RIGHT_PARENTHESIS, ")",
currentPos);
case '{' -> token = new Token(TOKEN_LEFT_BRACE, "{", currentPos);
case '}' -> token = new Token(TOKEN_RIGHT_BRACE, "}", currentPos);
case ',' -> token = new Token(TOKEN_COMMA, ",", currentPos);
case '^' -> token = new Token(TOKEN_POWER, "^", currentPos);
case '='->{
//Next token can be ==, =<, =>,
switch (lookAhead()){
case '=' -> {token = new Token(TOKEN_EQUAL_EQUAL, "==",
currentPos); currentPos++;}
case '<' -> {token = new Token(TOKEn_EQUAL_MINOR, "=<",
currentPos); currentPos++;}
case '>' -> {token = new Token(TOKEN_EQUAL_MAJOR, "=>",
currentPos); currentPos++;}
default -> token = new Token(TOKEN_EQUAL, "=", currentPos);
}
}
case '<'->{
//Next token can be ==, =<, =>,
switch (lookAhead()){
case '=' -> {token = new Token(TOKEn_LESS_EQUAL, "<=",
currentPos); currentPos++;}
case '>' -> {token = new Token(TOKEN_NOT_EQUAL, "<>",
currentPos); currentPos++;}
default -> token = new Token(TOKEN_MINOR, "<", currentPos);
}
}
case '>'->{
//Next token can be ==, =<, =>,
if (lookAhead()=='='){
token = new Token(TOKEN_GREATER_EQUAl, "<=", currentPos);
currentPos++;
} else token = new Token(TOKEN_GREATER, ">", currentPos);
}
case '%'->{
boolean isExprReference = false;

while(Character.isDigit(lookAhead()) || lookAhead()=='.'){
isExprReference = true;
currentPos++;
}
if(isExprReference) token = new Token(TOKEN_EXPRESSION_REFERENCE,
input.substring(startPos, currentPos+1), startPos);
}
default -> {
this.error = true;
currentPos++;
return new Token(TOKEN_EXPRESSION_ERROR, "Error, invalid character
" +c, currentPos);
}
}
currentPos++;
return token;
}

/**
 * Peeks at the character right after the current position without
 * consuming anything.
 *
 * @return the next character, or 0 when the current position is the last
 *         character of the input
 */
private char lookAhead() {
    final int next = currentPos + 1;
    return next == input.length() ? 0 : input.charAt(next);
}

/**
 * Prints every token produced so far to standard output, one per line,
 * in the order in which they were scanned.
 */
public void printTokens() {
    tokens.forEach(System.out::println);
}

Infix To PostFix Stack
No ratings yet
Infix To PostFix Stack
9 pages
من المفترض ان ده حل الكويز بس بيقع في كذا تيست
No ratings yet
من المفترض ان ده حل الكويز بس بيقع في كذا تيست
4 pages
FA23-BSE-126
No ratings yet
FA23-BSE-126
4 pages
Midsem + secondpart
No ratings yet
Midsem + secondpart
100 pages
LAB # 10 Stack ADT Implementation
No ratings yet
LAB # 10 Stack ADT Implementation
6 pages
tgrnjnhmksAssignment - 12
No ratings yet
tgrnjnhmksAssignment - 12
7 pages
LR Parsing Algorithm (Pseudocode) : Festin, Kerr Oliver Bscs 2A
No ratings yet
LR Parsing Algorithm (Pseudocode) : Festin, Kerr Oliver Bscs 2A
13 pages
Verb at Ives
No ratings yet
Verb at Ives
4 pages
Scanner Solution
No ratings yet
Scanner Solution
4 pages
Online Java Compiler IDE: For Multiple Files, Custom Library and File Read/Write, Use Our New - Advanced Java IDE
No ratings yet
Online Java Compiler IDE: For Multiple Files, Custom Library and File Read/Write, Use Our New - Advanced Java IDE
1 page
Compiler Design & Construction Term Project: Part 1
No ratings yet
Compiler Design & Construction Term Project: Part 1
10 pages
Infix Program
No ratings yet
Infix Program
2 pages
Stacks1 Ass4
No ratings yet
Stacks1 Ass4
3 pages
Stack - Notes
No ratings yet
Stack - Notes
5 pages
cdjavacodes (1) (1)
No ratings yet
cdjavacodes (1) (1)
23 pages
Laporan Praktikum Stack
No ratings yet
Laporan Praktikum Stack
3 pages
C2ex Java
No ratings yet
C2ex Java
6 pages
Evaluate_Arithmetic Expression
No ratings yet
Evaluate_Arithmetic Expression
7 pages
validParantehsis
No ratings yet
validParantehsis
1 page
Cs-603 Activity: Abca-1 (Coding/Debugging) Compiler: Name - Divyansh Sharma Roll No. - 0905cs211055
No ratings yet
Cs-603 Activity: Abca-1 (Coding/Debugging) Compiler: Name - Divyansh Sharma Roll No. - 0905cs211055
6 pages
SPCC Lab Expt
No ratings yet
SPCC Lab Expt
12 pages
CD LAB in Java
No ratings yet
CD LAB in Java
12 pages
JAVA
No ratings yet
JAVA
21 pages
Chapter 2 - 1 Lexical Analysis
No ratings yet
Chapter 2 - 1 Lexical Analysis
30 pages
DSAEXP5
No ratings yet
DSAEXP5
11 pages
Mid Term Project
No ratings yet
Mid Term Project
4 pages
Phase 1
No ratings yet
Phase 1
9 pages
InfixToPostFix
No ratings yet
InfixToPostFix
2 pages
Write A Program To Evaluate The Value of The Infix Expression Consisting of Operator
No ratings yet
Write A Program To Evaluate The Value of The Infix Expression Consisting of Operator
4 pages
Java8Stepwise2 JJ
No ratings yet
Java8Stepwise2 JJ
28 pages
Ornek Scanner Parser
No ratings yet
Ornek Scanner Parser
44 pages
LAB 3
No ratings yet
LAB 3
12 pages
Lab Exp 1
No ratings yet
Lab Exp 1
2 pages
Java
No ratings yet
Java
38 pages
lab2_cd_22BLC1161
No ratings yet
lab2_cd_22BLC1161
9 pages
Practical (1)
No ratings yet
Practical (1)
17 pages
Project Data - تراكيب
No ratings yet
Project Data - تراكيب
4 pages
21BAI1213 - Abhinav V - Experiment-2
No ratings yet
21BAI1213 - Abhinav V - Experiment-2
11 pages
Algorithm Infixtopostfix
No ratings yet
Algorithm Infixtopostfix
7 pages
Stacks and Queues
No ratings yet
Stacks and Queues
13 pages
expression conversion using stack
No ratings yet
expression conversion using stack
4 pages
Data STR 3
No ratings yet
Data STR 3
6 pages
INFIX TO PREFIX EXPRESSION
No ratings yet
INFIX TO PREFIX EXPRESSION
7 pages
Algorithm - Assig#3
No ratings yet
Algorithm - Assig#3
8 pages
Stack Implementation With Its Application
No ratings yet
Stack Implementation With Its Application
7 pages
System Software and Compiler Lab: Token Separation
No ratings yet
System Software and Compiler Lab: Token Separation
5 pages
assessment2_program_question
No ratings yet
assessment2_program_question
9 pages
CD 1
No ratings yet
CD 1
31 pages
Week 2a &2B
No ratings yet
Week 2a &2B
6 pages
19ELC211 DSA Lab340
No ratings yet
19ELC211 DSA Lab340
12 pages
19ELC211 DSA Lab3
No ratings yet
19ELC211 DSA Lab3
12 pages
19ELC211 DSA Lab341 NEW
No ratings yet
19ELC211 DSA Lab341 NEW
12 pages
JAVA
No ratings yet
JAVA
23 pages
Lab Assig 2021
No ratings yet
Lab Assig 2021
41 pages
Infix To Postfix Code
No ratings yet
Infix To Postfix Code
7 pages
EECE334_HW1
No ratings yet
EECE334_HW1
4 pages
Assignment 3
No ratings yet
Assignment 3
8 pages
infix
No ratings yet
infix
3 pages
6
No ratings yet
6
3 pages
C++ Functions and tutorial
From Everand
C++ Functions and tutorial
Nino Paiotta
No ratings yet
Pool Control Prof I
No ratings yet
Pool Control Prof I
2 pages
IELTs SPEAKING SAMPLES WEEK 2
No ratings yet
IELTs SPEAKING SAMPLES WEEK 2
9 pages
Acr New
No ratings yet
Acr New
3 pages
CS402 Mid Term Papers
No ratings yet
CS402 Mid Term Papers
38 pages
Unit 1
No ratings yet
Unit 1
10 pages
Fdocuments - in - MB w211 Video Interface Manual 2014 11 10 Merecedes Benz w211 User Guide Supportcar
No ratings yet
Fdocuments - in - MB w211 Video Interface Manual 2014 11 10 Merecedes Benz w211 User Guide Supportcar
18 pages
TRAC Bridge Competition Grades 11 12
No ratings yet
TRAC Bridge Competition Grades 11 12
23 pages
4 - en - MIA - O2.3 - Exp Course 6 - Course Material - Part 4 MP
No ratings yet
4 - en - MIA - O2.3 - Exp Course 6 - Course Material - Part 4 MP
46 pages
Angelo R. Padua: Recruiting & Staffing - Medical Records & Claims
No ratings yet
Angelo R. Padua: Recruiting & Staffing - Medical Records & Claims
3 pages
4a-LATIHAN ANALISA SENARIO 1 KLUSTER KERACUNAN METANOL-1
No ratings yet
4a-LATIHAN ANALISA SENARIO 1 KLUSTER KERACUNAN METANOL-1
140 pages
Ayvaz CV11 Disco Cekvalf ENG
No ratings yet
Ayvaz CV11 Disco Cekvalf ENG
2 pages
Digital-Music-Study-Report-2022-c-Final
No ratings yet
Digital-Music-Study-Report-2022-c-Final
36 pages
L TEX Maths and Graphics: A Tim Love July 27, 2006
No ratings yet
L TEX Maths and Graphics: A Tim Love July 27, 2006
16 pages
Alias Method Vector Generation
No ratings yet
Alias Method Vector Generation
19 pages
Network Architecture
No ratings yet
Network Architecture
1 page
Dcs-960l A1 Manual v1.00 WW
No ratings yet
Dcs-960l A1 Manual v1.00 WW
69 pages
L-1 and L-2: Basic Principles and Representation of Instruments
No ratings yet
L-1 and L-2: Basic Principles and Representation of Instruments
17 pages
Ah en Visu Html5 Webclient 109329 en 00
No ratings yet
Ah en Visu Html5 Webclient 109329 en 00
2 pages
Update and Document Oprational Procider
No ratings yet
Update and Document Oprational Procider
34 pages
Origin and Development of Robotic Art
100% (1)
Origin and Development of Robotic Art
6 pages
Profprac2 - RSW - FN02
No ratings yet
Profprac2 - RSW - FN02
83 pages
Catálogo Sany STC 300 Guindaste
100% (1)
Catálogo Sany STC 300 Guindaste
801 pages
Latihan1 Mhd. Riza Maulana 1812000102
No ratings yet
Latihan1 Mhd. Riza Maulana 1812000102
6 pages
Quiz Mining Seminar - Gamaliel Akita (073001800019)
No ratings yet
Quiz Mining Seminar - Gamaliel Akita (073001800019)
4 pages
1 Flowchart
No ratings yet
1 Flowchart
28 pages
RX Monza Analyzer - Uk
No ratings yet
RX Monza Analyzer - Uk
2 pages
Technological E COMMERCE PROJECT
No ratings yet
Technological E COMMERCE PROJECT
35 pages
9PX2200IRT2U
No ratings yet
9PX2200IRT2U
4 pages
AI IN BI
No ratings yet
AI IN BI
3 pages
Bao Cao Thuc Tap
No ratings yet
Bao Cao Thuc Tap
24 pages

a

Uploaded by

a

Uploaded by

package com.egenoves.

import static com.egenoves.eva.TokenType.*;

public class Lexer {

if(floatNumber) return new Token(TOKEN_FLOAT, input.substring(startPos,

return new Token(TOKEN_INTEGER, input.substring(startPos, currentPos),

You might also like