0% found this document useful (0 votes)
5 views

2

The document is a C program that implements a lexical analyzer to tokenize a given input string. It defines functions to identify delimiters, operators, valid identifiers, keywords, and numbers, and processes the input string to extract and classify these tokens. The main function demonstrates the lexical analyzer with a sample input expression.

Uploaded by

gg4480
Copyright
© All Rights Reserved
Available Formats
Download as TXT or PDF, or read online on Scribd
0% found this document useful (0 votes)
5 views

2

The document is a C program that implements a lexical analyzer to tokenize a given input string. It defines functions to identify delimiters, operators, valid identifiers, keywords, and numbers, and processes the input string to extract and classify these tokens. The main function demonstrates the lexical analyzer with a sample input expression.

Uploaded by

gg4480
Copyright
© All Rights Reserved
Available Formats
Download as TXT or PDF, or read online on Scribd
You are on page 1/ 6

#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_LENGTH 100

// Function to check if a character is a delimiter
//
// Returns true when chr is a space, one of the symbols + - * / % > < =,
// a comma, a semicolon, or a round, square or curly bracket. Every other
// character (including '\0', tabs and newlines) yields false.
bool isDelimiter(char chr) {
    switch (chr) {
    case ' ':
    case '+': case '-': case '*': case '/': case '%':
    case '>': case '<': case '=':
    case ',': case ';':
    case '(': case ')':
    case '[': case ']':
    case '{': case '}':
        return true;
    default:
        return false;
    }
}

// Function to check if a character is an operator
//
// Returns true when chr is one of + - * / % > < = & |, and false for
// anything else. Note that '!' is not part of this set.
bool isOperator(char chr) {
    switch (chr) {
    case '+': case '-': case '*': case '/': case '%':
    case '>': case '<': case '=':
    case '&': case '|':
        return true;
    default:
        return false;
    }
}

// Function to check if a string is a valid identifier
//
// An identifier must start with a letter or an underscore, and every
// following character must be a letter, a digit, or an underscore.
// Returns false for NULL and for the empty string.
bool isValidIdentifier(const char *str) {
    if (str == NULL)
        return false;

    // <ctype.h> classifiers need an unsigned char value; a negative plain
    // char would be undefined behavior.
    unsigned char first = (unsigned char)str[0];
    if (!isalpha(first) && first != '_')
        return false;

    // Walk to the terminator directly instead of calling strlen() on
    // every iteration.
    for (const char *p = str + 1; *p != '\0'; p++) {
        unsigned char c = (unsigned char)*p;
        if (!isalnum(c) && c != '_')
            return false;
    }

    return true;
}

// Function to check if a string is a keyword
//
// Compares str (case-sensitively, whole-string) against the table of C
// keywords below and returns true on an exact match. Keywords not in the
// table (for example "inline" or "restrict") are not recognized.
// Returns false for NULL.
bool isKeyword(const char *str) {
    // static const: the table is built once, not on every call.
    static const char *const keywords[] = {
        "auto",     "break",  "case",    "char",   "const",    "continue",
        "default",  "do",     "double",  "else",   "enum",     "extern",
        "float",    "for",    "goto",    "if",     "int",      "long",
        "register", "return", "short",   "signed", "sizeof",   "static",
        "struct",   "switch", "typedef", "union",  "unsigned", "void",
        "volatile", "while"
    };

    if (str == NULL)
        return false;

    for (size_t i = 0; i < sizeof keywords / sizeof keywords[0]; i++) {
        if (strcmp(str, keywords[i]) == 0)
            return true;
    }

    return false;
}

// Function to check if a string is an integer or floating point number
//
// Accepts an optional leading '-', followed by decimal digits with at most
// one '.' anywhere among them (so "10", "-5", "3.14", "5." and ".5" are all
// accepted). At least one digit is required: "", "-" and "." are rejected.
// Returns false for NULL.
bool isNumber(const char *str) {
    if (str == NULL)
        return false;

    size_t i = 0;
    if (str[i] == '-') // Allow negative numbers
        i++;

    bool hasDecimal = false;
    bool hasDigit = false;

    for (; str[i] != '\0'; i++) {
        // Cast: isdigit() requires an unsigned char value.
        if (isdigit((unsigned char)str[i])) {
            hasDigit = true;
        } else if (str[i] == '.' && !hasDecimal) {
            hasDecimal = true; // Allow only one decimal point
        } else {
            return false;
        }
    }

    // Without this check an empty string or a lone sign/point would be
    // reported as a number.
    return hasDigit;
}

// Function to extract a substring from a string
//
// Returns a newly allocated, NUL-terminated copy of str[start..end]
// (both indices inclusive). When end < start the result is an empty string.
// Returns NULL if str is NULL, start is negative, or allocation fails.
// The caller owns the returned buffer and must free() it.
char *getSubstring(const char *str, int start, int end) {
    if (str == NULL || start < 0)
        return NULL;

    int subLength = end - start + 1;
    if (subLength < 0) // Inverted range: treat as empty, never a negative size
        subLength = 0;

    char *subStr = malloc((size_t)subLength + 1);
    if (subStr == NULL)
        return NULL;

    // strncpy stops reading at the source terminator, so a range that runs
    // onto the end of str does not read past it; the result is terminated
    // explicitly below.
    strncpy(subStr, str + start, (size_t)subLength);
    subStr[subLength] = '\0';

    return subStr;
}

// Returns true when the two characters together form a two-character C
// operator: an assignment/comparison ending in '=' (==, <=, +=, ...), a
// doubled symbol (++, --, &&, ||, <<, >>), or the arrow "->".
static bool isCompoundOperator(char first, char second) {
    if (first == '\0') // strchr() would otherwise match the terminator
        return false;
    if (second == '=')
        return strchr("=<>+-*/%&|", first) != NULL;
    if (first == second)
        return strchr("+-&|<>", first) != NULL;
    return first == '-' && second == '>';
}

// Lexical Analyzer Function
//
// Scans input from left to right and prints one "Token: <kind>, Value: ..."
// line to stdout per token. Recognized kinds: String Literal, Operator,
// Delimiter, Keyword, Number, Identifier, and Unidentified for any word that
// fits none of the others. Whitespace and // or /* */ comments are skipped
// without output. A NULL input is ignored. If a substring cannot be
// allocated, a message is written to stderr and scanning stops.
void lexicalAnalyzer(char *input) {
    if (input == NULL)
        return;

    int len = (int)strlen(input);
    int right = 0;

    // Every branch below advances `right` by at least one character, so the
    // loop always terminates at the end of the input.
    while (right < len) {
        char cur = input[right];
        // right < len, so index right + 1 is at most the NUL terminator.
        char next = input[right + 1];

        // Skip all whitespace (spaces, tabs, and the newline that ends a
        // single-line comment) so it is never glued onto the next word.
        if (isspace((unsigned char)cur)) {
            right++;
            continue;
        }

        // Handling string literals
        if (cur == '"') {
            int start = right++;
            while (right < len && input[right] != '"') {
                // Step over an escaped character so \" does not end the literal
                if (input[right] == '\\' && right + 1 < len)
                    right++;
                right++;
            }
            if (right < len)
                right++; // Include closing quote

            char *strLiteral = getSubstring(input, start, right - 1);
            if (strLiteral == NULL) {
                fprintf(stderr, "lexicalAnalyzer: out of memory\n");
                return;
            }
            printf("Token: String Literal, Value: %s\n", strLiteral);
            free(strLiteral);
            continue;
        }

        // Handling single-line and multi-line comments
        if (cur == '/' && next == '/') {
            while (right < len && input[right] != '\n')
                right++;
            continue;
        }
        if (cur == '/' && next == '*') {
            right += 2;
            while (right < len &&
                   !(input[right] == '*' && input[right + 1] == '/'))
                right++;
            // Step past the closing */; an unterminated comment runs to the end
            right = (right < len) ? right + 2 : len;
            continue;
        }

        // Detecting multi-character operators (>=, ==, etc.)
        if (isOperator(cur)) {
            // Only fuse pairs that really are one operator; "a=-1" must give
            // '=' and '-' rather than a bogus "=-".
            if (isOperator(next) && isCompoundOperator(cur, next)) {
                printf("Token: Operator, Value: %c%c\n", cur, next);
                right += 2;
            } else {
                printf("Token: Operator, Value: %c\n", cur);
                right++;
            }
            continue;
        }

        // Reporting delimiters (spaces were already skipped above)
        if (isDelimiter(cur)) {
            printf("Token: Delimiter, Value: %c\n", cur);
            right++;
            continue;
        }

        // Processing words (identifiers, keywords, numbers). The first
        // character is always consumed; the word then runs until the next
        // delimiter, operator, whitespace, or quote.
        int start = right;
        do {
            right++;
        } while (right < len && !isDelimiter(input[right]) &&
                 !isOperator(input[right]) &&
                 !isspace((unsigned char)input[right]) &&
                 input[right] != '"');

        char *subStr = getSubstring(input, start, right - 1);
        if (subStr == NULL) {
            fprintf(stderr, "lexicalAnalyzer: out of memory\n");
            return;
        }

        if (isKeyword(subStr))
            printf("Token: Keyword, Value: %s\n", subStr);
        else if (isNumber(subStr))
            printf("Token: Number, Value: %s\n", subStr);
        else if (isValidIdentifier(subStr))
            printf("Token: Identifier, Value: %s\n", subStr);
        else
            printf("Token: Unidentified, Value: %s\n", subStr);

        free(subStr);
    }
}

// Main function
//
// Prints a fixed sample C snippet and then runs the lexical analyzer on it,
// which prints the token stream to stdout.
int main(void) {
    // The sample is written as adjacent string literals because a string
    // literal may not contain a raw line break. It must fit in MAX_LENGTH
    // bytes including the terminator.
    char lex_input[MAX_LENGTH] =
        "int main() { int a = 10, b = 20; float c = 3.14; "
        "if (a >= b) printf(\"Hello World!\"); return 0; }";

    printf("For Expression:\n%s\n\n", lex_input);

    lexicalAnalyzer(lex_input);

    return 0;
}

You might also like