Compiler Design Lab
Compiler Design Lab
DATE: ……………………………..
1
INDEX
1.
C Program to count the
number of tokens
2.
C program to find the
nature of tokens
3.
C program to identify
whether a given line is
comment or not.
4.
C program to check
whether the given string is
accepted by the DFA or
not?
5.
C Program to eliminate
the left recursion in
compiler design
6.
C Program to eliminate
the left factoring in
compiler design
7.
C program to find
whether the given
grammar is LL(1) or not
2
PROGRAM 01
C Program to calculate the number of tokens
Objective: To count the tokens generated by the lexical analyser and to determine
the validity and type of each token.
Theory:
What is a token?
A lexical token is a sequence of characters that can be treated as a unit in
the grammar of the programming languages.
Example of tokens:
Type token (id, number, real, . . . )
Punctuation tokens (IF, void, return, . . . )
Alphabetic tokens (keywords)
Keywords; Examples-for, while, if etc.
Identifier; Examples-Variable name, function name, etc.
Operators; Examples '+', '++', '-' etc.
Separators; Examples ',' ';' etc
PROGRAM
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
3
}
if (len == 0)
return (false);
for (i = 0; i < len; i++)
{
4
if (str[i] != '0' && str[i] != '1' && str[i] != '2' && str[i] != '3' && str[i] != '4' && str[i]
!= '5' && str[i] != '6' && str[i] != '7' && str[i] != '8' && str[i] != '9' || (str[i] == '-'
&& i > 0))
return (false);
}
return (true);
}
if (len == 0)
return (false);
for (i = 0; i < len; i++)
{
if (str[i] != '0' && str[i] != '1' && str[i] != '2' && str[i] != '3' && str[i] != '4' &&
str[i] != '5' && str[i] != '6' && str[i] != '7' && str[i] != '8' && str[i] != '9' && str[i] !=
'.' ||
(str[i] == '-' && i > 0))
return (false);
if (str[i] == '.')
hasDecimal = true;
}
return (hasDecimal);
}
5
while (right <= len && left <= right)
{
if (isDelimiter(str[right]) == false)
right++;
right++;
left = right;
}
else if (isDelimiter(str[right]) == true && left != right || (right == len &&
left != right))
{
char *subStr = subString(str, left, right - 1);
if (isKeyword(subStr) == true)
{
printf("'%s' IS A KEYWORD\n", subStr);
count++;
}
6
{
printf("'%s' IS NOT A VALID IDENTIFIER\n", subStr);
count++;
}
left = right;
}
}
printf("Total number of token is %d\n", count);
return;
}
// Driver function: declares the sample input line and exits with status 0.
// NOTE(review): str is initialised but never used in this function as listed;
// the call that passes it to the tokenizer appears to be missing from the
// listing -- confirm against the original program.
int main()
{
// The buffer holds at most 99 characters plus the terminating NUL.
// NOTE(review): the quotes around Hello world are typographic quotes, not
// ASCII quote characters -- confirm that this is the intended test input.
char str[100] = "int a = 3; printf(‘Hello world’); ";
return (0);
}
OUTPUT
7
PROGRAM 02
C Program to calculate the number of tokens and determine its validity and
type:-
Objective: To count the tokens generated by the lexical analyser and to determine
the validity and type of each token.
Theory:
What is a token?
A lexical token is a sequence of characters that can be treated as a unit in
the grammar of the programming languages.
Example of tokens:
Type token (id, number, real, . . . )
Punctuation tokens (IF, void, return, . . . )
Alphabetic tokens (keywords)
Keywords; Examples-for, while, if etc.
Identifier; Examples-Variable name, function name, etc.
Operators; Examples '+', '++', '-' etc.
Separators; Examples ',' ';' etc
PROGRAM
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
8
}
if (len == 0)
return (false);
for (i = 0; i < len; i++)
{
9
if (str[i] != '0' && str[i] != '1' && str[i] != '2' && str[i] != '3' && str[i] != '4' && str[i]
!= '5' && str[i] != '6' && str[i] != '7' && str[i] != '8' && str[i] != '9' || (str[i] == '-'
&& i > 0))
return (false);
}
return (true);
}
if (len == 0)
return (false);
for (i = 0; i < len; i++)
{
if (str[i] != '0' && str[i] != '1' && str[i] != '2' && str[i] != '3' && str[i] != '4' &&
str[i] != '5' && str[i] != '6' && str[i] != '7' && str[i] != '8' && str[i] != '9' && str[i] !=
'.' ||
(str[i] == '-' && i > 0))
return (false);
if (str[i] == '.')
hasDecimal = true;
}
return (hasDecimal);
}
10
int count = 0;
right++;
left = right;
}
else if (isDelimiter(str[right]) == true && left != right || (right == len &&
left != right))
{
char *subStr = subString(str, left, right - 1);
if (isKeyword(subStr) == true)
{
printf("'%s' IS A KEYWORD\n", subStr);
count++;
}
11
else if (validIdentifier(subStr) == false && isDelimiter(str[right - 1]) ==
false)
{
printf("'%s' IS NOT A VALID IDENTIFIER\n", subStr);
count++;
}
left = right;
}
}
printf("Total number of token is %d\n", count);
return;
}
// Driver function: declares the sample input line and exits with status 0.
// NOTE(review): str is initialised but never used in this function as listed;
// the call that passes it to the tokenizer appears to be missing from the
// listing -- confirm against the original program.
int main()
{
// The buffer holds at most 99 characters plus the terminating NUL.
// NOTE(review): the quotes around Hello world are typographic quotes, not
// ASCII quote characters -- confirm that this is the intended test input.
char str[100] = "int a = 3; printf(‘Hello world’); ";
return (0);
}
OUTPUT
12
Program 03
C program to identify whether a given line is comment or not.
Program:-
#include<stdio.h>
void main()
{
char com [30];
int i=2,a=0;
gets(com);
if(com[0]=='/')
{
if(com[1]=='/')
printf("\n It is a Comment.");
13
}
else
}
else
}
Output:-
14
Program 04
C program to check whether the given string is accepted by the DFA or
not?
Theory:-
Description:
The given DFA has the following states. State 3 leads to the acceptance of the
string, whereas states 0, 1, 2 and 4 lead to the rejection of the string.
DFA State Transition Diagram:
Program:-
#include <stdio.h>
#include <string.h>
15
void start(char c)
{
if (c == 'a') {
dfa = 1;
}
else if (c == 'b') {
dfa = 3;
}
16
else if (c == 'a') {
dfa = 4;
}
else {
dfa = -1;
}
}
else if (dfa == 1)
state1(str[i]);
else if (dfa == 2)
state2(str[i]);
else if (dfa == 3)
state3(str[i]);
else if (dfa == 4)
state4(str[i]);
else
return 0;
}
if (dfa == 3)
return 1;
else
return 0;
}
// Driver: feeds one fixed sample string to isAccepted() and prints
// "ACCEPTED" when it returns non-zero, "NOT ACCEPTED" otherwise.
int main()
{
    char sample[] = "aaaaaabbbb";
    const char *verdict = "NOT ACCEPTED";

    if (isAccepted(sample))
        verdict = "ACCEPTED";

    // No trailing newline is written, matching the printed form exactly.
    fputs(verdict, stdout);
    return 0;
}
Output:-
18
PROGRAM 05
C Program to eliminate the left recursion in compiler design
Objective: To remove left recursion from a given grammar in compiler design
Theory:
Left recursion is eliminated by converting the grammar into a right recursive
grammar.
If we have the left-recursive pair of productions-
A → Aα / β
(Left Recursive Grammar)
where β does not begin with an A.
Then, we can eliminate left recursion by replacing the pair of productions
with-
A → βA’
A’ → αA’ / ∈
(Right Recursive Grammar)
This right-recursive grammar generates the same language as the left-recursive grammar.
PROGRAM
#include<stdio.h>
#include<string.h>
#define SIZE 10
/*
 * Reads productions of the form A->Aa|b (no spaces) and, for every one that
 * is immediately left recursive, prints the equivalent right-recursive pair
 *
 *     A  -> bA'
 *     A' -> aA'|E
 *
 * where the trailing E in the second line is this program's spelling of
 * epsilon.  alpha is everything between the leading non-terminal and the
 * first '|'; beta is the alternative that follows that '|'.  Both may be
 * longer than one symbol (e.g. E->E+T|T gives alpha "+T" and beta "T").
 *
 * Returns 0 on success, 1 when the input cannot be read or the production
 * count does not fit the table.
 */
int main(void) {
    char production[10][SIZE];
    char fmt[16];
    int num;
    const int max_productions = (int)(sizeof production / sizeof production[0]);
    const size_t rhs = 3; /* index of the first symbol following "->" */

    printf("Enter Number of Production : ");
    if (scanf("%d", &num) != 1 || num < 1 || num > max_productions) {
        printf("Number of productions must be between 1 and %d\n", max_productions);
        return 1;
    }

    /* Build "%<SIZE-1>s" so a long token can never overflow one table row. */
    snprintf(fmt, sizeof fmt, "%%%ds", SIZE - 1);

    printf("Enter the grammar as E->E-A :\n");
    for (int i = 0; i < num; i++) {
        if (scanf(fmt, production[i]) != 1) {
            printf("Could not read production %d\n", i + 1);
            return 1;
        }
    }

    for (int i = 0; i < num; i++) {
        const char *p = production[i];
        char non_terminal = p[0];

        printf("\nGRAMMAR : : : %s", p);

        /* Too short to have a right-hand side, or it does not start with A. */
        if (strlen(p) <= rhs || p[rhs] != non_terminal) {
            printf(" is not left recursive.\n");
            continue;
        }
        printf(" is left recursive.\n");

        const char *alpha = p + rhs + 1;
        const char *bar = strchr(alpha, '|');
        if (bar == NULL) {
            /* A->Aa with no other alternative has no beta to start from. */
            printf(" can't be reduced\n");
            continue;
        }

        const char *beta = bar + 1;
        int alpha_len = (int)(bar - alpha);
        int beta_len = (int)strcspn(beta, "|"); /* stop at a further alternative */

        printf("Grammar without left recursion:\n");
        printf("%c->%.*s%c\'", non_terminal, beta_len, beta, non_terminal);
        printf("\n%c\'->%.*s%c\'|E\n", non_terminal, alpha_len, alpha, non_terminal);
    }
    return 0;
}
OUTPUT
20
PROGRAM 06
C Program to eliminate the left factoring in compiler design
Objective: To left-factor a given grammar in compiler design
Theory:
In compiler design, even if a context-free grammar is unambiguous and free of
left recursion, it may still not be LL(1). One reason is that two alternatives of
a non-terminal share a common prefix; left factoring removes that prefix.
E->aE+bcD
E->aE+cBD
Here, the grammar is not left recursive and is unambiguous, but both alternatives share the prefix aE+, so it needs left factoring.
How to resolve ?
E=aB | aC | aD | ............
then,
E=aX
X=B | C | D |...........
E=aE+X
X=bcD | cBD
PROGRAM
#include<stdio.h>
#include<string.h>
/*
 * Left-factors a production that has two alternatives.
 *
 * Reads the right-hand side "alt1|alt2" of A (at most 19 characters), takes
 * the longest common prefix of the two alternatives and prints
 *
 *     A->prefixX
 *     X->rest1|rest2
 *
 * Everything after the first '|' is treated as the second alternative.
 * If the alternatives share no prefix the production is printed unchanged.
 *
 * Returns 0 on success, 1 when no input could be read or the input contains
 * no '|'.
 */
int main(void)
{
    char gram[20], part1[20], part2[20], modifiedGram[20], newGram[20];
    size_t pos = 0; /* length of the common prefix */
    char *bar;

    printf("Enter Production : A->");
    if (fgets(gram, sizeof gram, stdin) == NULL)
        return 1;
    gram[strcspn(gram, "\n")] = '\0'; /* fgets keeps the newline */

    bar = strchr(gram, '|');
    if (bar == NULL) {
        printf("\n Production needs two alternatives separated by '|'\n");
        return 1;
    }

    /* Split in place at the first '|'. */
    *bar = '\0';
    snprintf(part1, sizeof part1, "%s", gram);
    snprintf(part2, sizeof part2, "%s", bar + 1);

    /* Stop at the first mismatch or at the end of the shorter alternative. */
    while (part1[pos] != '\0' && part1[pos] == part2[pos])
        pos++;

    if (pos == 0) {
        /* Nothing in common: there is nothing to factor out. */
        printf("\n A->%s|%s\n", part1, part2);
        return 0;
    }

    snprintf(modifiedGram, sizeof modifiedGram, "%.*sX", (int)pos, part1);
    snprintf(newGram, sizeof newGram, "%s|%s", part1 + pos, part2 + pos);

    printf("\n A->%s",modifiedGram);
    printf("\n X->%s\n",newGram);
    return 0;
}
OUTPUT
22
Program 07
C program to find whether the given grammar is LL(1) or not ?
ALGORITHM / PROCEDURE :
Step1: Start
Step2: Declare a character array w[10] and Z as an array
Step3: Enter the string with $ at the end
Step4: if (A(w[z]) then increment z and check for (B(w[z])) and if satisfies
increment z and check for ‘d’ if d is present then increment and
check for (D(w[z]))
Step5: if step 4 is satisfied then the string is accepted
Else string is not
Step 6: Exit
Program:-
23
/*
 * Token codes shared by the lexer and the recursive-descent routines.
 * NOTE(review): id, mulop and CONST are used by the functions below but
 * their definitions are not visible in this part of the listing -- confirm
 * that they are defined before this point.
 */
#define addop 3  /* '+' */
#define op 4     /* '(' */
#define cp 5     /* ')': F() compares against this value after "( E" */
#define err 6    /* character that starts no known token */
#define col 7    /* returned by lexer() at the end of the input line */
#define size 50  /* capacity of lexbuff, terminator included */

int token;            /* code most recently returned by lexer() */
char lexbuff[size];   /* the input line being parsed */
int lookahead=0;      /* index of the next unread character in lexbuff */

/*
 * Prototypes for the routines defined below, so that calls made before a
 * definition are checked instead of relying on implicit declarations.
 */
int parser(void);
int E(void);
int T(void);
int EPRIME(void);
int TPRIME(void);
int F(void);
int lexer(void);
/*
 * Entry point: clears the screen, reads one line into lexbuff and hands it
 * to parser().  Always returns 0.
 *
 * The line is read with a bounded fgets() so that input longer than the
 * buffer is truncated instead of overflowing it.  The trailing newline is
 * removed so the rest of the program sees the same text gets() produced.
 * clrscr() is not a standard C function (presumably from <conio.h>).
 */
int main()
{
    int i;

    clrscr();
    printf(" Enter the string :");
    if (fgets(lexbuff, (int)sizeof lexbuff, stdin) == NULL)
        lexbuff[0] = '\0'; /* no input: parse an empty line */

    for (i = 0; lexbuff[i] != '\0'; i++) {
        if (lexbuff[i] == '\n') {
            lexbuff[i] = '\0';
            break;
        }
    }

    parser();
    return 0;
}
/*
 * Runs the parse from the start symbol E and prints "valid string" when E()
 * succeeds, "invalid string" otherwise, then waits for a key press.
 * Always returns 0.
 *
 * NOTE(review): only the result of E() is checked; whether every character
 * of the input was consumed is not verified here.
 * getch() is not a standard C function (presumably from <conio.h>).
 */
int parser(void)
{
    const char *verdict = E() ? "valid string" : "invalid string";

    printf("%s", verdict);
    getch();
    return 0;
}
/*
 * E -> T E'
 * Returns 1 when a T followed by an E' is recognised, 0 otherwise.
 * EPRIME() is only attempted after T() has succeeded.
 */
int E(void)
{
    return T() && EPRIME();
}
/*
 * T -> F T'
 * Returns 1 when an F followed by a T' is recognised, 0 otherwise.
 * TPRIME() is only attempted after F() has succeeded.
 */
int T(void)
{
    return F() && TPRIME();
}
/*
 * E' -> + T E' | epsilon
 * Returns 1 when the rest of the input matches E', 0 when a '+' is not
 * followed by a valid T E'.
 */
int EPRIME(void)
{
    token = lexer();
    if (token != addop)
        return 1; /* epsilon alternative: nothing to match */

    lookahead++; /* lexer() reports '+' without stepping over it */
    return T() && EPRIME();
}
/*
 * T' -> * F T' | epsilon
 * Returns 1 when the rest of the input matches T', 0 when a '*' is not
 * followed by a valid F T'.
 */
int TPRIME(void)
{
    token = lexer();
    if (token != mulop)
        return 1; /* epsilon alternative: nothing to match */

    lookahead++; /* lexer() reports '*' without stepping over it */
    return F() && TPRIME();
}
F()
{
token=lexer();
if(token==id)
return 1;
else
{
if(token==4)
{
if(E())
25
{
if(token==5)
return 1;
else
return 0;
}
else
return 0;
}
else
return 0;
}
}
lexer()
{
if(lexbuff[lookahead]!='\n')
{
while(lexbuff[lookahead]=='\t')
lookahead++;
if(isalpha(lexbuff[lookahead]))
{
while(isalnum(lexbuff[lookahead]))
lookahead++;
return(id);
}
else
{
if(isdigit(lexbuff[lookahead]))
{
while(isdigit(lexbuff[lookahead]))
lookahead++;
return CONST;
}
else
{
if(lexbuff[lookahead]=='+')
{
return(addop);
}
else
{
if(lexbuff[lookahead]=='*')
{
return(mulop);
}
else
{
if(lexbuff[lookahead]=='(')
{
26
lookahead++;
return(op);
}
else
{
if(lexbuff[lookahead]==')')
{
return(op);
}
else
{
return(err);
}
}
}
}
}
}
}
else
return (col);
}
Output:-
27