0% found this document useful (0 votes)
88 views47 pages

CD Lab - Anubhav PDF

Uploaded by

R
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
88 views47 pages

CD Lab - Anubhav PDF

Uploaded by

R
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 47

DEPARTMENT OF COMPUTER SCIENCE AND

ENGINEERING

COMPILER DESIGN (CD)


LABORATORY MANUAL

CO - 317
DELHI TECHNOLOGICAL UNIVERSITY

Anubhav Aron
2K15/CO/034
INDEX
S.No NAME OF EXPERIMENT Signature Date

1 Write a program to convert NFA to DFA

2 Write a program for acceptance of string by DFA

3 Write a program to find different tokens in a program

4 Write a program to implement Lexical Analyser

5 Write a program to implement recursive descent parser

6 Write a program to left factor the given grammar

7 Write a program to convert left recursive grammar to right

recursive grammar
8 Write a program to compute FIRST and FOLLOW

9 Write a program to construct LL(1) parsing table

10 Write a program to implement non recursive predictive parsing.

11 Write a program to implement an error handler

12 Write a program to implement one pass compiler


Prog. 1 : Write a program to convert Non-Deterministic
Finite Automata to Deterministic Finite Automata
THEORY: NFA is represented as a set of 5 tuple which has (Qx, ∑, δx, q0, Fx) where Qx is the
set of states, ∑ is the set of symbols, δx is the Transition Function, q0 is the initial state and Fx is the
set of final states AND DFA is represented as a set of 5 tuple which has (Qy, ∑, δy, q0, Fy) where Qy is
the set of states, ∑ is the set of symbols, δy is the Transition Function, q0 is the initial state and Fy is
the set of final states.

An algorithm for the conversion of NFA to DFA is given below.


Algorithm:
1. Construct the transition table of the given NFA.
2. Scan the next-state columns of the transition table, starting from the initial state.
3. If any next-state entry contains more than one state for a single input
symbol, merge those states into one new state. Place this newly constructed state in the DFA
transition table as a present state.
4. The next state of this newly constructed state on an input symbol is the union of the
next states of its member states in the NFA transition table.
5. Repeat steps 2 to 4 until all states, including the newly constructed ones, have been scanned completely.
6. The final transition table must have a single next state for each input symbol.

Example to illustrate the Algorithm


Step 1: Transition Table of NFA from Figure (1):

Next State

Present State 0 1

→q0 {q2} Φ

q1 Φ {q0, q2}

q2* {q0, q1} {q0}

Step 2: Transition Table of DFA:


Next State

Present State
0 1

→{q0} {q2} Φ

{q2} {q0, q1} {q0}

{q0, q1} {q2} {q0, q2}

{q0, q2} {q0, q1, q2} {q0}

{q0, q1, q2} {q0, q1, q2} {q0}


#include<bits/stdc++.h>
using namespace std;
// NFA transition table: trans[state][symbol] lists the target states of
// `state` on input symbol 0 or 1 (filled by main from user input).
vector<int> trans[10][2];
// DFA transition table. The key is (discovery number, subset of NFA states);
// the value holds the subsets reached on input 0 (first) and input 1 (second).
map< pair<int,vector<int> >,pair< vector<int>,vector<int> > > ans;
// Final states of the NFA as entered by the user.
int finalstates[10];
// Marks the subsets that have already been recorded as DFA states.
map<vector<int>,bool> mp;
// Work list of subsets still waiting to be processed (a FIFO queue, despite the name).
queue< vector<int> > myset;
// DFA states (subsets of NFA states) in the order they were discovered.
vector< vector<int> > states;
// Debug helper: prints every element of `v` followed by a space, then ends
// the line. The vector is taken by const reference so a call does not copy it.
void debug(const vector<int>& v)
{
    for(int value : v)
        cout<<value<<" ";
    cout<<endl;
}
// Returns the sorted, duplicate-free set of NFA states reachable from any
// state in `subset` on input `symbol` (0 or 1), using the global `trans`.
static vector<int> moveOn(const vector<int>& subset, int symbol)
{
    vector<int> result;
    for(size_t i=0;i<subset.size();++i)
    {
        const vector<int>& targets=trans[subset[i]][symbol];
        result.insert(result.end(),targets.begin(),targets.end());
    }
    sort(result.begin(),result.end());
    result.erase(unique(result.begin(),result.end()),result.end());
    return result;
}

// Prints a subset as "{a b c}"; the empty (dead) state prints as "{}".
static void printSubset(const vector<int>& subset)
{
    cout<<"{";
    for(size_t i=0;i<subset.size();++i)
    {
        if(i>0)
            cout<<" ";
        cout<<subset[i];
    }
    cout<<"}";
}

// Reads an NFA over the alphabet {0,1} (state 0 is the start state), builds
// the equivalent DFA by subset construction and prints its states, its
// transition table and its accepting states.
// Returns 0 on success and 1 when the input is malformed or out of range.
int main()
{
    const int kMaxStates=10;   // capacity of trans[] and finalstates[]
    const char* prompts[2]={"Number of outgoing edges for 0 input: ",
                            "Number of outgoing edges for 1 input : "};
    int n=0,m=0;

    cout<<"No. of states in NFA : ";
    if(!(cin>>n) || n<1 || n>kMaxStates)
    {
        cerr<<"Number of states must be between 1 and "<<kMaxStates<<"\n";
        return 1;
    }
    for(int i=0;i<n;++i)
    {
        for(int symbol=0;symbol<2;++symbol)
        {
            int k=0;
            cout<<prompts[symbol];
            if(!(cin>>k) || k<0)
            {
                cerr<<"Invalid number of edges\n";
                return 1;
            }
            for(int j=0;j<k;++j)
            {
                int a=0;
                // A target outside [0,n) would index past the end of trans[].
                if(!(cin>>a) || a<0 || a>=n)
                {
                    cerr<<"State numbers must be between 0 and "<<n-1<<"\n";
                    return 1;
                }
                trans[i][symbol].push_back(a);
            }
        }
    }
    cout<<"Number of final states : ";
    if(!(cin>>m) || m<0 || m>n)
    {
        cerr<<"Number of final states must be between 0 and "<<n<<"\n";
        return 1;
    }
    for(int i=0;i<m;++i)
    {
        if(!(cin>>finalstates[i]) || finalstates[i]<0 || finalstates[i]>=n)
        {
            cerr<<"State numbers must be between 0 and "<<n-1<<"\n";
            return 1;
        }
    }

    // Subset construction: start from {0} and keep expanding every subset
    // that has not been seen before. Subsets are always kept sorted and
    // duplicate-free so that equal sets compare equal in `mp`.
    int counter=1;
    myset.push(vector<int>(1,0));
    while(!myset.empty())
    {
        const vector<int> current=myset.front();
        myset.pop();
        if(mp[current])
            continue;
        mp[current]=true;
        states.push_back(current);
        const vector<int> onZero=moveOn(current,0);
        const vector<int> onOne=moveOn(current,1);
        ans[{counter++,current}]={onZero,onOne};
        myset.push(onZero);
        myset.push(onOne);
    }

    cout<<"The states in DFA : \n";
    for(size_t i=0;i<states.size();++i)
    {
        for(size_t j=0;j<states[i].size();++j)
            cout<<states[i][j]<<" ";
        cout<<endl;
    }

    cout<<"DFA transition table\n\nStates\t0\t1\n";
    map< pair<int,vector<int> >,pair< vector<int>,vector<int> > >::const_iterator it;
    for(it=ans.begin();it!=ans.end();++it)
    {
        printSubset(it->first.second);
        cout<<"\t";
        printSubset(it->second.first);
        cout<<"\t";
        printSubset(it->second.second);
        cout<<"\n";
    }

    // A DFA state accepts when it contains at least one NFA final state.
    cout<<"Final states of DFA : \n";
    for(size_t i=0;i<states.size();++i)
    {
        bool accepting=false;
        for(int j=0;j<m && !accepting;++j)
            accepting=find(states[i].begin(),states[i].end(),finalstates[j])!=states[i].end();
        if(accepting)
        {
            printSubset(states[i]);
            cout<<"\n";
        }
    }
    return 0;
}
OUTPUT

RESULT AND DISCUSSION


Sometimes it is not easy to convert a regular expression directly to a DFA. Instead, you can first convert
the regular expression to an NFA and then convert the NFA to a DFA. An NFA can use empty-string (ε) transitions, while a DFA cannot.
An NFA is easier to construct, while a DFA is more difficult to construct.
A DFA generally requires more space (more states), while an NFA requires less space.

FINDING & LEARNING


In this experiment we learnt the process of converting an NFA
to equivalent DFA and how this can be useful in various aspects
of Computer Science.

In some cases we cannot directly convert a regular expression to a DFA; therefore we first convert it to an
NFA
and then convert that NFA to the equivalent DFA.
Prog. 2: Write a program for acceptance of string by DFA
THEORY: A DFA accepts if it is in an accepting state after it has read its input. If the input is the empty
string, the DFA makes no transitions, so after reading its input it is still in its initial
state, q0. If q0 is an accepting state, the automaton accepts the empty string.

Consider an Example

This is an example of DFA which accepts string over the symbol {a,b} and all the string generated by it
are of even length.

CODE
#include<stdio.h>
#include<conio.h>

int ninputs;             // number of input symbols in the DFA's alphabet
int check(char,int );    // function declaration
int dfa[10][10];         // transition table; check() reads it as dfa[state][symbol index]
char c[10], string[10];  // c: the input symbols; string: the input string being tested
/*
 * Reads a DFA (states q0..q(n-1), q0 is the start state), then repeatedly
 * reads a string and reports whether the DFA accepts it.
 * Returns 0 normally and 1 when the DFA description is malformed.
 */
int main(){
    int nstates, nfinals;
    int f[10];
    int i,j,s,final;
    char again;

    printf("Enter the number of states in your DFA : ");
    if(scanf("%d",&nstates)!=1 || nstates<1 || nstates>10)
    {
        printf("\nNumber of states must be between 1 and 10\n");
        return 1;
    }
    printf("Enter the number of input symbol in your DFA : ");
    if(scanf("%d",&ninputs)!=1 || ninputs<1 || ninputs>10)
    {
        printf("\nNumber of input symbols must be between 1 and 10\n");
        return 1;
    }
    printf("Enter input symbols");
    for(i=0; i<ninputs; i++)
    {
        printf("\n\n %d input\t", i+1);
        /* The leading blank skips the newline left by the previous scanf;
           fflush(stdin) is undefined behaviour. */
        if(scanf(" %c",&c[i])!=1)
            return 1;
    }
    printf("\n\nEnter number of final states : \t");
    if(scanf("%d",&nfinals)!=1 || nfinals<0 || nfinals>nstates)
    {
        printf("\nNumber of final states must be between 0 and %d\n",nstates);
        return 1;
    }
    for(i=0;i<nfinals;i++)
    {
        printf("\n\nFinal state %d : q",i+1);
        if(scanf("%d",&f[i])!=1 || f[i]<0 || f[i]>=nstates)
        {
            printf("\nState number must be between 0 and %d\n",nstates-1);
            return 1;
        }
    }
    printf("-----------------------------------------------------------------------");
    printf("\n\ndefine transition rule as (initial state, input symbol ) = final state\n");
    for(i=0; i<ninputs; i++)
    {
        for(j=0; j<nstates; j++)
        {
            printf("\n(q%d , %c ) = q",j,c[i]);
            /* Stored as dfa[state][symbol index], the layout check() reads. */
            if(scanf("%d",&dfa[j][i])!=1 || dfa[j][i]<0 || dfa[j][i]>=nstates)
            {
                printf("\nState number must be between 0 and %d\n",nstates-1);
                return 1;
            }
        }
    }
    do
    {
        /* Every string starts in q0 with no verdict yet. */
        s=0;
        final=0;
        printf("\n\nEnter Input String.. ");
        if(scanf("%9s",string)!=1)   /* string[] holds at most 9 characters */
            break;
        for(i=0; string[i]!='\0'; i++)
        {
            s=check(string[i],s);
            if(s<0)                  /* symbol is not in the alphabet */
                break;
        }
        for(i=0 ;i<nfinals ;i++)
            if(f[i]==s)
                final=1;
        if(final==1)
            printf("\n valid string");
        else
            printf("invalid string");
        printf("\nDo you want to continue.? \n(y/n) ");
        if(scanf(" %c",&again)!=1)
            break;
    }
    while(again=='y');
    return 0;
}
/*
 * Returns the state reached from state d on input symbol b, read from
 * dfa[d][index of b in c].
 * Returns -1 when b is not one of the ninputs symbols in c, or when d is
 * not a valid row of the 10x10 table.
 */
int check(char b,int d)
{
    int j;
    if(d<0 || d>=10)
        return -1;
    for(j=0; j<ninputs && j<10; j++)
    {
        if(b==c[j])
            return dfa[d][j];
    }
    return -1;
}
RESULT &
DISCUSSION
Only those Strings will be accepted by the DFA in which the termination is at one of the
final states and this fact is helpful in generating Regular expressions for a DFA and vice
versa.

FINDING &
LEARNING
In this experiment we learnt the method of the acceptance of a string by a DFA and which
strings can be accepted by the DFA and which are not accepted by it and also we saw
various examples regarding it.
OUTPUT
Prog. 3 : Write a program to identify tokens
THEORY:
∙ C tokens are the basic buildings blocks in C language which are constructed together
to write a C program.

∙ Each and every smallest individual units in a C program are known as C


tokens. C tokens are of six types. They are,
1. Keywords (eg: int, while),
2. Identifiers (eg: main, total),
3. Constants (eg: 10, 20),
4. Strings (eg: “total”, “hello”),

5. Special symbols (eg: (), {}),


6. Operators (eg: +, /,-,*)

ALGORITHM: First mark all the tokens available in a Language then starting from the top we
parse the whole program using the if else statements and count the number of tokens
present in a code.

#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
/* Returns 1 when token is one of the 32 C keywords listed below, 0 otherwise. */
int isKeyword(const char* token)
{
    static const char* const keywords[]={"auto","break","case","char",
        "const","continue","default","do","double",
        "else","enum","extern","float",
        "for","goto","if","int","long",
        "register","return","short","signed",
        "sizeof","static","struct","switch",
        "typedef","union","unsigned",
        "void","volatile","while"};
    const int count=(int)(sizeof keywords/sizeof keywords[0]);
    int i;
    for(i=0;i<count;i++)
    {
        if(strcmp(token,keywords[i])==0)
            return 1;
    }
    return 0;
}
/*
 * Returns 1 when token is a non-empty run of decimal digits, 0 otherwise.
 * The empty string is rejected: it contains no digits and is not a literal.
 */
int isIntLiteral(const char* token)
{
    int i;
    if(token[0]=='\0')
        return 0;
    for(i=0;token[i]!='\0';i++)
    {
        /* <ctype.h> functions need an unsigned char value. */
        if(!isdigit((unsigned char)token[i]))
            return 0;
    }
    return 1;
}
/*
 * Returns 1 when token starts with a digit and consists only of digits and
 * at most one decimal point, 0 otherwise. A second '.' (as in "1.2.3")
 * makes the token invalid.
 */
int isFloatLiteral(const char* token)
{
    int i,dots=0;
    if(!isdigit((unsigned char)token[0]))
        return 0;
    for(i=1;token[i]!='\0';i++)
    {
        if(token[i]=='.')
        {
            if(++dots>1)
                return 0;
        }
        else if(!isdigit((unsigned char)token[i]))
            return 0;
    }
    return 1;
}
/*
 * Returns 1 when token is exactly three characters long and both its first
 * and last characters are single quotes (for example 'a'), 0 otherwise.
 */
int isCharLiteral(const char* token)
{
    return token[0]=='\'' && strlen(token)==3 && token[2]=='\'';
}
/* Returns 1 when token is a character, integer or floating-point literal, 0 otherwise. */
int isLiteral(const char* token)
{
    return isCharLiteral(token) || isIntLiteral(token) || isFloatLiteral(token);
}
/*
 * Returns 1 when token is a C identifier: a letter or underscore followed
 * by any number of letters, digits or underscores. Returns 0 otherwise.
 */
int isIdentifier(const char* token)
{
    int i;
    if(!isalpha((unsigned char)token[0]) && token[0]!='_')
        return 0;
    for(i=1;token[i]!='\0';i++)
    {
        /* Underscores are legal in every position, not only the first. */
        if(!isalnum((unsigned char)token[i]) && token[i]!='_')
            return 0;
    }
    return 1;
}
/*
 * Returns 1 when token matches one of the operator spellings in the table
 * below exactly, 0 otherwise. The entries "(type)" and "?:" are kept from
 * the original table and only match those literal spellings.
 */
int isOperator(const char* token)
{
    static const char* const operators[]={".","->","++","--","!","~","(type)","*","&",
        "sizeof","/","%","+","-","<<",">>","<",
        "<=",">",">=","==","!=","^","|","&&","||",
        "?:","=","+=","-=","*=","/=","%=","&=","^=",
        "|=","<<=",">>=",","};
    const int count=(int)(sizeof operators/sizeof operators[0]);
    int i;
    for(i=0;i<count;i++)
    {
        if(strcmp(token,operators[i])==0)
            return 1;
    }
    return 0;
}
/*
 * Reads inputprog.txt line by line, splits each line into tokens and prints
 * the category of every token.
 * Returns 0 on success and 1 when the input file cannot be opened.
 */
int main()
{
    enum { LINE_CAPACITY = 256 };
    /* Tabs and carriage returns are separators too, so indented lines and
       CRLF files do not leave whitespace glued to a token. */
    static const char delimiters[]=" .,;(){}\n\t\r";
    char line[LINE_CAPACITY];
    char *token;
    FILE *fp = fopen("inputprog.txt","r");

    if(fp==NULL)
    {
        perror("inputprog.txt");
        return 1;
    }
    /* fgets returns NULL at end of file, which is what ends the loop. */
    while(fgets(line,sizeof line,fp)!=NULL)
    {
        for(token=strtok(line,delimiters); token!=NULL; token=strtok(NULL,delimiters))
        {
            if(isKeyword(token))
                printf("%s is a keyword. \n",token);
            else if(isOperator(token))
                printf("%s is an operator. \n",token);
            else if(isIdentifier(token))
                printf("%s is an identifier. \n",token);
            else if(isLiteral(token))
                printf("%s is a literal. \n",token);
            else
                printf("Unrecognized token : %s \n",token);
        }
    }
    fclose(fp);
    return 0;
}

Input File read:


#include<stdio.h>
#include<conio.h>
void main()
{
int n,i,temp,reverse = 0;
scanf (" % d ",& n);
while (n > 0)
{
temp = n % 10;
reverse = reverse * 10 + temp;
n = n / 10;
}
printf ("%d",reverse);
getch ();
}
RESULT & DISCUSSION
Counting the number of tokens in a program code is important as this will be useful for the parser to
parse the code and generate the parse tree and then compiling the code.

LEARNING & FINDING


In this experiment we learnt how to count the number of tokens in a code and also certain things
which are not explicitly mentioned. For example, the text between string quotes is counted as
one token. printf ("DELHI TECHNOLOGICAL UNIVERSITY"); has a total of 5 tokens: printf, (, the string, ), and ;.
OUTPUT
Prog. 4 : Write a program to implement Lexical Analyser
THEORY
Lexical analysis is the first phase of a compiler. It takes the modified source code from language
preprocessors that are written in the form of sentences. The lexical analyzer breaks these syntaxes
into a series of tokens, by removing any whitespace or comments in the source code.

If the lexical analyzer finds a token invalid, it generates an error. The lexical analyzer works closely
with the syntax analyzer. It reads character streams from the source code, checks for legal tokens,
and passes the data to the syntax analyzer when it demands.

ALGORITHM:
Since the function of the lexical analyzer is to scan the source program and produce a stream of
tokens as output, the issues involved in the design of lexical analyzer are:

1. Identifying the tokens of the language for which the lexical analyzer is to be built, and to specify
these tokens by using suitable notation, and
2. Constructing a suitable recognizer for these tokens.

#include<ctype.h>
#include<stdio.h>
#include<stdlib.h>
#include<string.h>

// Forward declarations of the helpers defined below.
void removeduplicate();
void final();
int Isiden(char ch);
int Isop(char ch);
int Isdel(char ch);
int Iskey(char * str);
void removeduplicate();
// Characters that Isop() treats as operator characters.
char op[8]={'+','-','*','/','=','<','>','%'};
// Characters that Isdel() treats as delimiters.
char del[8]={'}','{',';','(',')','[',']',','};
// Words that Iskey() reports as keywords.
char *key[]={"int","void","main","char","float"};
//char *operato[]={"+","-","/","*","<",">","=","%","<=",">=","++"};
// Write cursors and counters: idi/idj index iden, opi/opj index oper, deli counts
// entries in delem, uqdi/uqidi/uqoperi count the de-duplicated delimiters, identifiers
// and operators, kdi counts keywords, liti counts literals and ci counts constants.
// NOTE(review): k is not used by any function visible in this listing.
int idi=0,idj=0,k,opi=0,opj=0,deli=0,uqdi=0,uqidi=0,uqoperi=0,kdi=0,liti=0,ci=0;
// Duplicate markers set by removeduplicate(); l is the write cursor into lit and
// j is the scan index lexanalysis() uses inside a string literal.
int uqdeli[20],uqopi[20],uqideni[20],l=0,j;
// De-duplicated delimiters, identifier-like words and operators, plus the keyword list.
char uqdel[20],uqiden[20][20],uqop[20][20],keyword[20][20];
// Raw scan results: identifier-like words, operators, delimiters, string literals,
// the literal currently being collected, and numeric constants.
char iden[20][20],oper[20][20],delem[20],litral[20][20],lit[20],constant[20][20];
/*
 * Scans str once and sorts its characters into the global tables:
 * identifier-like words into iden, string literals into litral, operator
 * runs into oper and delimiters into delem. Every other character is
 * skipped. It then removes duplicates and prints the result through
 * removeduplicate() and final().
 *
 * Each table holds at most 20 entries of at most 19 characters; anything
 * beyond that is dropped instead of being written past the arrays.
 * Always returns 0.
 */
int lexanalysis(char *str)
{
    int i=0;
    while(str[i]!='\0')
    {
        if(Isiden(str[i]))              /* identifiers, keywords, numbers */
        {
            while(Isiden(str[i]))
            {
                if(idi<20 && idj<19)
                    iden[idi][idj++]=str[i];
                i++;
            }
            if(idi<20)
            {
                iden[idi][idj]='\0';
                idi++;
            }
            idj=0;
        }
        else if(str[i]=='"')            /* string literals */
        {
            l=0;                        /* start each literal at the front of lit */
            lit[l++]=str[i];
            /* Stop at the end of the input too, in case the quote is never closed. */
            for(j=i+1; str[j]!='"' && str[j]!='\0'; j++)
            {
                if(l<18)
                    lit[l++]=str[j];
            }
            if(str[j]=='"')
            {
                lit[l++]=str[j];
                i=j+1;
            }
            else
                i=j;                    /* unterminated literal: stop at '\0' */
            lit[l]='\0';
            if(liti<20)
                strcpy(litral[liti++],lit);
        }
        else if(Isop(str[i]))           /* operators */
        {
            while(Isop(str[i]))
            {
                if(opi<20 && opj<19)
                    oper[opi][opj++]=str[i];
                i++;
            }
            if(opi<20)
            {
                oper[opi][opj]='\0';
                opi++;
            }
            opj=0;
        }
        else if(Isdel(str[i]))          /* delimiters */
        {
            while(Isdel(str[i]))
            {
                if(deli<20)
                    delem[deli++]=str[i];
                i++;
            }
        }
        else
        {
            i++;                        /* whitespace and anything unrecognised */
        }
    }
    removeduplicate();
    final();
    return 0;
}
/*
 * Returns 1 when ch may appear in an identifier-like word (a letter, a
 * digit, '_' or '.'), 0 otherwise.
 */
int Isiden(char ch)
{
    /* <ctype.h> functions need an unsigned char value; a negative char is
       undefined behaviour. */
    const unsigned char uch=(unsigned char)ch;
    return (isalpha(uch) || ch=='_' || isdigit(uch) || ch=='.') ? 1 : 0;
}
/* Returns 1 when ch is one of the eight characters in op, 0 otherwise. */
int Isop(char ch)
{
    int idx;
    for(idx=0; idx<8; idx++)
    {
        if(op[idx]==ch)
            return 1;
    }
    return 0;
}
/* Returns 1 when ch is one of the eight characters in del, 0 otherwise. */
int Isdel(char ch)
{
    int idx=0;
    while(idx<8)
    {
        if(del[idx]==ch)
            return 1;
        idx++;
    }
    return 0;
}
/* Returns 1 when str equals one of the five entries of key, 0 otherwise. */
int Iskey(char * str)
{
    int idx;
    for(idx=0; idx<5; idx++)
    {
        if(strcmp(key[idx],str)==0)
            return 1;
    }
    return 0;
}
/*
 * Copies the first occurrence of every delimiter, identifier-like word and
 * operator into uqdel, uqiden and uqop. Later occurrences are flagged in
 * uqdeli, uqideni and uqopi so they are skipped when the scan reaches them.
 */
void removeduplicate()
{
    int cur,later;

    for(cur=0; cur<20; cur++)
    {
        uqdeli[cur]=0;
        uqopi[cur]=0;
        uqideni[cur]=0;
    }

    /* delimiters */
    for(cur=0; cur<deli; cur++)
    {
        if(uqdeli[cur]!=0)
            continue;
        uqdel[uqdi++]=delem[cur];
        for(later=cur+1; later<deli; later++)
        {
            if(delem[later]==delem[cur])
                uqdeli[later]=1;
        }
    }

    /* identifier-like words */
    for(cur=0; cur<idi; cur++)
    {
        if(uqideni[cur]!=0)
            continue;
        strcpy(uqiden[uqidi++],iden[cur]);
        for(later=cur+1; later<idi; later++)
        {
            if(strcmp(iden[cur],iden[later])==0)
                uqideni[later]=1;
        }
    }

    /* operators */
    for(cur=0; cur<opi; cur++)
    {
        if(uqopi[cur]!=0)
            continue;
        strcpy(uqop[uqoperi++],oper[cur]);
        for(later=cur+1; later<opi; later++)
        {
            if(strcmp(oper[cur],oper[later])==0)
                uqopi[later]=1;
        }
    }
}
void final()
{
int i=0;
idi=0;
for(i=0;i<uqidi;i++)
{
if(Iskey(uqiden[i])) //identifying keywords
strcpy(keyword[kdi++],uqiden[i]);
else
if(isdigit(uqiden[i][0])) //identifying constants
strcpy(constant[ci++],uqiden[i]);
else
strcpy(iden[idi++],uqiden[i]);
}
// printing the outputs
printf("\n\tDelemeter are : \n");
for(i=0;i<uqdi;i++)
printf("\t%c\n",uqdel[i]);
printf("\n\tOperators are : \n");
for(i=0;i<uqoperi;i++)
{
printf("\t");
puts(uqop[i]);
}
printf("\n\tIdentifiers are : \n");
for(i=0;i<idi;i++)
{
printf("\t");
puts(iden[i]);
}
printf("\n\tKeywords are : \n");
for(i=0;i<kdi;i++)
{
printf("\t");
puts(keyword[i]);
}
printf("\n\tConstants are :\n");
for(i=0;i<ci;i++){
printf("\t");
puts(constant[i]);
}
printf("\n\tLiterals are :\n");
for(i=0;i<liti;i++)
{
printf("\t");
puts(litral[i]);
}
}
/*
 * Reads one line of source text (at most 49 characters) and runs the
 * lexical analysis on it.
 * Returns 0 on success and 1 when no text was entered.
 */
int main()
{
    char str[50];
    printf("\nEnter the string : ");
    /* The width keeps the scanset from writing past str. */
    if(scanf("%49[^\n]",str)!=1)
    {
        printf("\nNo input given\n");
        return 1;
    }
    lexanalysis(str);
    return 0;
}

RESULT AND DISCUSSION


Lexical analyser separates the delemeters, operators, identifiers, keywords, literals and convert them
into leximes and those leximes are used in the compilation of code.

FINDING &
LEARNING
Usually we may think it is much harder to write a lexical analyzer generator than it is just to
write a lexical analyzer and then make changes to it to produce a different lexical analyzer. After all,
most programming languages have similar tokens. This thought has been voiced by many compiler
experts. In fact, many compiler tools allow the user to write lexical analyzer and call it from the
generated parser or to make changes to a lexical analyzer provided by the tool.

Nevertheless, the process is informative, and there may be applications for which the user may wish
to be able to generate various lexical analyzers.
OUTPUT
Prog. 5. Recursive Descent Parser
THEORY:
A recursive descent parser is a top-down parser, so called because it builds a parse tree from the top
(the start symbol) down, and from left to right, using an input sentence as a target as it is scanned
from left to right. (The actual tree is not constructed but is implicit in a sequence of function calls.)
This type of parser was very popular for real compilers in the past, but is not as popular now. The
parser is usually written entirely by hand and does not require any sophisticated tools. It is a simple
and effective technique, but is not as powerful as some of the

shift-reduce parsers -- not the one presented in class, but fancier similar ones called LR parsers.

ALGORITHM:

∙ Consider the grammar used before for simple arithmetic expressions

P ---> E
E ---> E + T | E - T | T
T ---> T * S | T / S | S
S ---> F ^ S | F
F ---> ( E ) | char

∙ The above grammar won't work for recursive descent because of the left recursion in the second
and third rules. (The recursive function for E would immediately
call E recursively, resulting in an indefinite recursive regression.)

In order to eliminate left recursion, one simple method is to introduce new notation:
curley brackets, where {xx} means "zero or more repetitions of xx", and parentheses () used for
grouping, along with the or-symbol: |. Because of the many metasymbols, it is a good idea to enclose
all terminals in single quotes. Also put a '$' at the end. The resulting grammar looks as follows:

P ---> E '$'
E ---> T {('+'|'-') T}
T ---> S {('*'|'/') S}
S ---> F '^' S | F
F ---> '(' E ')' | char

Now the grammar is suitable for creation of a recursive descent parser. Notice that this is a different
grammar that describes the same language, that is the same sentences or
strings of terminal symbols. A given sentence will have a similar parse tree to one given by the
previous grammar, but not necessarily the same parse tree.

One could alter the first grammar in other ways to make it work for recursive descent. For example,
one could write the rule for E as:

E ---> T '+'E|T
#include "stdio.h"
#include "conio.h"
#include "string.h"
char input[100];       /* expression being parsed; main appends '$' as the end marker */
char prod[100][100];   /* productions applied, in the order they were used */
int pos=-1,l,st=-1;    /* pos: index of the current symbol in input; l: length of the
                          expression; st: index of the last entry written to prod */
char id,num;           /* set by advance(): the current symbol when it is a letter (id)
                          or a digit (num) */
/* One function per nonterminal of the grammar; Ed and Td stand for E' and T'. */
void E();
void T();
void F();
void advance();
void Td();
void Ed();
void advance()
{
pos++;
if(pos<l)

{
if(input[pos]>='0'&& input[pos]<='9')
{
num=input[pos];
id='\0';
}
if((input[pos]>='a' || input[pos]>='A')&&(input[pos]<='z' || input[pos]<='Z'))
{
id=input[pos];
num='\0';
}
}
}
/* E -> T E' : records the production, then parses a term and the rest of the expression. */
void E()
{
    st++;
    strcpy(prod[st],"E->TE'");
    T();
    Ed();
}
/*
 * E' -> + T E' | - T E' | null
 * Consumes one additive operator and the term after it, then recurses;
 * records E'->null when the current symbol is not '+' or '-'.
 */
void Ed()
{
    const char lookahead=input[pos];
    if(lookahead=='+' || lookahead=='-')
    {
        st++;
        strcpy(prod[st], lookahead=='+' ? "E'->+TE'" : "E'->-TE'");
        advance();
        T();
        Ed();
        return;
    }
    st++;
    strcpy(prod[st],"E'->null");
}
/* T -> F T' : records the production, then parses a factor and the rest of the term. */
void T()
{
    st++;
    strcpy(prod[st],"T->FT'");
    F();
    Td();
}
/*
 * T' -> * F T' | / F T' | null
 * Consumes one multiplicative operator and the factor after it, then
 * recurses; records T'->null when the current symbol is not '*' or '/'.
 */
void Td()
{
    const char lookahead=input[pos];
    if(lookahead=='*' || lookahead=='/')
    {
        st++;
        strcpy(prod[st], lookahead=='*' ? "T'->*FT'" : "T'->/FT'");
        advance();
        F();
        Td();
        return;
    }
    st++;
    strcpy(prod[st],"T'->null");
}

void F()
{
if(input[pos]==id)
{
strcpy(prod[++st],"F->id");
advance();
}
if(input[pos]=='(')
{
strcpy(prod[++st],"F->(E)");
advance();
E();
if(input[pos]==')')
{
advance();
}
}
if(input[pos]==num)
{
strcpy(prod[++st],"F->num");
advance();
}
}

/*
 * Reads an arithmetic expression, parses it with the recursive descent
 * functions and prints either the productions used or a rejection message.
 * NOTE(review): prod has room for 100 productions and nothing checks it, so
 * very long expressions can still overflow it.
 */
int main()
{
    int i;
    printf("Enter Input String ");
    /* At most 98 characters so that '$' and the terminator still fit in input[100]. */
    if(scanf("%98s",input)!=1)
    {
        printf("String rejected\n");
        return 1;
    }
    l=(int)strlen(input);
    input[l]='$';
    input[l+1]='\0';
    advance();
    E();
    if(pos==l)
    {
        printf("String Accepted\n");
        for(i=0;i<=st;i++)
        {
            printf("%s\n",prod[i]);
        }
    }
    else
    {
        printf("String rejected\n");
    }
    return 0;
}
RESULT & DISCUSSION

If the parser matches all the input letters in an ordered manner. The string is accepted. Predictive
parser is a recursive descent parser, which has the capability to predict which production is to be
used to replace the input string. The predictive parser does not suffer from backtracking.
FINDING AND
LEARNING
In this experiment we learnt that Recursive descent is a simple
parsing algorithm that is very easy to implement. It is a top-down
parsing algorithm because it builds the parse tree from the top (the start symbol) down.

The main limitation of recursive descent parsing (and all top-down parsing algorithms in
general) is that they only work on grammars with certain properties. For example, if a
grammar contains any left recursion, recursive descent parsing doesn't work.
Prog. 6: Write a program to left factor the given grammar
THEORY:

Left factoring is another useful grammar transformation used in parsing. The general idea is to
replace the productions A → αβ1 | αβ2, which share the common prefix α, with A → αA' and A' → β1 | β2.

ALGORITHM:

Let the given grammar: A-->ab1 | ab2 | ab3

1) We can see that, for every production, there is a common prefix & if we choose any
production here, it is not confirmed that we will not need to backtrack.

2) It is non deterministic, because we cannot choice any production and be assured that
we will reach at our desired string by making the correct parse tree. But if we rewrite the grammar in
a way that is deterministic and also leaves us to be flexible enough to make it any string that may be
possible without backtracking.... it will be:

· A --> aA', A' --> b1 | b2| b3 now if we are asked to make the parse tree for string ab2.... we
don't need back tracking. Because we can always choose the correct production when we get A' thus
we will generate the correct parse tree.
· Left factoring is required to eliminate non-determinism of a grammar. Suppose a grammar, S
-> abS | aSb
· Here, S is deriving the same terminal a in the production rule (two alternative choices for S),
which follows non-determinism. We can rewrite the production to defer the decision of S as-

S -> aS'
#include<stdio.h>
#include<string.h>
/*
 * Left-factors a production A -> alt1|alt2 entered by the user: the longest
 * common prefix of the two alternatives is kept in A and the differing
 * tails move to a new nonterminal X. An empty tail is printed as "epsilon".
 * Returns 0 on success and 1 when the input is missing or has no '|'.
 */
int main()
{
    char gram[64];
    char *bar,*alt1,*alt2;
    size_t prefix=0;

    printf("Enter Production : A->");
    if(fgets(gram,sizeof gram,stdin)==NULL)
        return 1;
    gram[strcspn(gram,"\r\n")]='\0';     /* drop the line ending kept by fgets */

    bar=strchr(gram,'|');
    if(bar==NULL)
    {
        printf("\n Production must contain two alternatives separated by '|'\n");
        return 1;
    }
    *bar='\0';
    alt1=gram;
    alt2=bar+1;

    /* The common prefix ends at the first position where the alternatives differ. */
    while(alt1[prefix]!='\0' && alt1[prefix]==alt2[prefix])
        prefix++;

    if(prefix==0)
    {
        printf("\n No common prefix, nothing to left factor");
        printf("\n A->%s|%s\n",alt1,alt2);
        return 0;
    }
    printf("\n A->%.*sX",(int)prefix,alt1);
    printf("\n X->%s|%s\n",
           alt1[prefix]!='\0' ? alt1+prefix : "epsilon",
           alt2[prefix]!='\0' ? alt2+prefix : "epsilon");
    return 0;
}

RESULT AND DISCUSSION


Difference between Left Factoring and Left Recursion
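1. Left recursion: when one or more productions can be reached from themselves with no tokens
consumed in-between.

2. Left factoring: a transformation that pulls the common prefix of two or more alternatives of a
nonterminal into a single production, so that the choice between them is deferred until the
differing part of the input is seen (A → αβ1 | αβ2 becomes A → αA', A' → β1 | β2).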
FINDING & LEARNING

In this experiment we learnt how to remove the left factoring from the grammar and the difference
between left recursion and left factoring and how this can be helpful in converting a grammar to

suitable form
Prog. 7: Write a program to convert left recursive grammar to right
recursive grammar
THEORY:
The production is left-recursive if the leftmost symbol on the right side is the same as the non
terminal on the left side. For example,
expr → expr + term.

ALGORITHM:
For each rule which contains a left-recursive option, A --> A α | β
introduce a new nonterminal A' and rewrite the rule as

A --> β A'
A' --> ε | α A'
Thus the production:
E --> E + T | T
is left-recursive with "E" playing the role of "A", "+ T" playing the role of α, and "T" playing the role
of β. Introducing the new nonterminal E', the production can be replaced by:

E --> T E'
E' --> ε | + T E'

For example, the left-recursive grammar is:
E → E + T | T
T → T * F | F
F → (E) | id
We can redefine E and T without left-recursion as:
E → TE`
E` → + TE` | ε
T → FT`
T` → * FT` | ε
F → (E) | id

#include<bits/stdc++.h>
using namespace std;
// Reads productions of the form X->alt1|alt2|... and, for every production
// that has at least one left-recursive alternative (one starting with X),
// prints the equivalent productions without left recursion:
//     X  -> beta1 X' | beta2 X' | ...
//     X' -> alpha1 X' | alpha2 X' | ... | epsilon
// where X alpha_i are the left-recursive alternatives and beta_j the others.
// Returns 0 on success and 1 when the production count is invalid.
int main()
{
    int n=0;
    cout<<"Enter the number of prod\n";
    if(!(cin>>n) || n<1)
    {
        cout<<"Invalid number of productions\n";
        return 1;
    }
    vector<string> prod(n);
    cout<<"Enter the productions\n";
    for(int i=0;i<n;++i)
        cin>>prod[i];

    for(int i=0;i<n;++i)
    {
        const string& current_p=prod[i];
        cout<<"\nThe production: "<<current_p<<"\n";
        // The right-hand side starts at index 3, after "X->".
        if(current_p.size()<4 || current_p.compare(1,2,"->")!=0)
        {
            cout<<"Ir-reducible\n";
            continue;
        }
        const char non_t=current_p[0];

        // Split the right-hand side on '|' and sort the alternatives into the
        // left-recursive tails (alpha) and the remaining alternatives (beta).
        vector<string> alpha,beta;
        size_t start=3;
        while(start<=current_p.size())
        {
            size_t bar=current_p.find('|',start);
            if(bar==string::npos)
                bar=current_p.size();
            const string alt=current_p.substr(start,bar-start);
            if(!alt.empty() && alt[0]==non_t)
                alpha.push_back(alt.substr(1));
            else
                beta.push_back(alt);
            start=bar+1;
        }

        if(alpha.empty())
        {
            cout<<"Is not left-recursive\n";
            continue;
        }
        cout<<"is left recursive\n";
        cout<<"Grammar without left recursion is\n";
        cout<<non_t<<"->";
        if(beta.empty())
            cout<<non_t<<"\'";
        for(size_t b=0;b<beta.size();++b)
        {
            if(b>0)
                cout<<"|";
            cout<<beta[b]<<non_t<<"\'";
        }
        cout<<"\n";
        cout<<non_t<<"\'->";
        for(size_t a=0;a<alpha.size();++a)
            cout<<alpha[a]<<non_t<<"\'|";
        cout<<"epsilon";
    }
    return 0;
}
RESULT AND DISCUSSION
All common parsing algorithm process left-to-right, which is the first L in LL and LR. Top-down (LL)
parsing finds a leftmost derivation (the second L), while bottom-up (LR) parsing finds a rightmost
derivation (the R).

FINDING &
LEARNING
In this experiment we learnt how to convert the left recursive grammar to right recursive grammar
and benefits of it over the former one. Although the above transformations preserve the language
generated by a grammar, they may change the parse trees that witness strings' recognition. With
suitable bookkeeping, tree rewriting can recover the originals, but if this step is omitted, the
differences may change the semantics of a parse.
Prog. 8: Write a program to compute FIRST and FOLLOW

THEORY:
FIRST(α): If α is any string of grammar symbols, let FIRST(α) be the set of terminals that begin the strings
derived from α. If α ⇒* ε, then ε is also in FIRST(α).

FOLLOW(A): Define FOLLOW(A), for nonterminal A, to be the set of terminals a that can appear
immediately to the right of A in some sentential form, that is, the set of terminals a such that there
exists a derivation of the form S ⇒* αAaβ for some α and β. Note that there may, at some time during
the derivation, have been symbols between A and a, but if so, they derived ε and disappeared. If A
can be the rightmost symbol in some sentential form, then $, representing the input right endmarker,
is in FOLLOW(A).

ALGORITHM:
To compute FIRST(X) for all grammar symbols X, apply the following rules until no more terminals or
ε can be added to any FIRST set:

1. If X is terminal, then FIRST(X) is {X}.

2. If X → ε is a production, then add ε to FIRST(X).

3. If X is nonterminal and X → Y1 Y2 ... Yk is a production, then place a in FIRST(X) if for some i, a is in
FIRST(Yi), and ε is in all of FIRST(Y1), ... , FIRST(Yi-1); that is, Y1 ... Yi-1 ⇒* ε. If ε is in FIRST(Yj) for all j
= 1, 2, ... , k, then add ε to FIRST(X). For example, everything in FIRST(Y1) is surely in FIRST(X). If Y1
does not derive ε, then we add nothing more to FIRST(X), but if Y1 ⇒* ε, then we add FIRST(Y2) and so
on.

To compute FOLLOW(A) for all nonterminals A, apply the following rules until nothing can be added
to any FOLLOW set:

1. Place $ in FOLLOW(S), where S is the start symbol and $ is the input right endmarker.

2. If there is a production A → αBβ, then everything in FIRST(β), except for ε, is placed in
FOLLOW(B).

3. If there is a production A → αB, or a production A → αBβ where FIRST(β) contains ε (i.e., β
⇒* ε), then everything in FOLLOW(A) is in FOLLOW(B).
EXAMPLE

E → T E’
E’ → + T E’ | ε
T → F T’
T’ → * F T’ | ε
F → ( E ) | id

FIRST(E) = FIRST(T) = FIRST(F) = {( , id}
FIRST(E’) = {+, ε}
FIRST(T’) = {*, ε}
FOLLOW(E) = FOLLOW(E’) = {) , $}
FOLLOW(T) = FOLLOW(T’) = {+, ), $}
FOLLOW(F) = {+, *, ), $}

#include<stdio.h>
/* Table sizes: at most `max` productions of at most MAX-1 characters each. */
#define max 10
#define MAX 15
void ffun(int,int);
void fun(int,int[]);
void follow(int i);
/* array[i] holds production i as typed by the user: array[i][0] is the nonterminal,
   and fun() treats every symbol that directly follows a '/' as the first symbol of
   an alternative. temp[t] holds a FIRST set: temp[t][0] is the nonterminal and
   temp[t][1..] are the symbols collected for it. */
char array[max][MAX],temp[max][MAX];
/* c: index of the last symbol written to temp[t]; n: number of productions;
   t: row of temp that is currently being filled. */
int c,n,t;
/*
 * Helper of fun() with two modes selected by key.
 *
 * key == 0: array[i][j] is a nonterminal. Finds the production k whose
 *           left-hand side is that symbol, stores the position just after it
 *           in p (p[0] = i, p[1] = j + 1) and continues the FIRST computation
 *           in production k. Always returns 0.
 * key != 0: array[i][j] is a terminal. Returns 1 when it is not yet present in
 *           temp[t][0..c] and 0 when it already is.
 */
int fun2(int i,int j,int p[],int key)
{
int k;
if(!key){
/* NOTE(review): when no production starts with this symbol, k ends up equal to n
   and fun() is called on row n of array -- confirm that this is intended. */
for(k=0;k<n;k++)
if(array[i][j]==array[k][0])
break;
p[0]=i;p[1]=j+1;
fun(k,p);
return 0;
}
else{
/* The scan starts at index 0, so the nonterminal name stored in temp[t][0] is compared too. */
for(k=0;k<=c;k++){
if(array[i][j]==temp[t][k])
break;
}
if(k>c)return 1;
else return 0;
}
}
/*
 * Adds the FIRST symbols of production i to temp[t].
 *
 * Only the first symbol of each alternative is examined (a symbol that
 * directly follows a '/'). A nonterminal is expanded through fun2(); a
 * terminal is appended to temp[t] unless it is already there. '@' is the
 * null symbol: when it is found while expanding a nonterminal of another
 * production, the symbol that follows that nonterminal (position p[0], p[1])
 * is taken into account as well.
 *
 * p holds the position to continue at in the calling production, or
 * {-1,-1} for the top-level call.
 */
void fun(int i,int p[])
{
    int j,key;
    for(j=2;array[i][j]!='\0';j++)
    {
        if(array[i][j-1]!='/')
            continue;                        /* not the start of an alternative */
        if(array[i][j]>='A'&&array[i][j]<='Z'){
            key=0;
            fun2(i,j,p,key);
        }
        else{
            key=1;
            if(fun2(i,j,p,key))
                temp[t][++c]=array[i][j];
            if(array[i][j]=='@'&&p[0]!=-1){  /* taking '@' as null symbol */
                if(array[p[0]][p[1]]>='A'&&array[p[0]][p[1]]<='Z'){
                    key=0;
                    fun2(p[0],p[1],p,key);
                }
                else
                if(array[p[0]][p[1]]!='/'&&array[p[0]][p[1]]!='\0'){
                    if(fun2(p[0],p[1],p,key))
                        temp[t][++c]=array[p[0]][p[1]];
                }
            }
        }
    }
}
/* fol[l] holds a FOLLOW set: fol[l][0] is the nonterminal and fol[l][1..f] its
   symbols. ff[0..ff0] lists the nonterminals already visited while the current
   set is being built. */
char fol[max][MAX],ff[max];int f,l,ff0;

/*
 * Adds the FOLLOW symbols of the nonterminal of production i to fol[l].
 * A nonterminal that was already visited for the current set is skipped,
 * which keeps the recursion through ffun() from looping forever.
 * Production 0 is the start production, so it also receives '$'.
 */
void follow(int i)
{
    int j,k;
    for(j=0;j<=ff0;j++)
        if(array[i][0]==ff[j])
            return;
    ff[++ff0]=array[i][0];
    if(i==0)
        fol[l][++f]='$';
    /* Look at every occurrence of the nonterminal on a right-hand side. */
    for(j=0;j<n;j++)
        for(k=2;array[j][k]!='\0';k++)
            if(array[j][k]==array[i][0])
                ffun(j,k);
}
/*
 * Handles one occurrence of a nonterminal at array[j][k]: every symbol that
 * follows it in the same alternative contributes to fol[l]. A terminal is
 * added directly; a nonterminal contributes its FIRST set from temp, minus '@'.
 * When the occurrence ends its alternative, or a following nonterminal can
 * derive '@', the FOLLOW set of production j's nonterminal is added too.
 *
 * NOTE(review): the scan goes through all following symbols and does not
 * stop at the first one that cannot derive '@'; duplicates are not removed.
 */
void ffun(int j,int k)
{
    int ii,nullable=0,tt,cc;
    if(array[j][k+1]=='/'||array[j][k+1]=='\0')
        nullable=1;
    for(ii=k+1;array[j][ii]!='/'&&array[j][ii]!='\0';ii++){
        if(array[j][ii]<='Z'&&array[j][ii]>='A')
        {
            for(tt=0;tt<n;tt++)
                if(temp[tt][0]==array[j][ii])break;
            if(tt==n)
                continue;               /* no FIRST set recorded for this nonterminal */
            for(cc=1;temp[tt][cc]!='\0';cc++)
            {
                if(temp[tt][cc]=='@')nullable=1;
                else fol[l][++f]=temp[tt][cc];
            }
        }
        else fol[l][++f]=array[j][ii];
    }
    if(nullable)follow(j);
}
int main()
{
int p[2],i,j;
printf("Enter the no. of non-terminals :");
scanf("%d",&n);
printf("Enter the productions :\n");
for(i=0;i<n;i++)
scanf("%s",array[i]);
for(i=0,t=0;i<n;i++,t++){
c=0,p[0]=-1,p[1]=-1;
temp[t][0]=array[i][0];
fun(i,p);
temp[t][++c]=NULL;
printf("First(%c) : [ ",temp[t][0]);
for(j=1;j<c;j++)
printf("%c,",temp[t][j]);
printf("\b ].\n");
}

for(i=0,l=0;i<n;i++,l++)
{
f=-1;ff0=-1;
fol[l][++f]=array[i][0];
follow(i);
fol[l][++f]=NULL;
}
for(i=0;i<n;i++)
{
printf("\nFollow[%c] : [ ",fol[i][0]);
for(j=1;fol[i][j]!=NULL;j++)
printf("%c,",fol[i][j]);
printf("\b ]");
}
return 0;
}

FINDING &
LEARNING
In this experiment we learnt how to find the FIRST and FOLLOW sets of a grammar and how they
can be used to build different parsers such as LL(1), SLR(1) and CLR(1).
Prog. 9: Write a program to construct LL(1) parsing table

THEORY:
The first L means the input string is processed from left to right.
The second L means the derivation will be a leftmost derivation (the leftmost variable is
replaced at each step).

1 means that one symbol in the input string is used to help guide the parse.

ALGORITHM:

EXAMPLE

#include<stdio.h>
#include<conio.h>
#include<string.h>
/* Returns the position of ch in set[0..size-1], or -1 when it is absent. */
static int indexOf(const char *set,int size,char ch)
{
    int idx;
    for(idx=0;idx<size;idx++)
    {
        if(set[idx]==ch)
            return idx;
    }
    return -1;
}

/*
 * Builds and prints an LL(1) parsing table.
 *
 * Input: the productions (each of the form X->..., so the right-hand side
 * starts at index 3) and, for every nonterminal, its FIRST and FOLLOW sets
 * typed as strings of single-character symbols, with '#' standing for epsilon.
 *
 * res[nonterminal][terminal] holds the production chosen for that cell and
 * count[nonterminal][terminal] how many productions were placed there; a
 * count above 1 means the grammar is not LL(1).
 * Returns 0 on success and 1 when the input is malformed or out of range.
 */
int main()
{
    char pro[10][10],first[10][10],follow[10][10],nt[10],ter[10],res[10][10][10],temp[10];
    int npro,noter=0,nont=0,i,j,k,m,flag=0,count[10][10],row,index;

    memset(pro,0,sizeof pro);
    memset(first,0,sizeof first);
    memset(follow,0,sizeof follow);
    memset(res,0,sizeof res);
    memset(count,0,sizeof count);

    printf("Enter the no of productions:");
    if(scanf("%d",&npro)!=1 || npro<1 || npro>10)
    {
        printf("\nNumber of productions must be between 1 and 10\n");
        return 1;
    }
    printf("Enter the productions:");
    for(i=0;i<npro;i++)
    {
        if(scanf("%9s",pro[i])!=1)      /* rows hold 9 characters plus '\0' */
            return 1;
    }

    /* The distinct left-hand sides are the nonterminals. */
    for(i=0;i<npro;i++)
    {
        if(indexOf(nt,nont,pro[i][0])<0)
            nt[nont++]=pro[i][0];
    }

    printf("\nEnter the first values:\n");
    for(i=0;i<nont;i++)
    {
        printf("First value(%c):",nt[i]);
        if(scanf("%9s",first[i])!=1)
            return 1;
    }
    printf("\nEnter the follow values:\n");
    for(i=0;i<nont;i++)
    {
        printf("Follow value(%c):",nt[i]);
        if(scanf("%9s",follow[i])!=1)
            return 1;
    }

    /* The terminals are every symbol of a FIRST set (except '#') or of a
       FOLLOW set. Each symbol is checked on its own, so one repeated symbol
       does not hide the symbols that come after it. */
    for(i=0;i<nont;i++)
    {
        for(j=0;first[i][j]!='\0';j++)
        {
            if(first[i][j]!='#' && noter<10 && indexOf(ter,noter,first[i][j])<0)
                ter[noter++]=first[i][j];
        }
    }
    for(i=0;i<nont;i++)
    {
        for(j=0;follow[i][j]!='\0';j++)
        {
            if(noter<10 && indexOf(ter,noter,follow[i][j])<0)
                ter[noter++]=follow[i][j];
        }
    }

    /* Fill the table. */
    for(i=0;i<nont;i++)
    {
        for(j=0;first[i][j]!='\0';j++)
        {
            if(first[i][j]=='#')
            {
                /* Epsilon in FIRST: X-># goes under every terminal of FOLLOW(X). */
                for(m=0;follow[i][m]!='\0';m++)
                {
                    row=indexOf(ter,noter,follow[i][m]);
                    if(row<0)
                        continue;
                    temp[0]=nt[i];
                    temp[1]='-';
                    temp[2]='>';
                    temp[3]='#';
                    temp[4]='\0';
                    strcpy(res[i][row],temp);
                    count[i][row]+=1;
                }
                continue;
            }

            row=indexOf(ter,noter,first[i][j]);
            if(row<0)
                continue;
            for(k=0;k<npro;k++)
            {
                if(pro[k][0]!=nt[i])
                    continue;
                if(pro[k][3]==first[i][j])
                {
                    /* The right-hand side starts with this terminal. */
                    strcpy(res[i][row],pro[k]);
                    count[i][row]+=1;
                }
                else if(pro[k][3]!='\0')
                {
                    /* The right-hand side starts with a nonterminal: the
                       production applies when the terminal is in its FIRST set. */
                    index=indexOf(nt,nont,pro[k][3]);
                    if(index<0)
                        continue;
                    for(m=0;first[index][m]!='\0';m++)
                    {
                        if(first[i][j]==first[index][m])
                        {
                            strcpy(res[i][row],pro[k]);
                            count[i][row]+=1;
                        }
                    }
                }
            }
        }
    }

    printf("LL1 Table\n\n");
    flag=0;
    for(i=0;i<noter;i++)
    {
        printf("\t%c",ter[i]);
    }
    for(j=0;j<nont;j++)
    {
        printf("\n\n%c",nt[j]);
        for(k=0;k<noter;k++)
        {
            printf("\t%s",res[j][k]);
            if(count[j][k]>1)
                flag=1;
        }
    }
    if(flag==1)
        printf("\nThe given grammar is not LL1");
    else
        printf("\nThe given grammar is LL1");

    return 0;
}

RESULT &
DISCUSSION

Problems faced in LL1


1. Left Recursion
2. Order of alternatives is important
3. Failure
4. Each LL1 grammar is unambiguous.

FINDING &
LEARNING

In this experiment we learnt how to construct an LL(1) parsing table from a given
grammar using the concepts of FIRST and FOLLOW, and how this parsing table is
stronger and better compared to the previous approaches and why it should be used. If
the top is $ and on the input stream there is also a $ then the parser reports that it
has successfully parsed the input, otherwise it reports an error. In both cases the
parser will stop.

You might also like