CD Cse Record
CD Cse Record
DESIGN
LAB
RECORD
CERTIFICATE
1. Create a lex specification file to recognize words, characters, blank spaces and lines
2. Compile it by using LEX compiler to get ‘C’ file
3. Compile the ‘C’ file by using the C compiler to get the executable file
4. Run the executable file to get the desired output by providing necessary input
Program:
%{
int c=0,w=0,l=0,s=0;
%}
%%
[\n] l++;
%%
if(argc==2)
yyin=fopen(argv[1],"r");
yylex();
printf("\nCHARACTER=%d",c);
printf("\nLINES=%d",l);
else
printf("ERROR");
}
Input File:
Hello how are you
Output:
lex filename.l
cc lex.yy.c -ll
./a.out in.txt
4. Run the executable file to get the desired output by providing necessary
input
Program:
%{
/*
 * Program to identify an integer or a floating-point literal.
 *
 * Reads standard input and reports every integer or floating-point
 * literal it finds.  Any other input is copied to the output unchanged
 * by lex's default rule.
 *
 * Notes on the patterns:
 *   - inside a character class '|' is an ordinary character, so the sign
 *     and exponent classes are written [+-] and [eE];
 *   - an exponent must contain at least one digit, its sign is optional.
 */
#include <stdio.h>
%}
integer ([0-9]+)
float   ([0-9]+\.[0-9]+)|([+-]?[0-9]+\.[0-9]*[eE][+-]?[0-9]+)
%%
{integer}   printf("\n %s is an integer.\n", yytext);
{float}     printf("\n %s is a floating number.\n", yytext);
%%
/* Runs the scanner over standard input until end of file. */
int main(void)
{
    yylex();
    return 0;
}

/* Returning 1 tells the scanner there is no further input after EOF. */
int yywrap(void)
{
    return 1;
}
Output:
lex filename.l
cc lex.yy.c -ll
./a.out
2
COMPILER DESIGN LAB 2021 – 2022 6
2 is an integer
2.3
2.3 is a floating number
Concepts:
sum Identifier
= Assignment operator
3 Integer literal
+ Addition operator
2 Integer literal
; End of statement
Definition section
%%
Rules section
%%
C code section
OUTPUT:
$ lex filename.l
$ cc lex.yy.c -ll
$ ./a.out
if a>b then a else b
<265>
<263,a>
<262,260>
<263,b>
<266>
<263,a>
<267>
<263,b>
{definitions}
%%
{rules}
%%
{user subroutines}
where the definitions and the user subroutines are often omitted.
The second %% is optional, but the first is required to mark the
beginning of the rules. The absolute minimum Lex program is thus
%%
to look for the string integer in the input stream and print the
message ``found keyword INT'' whenever it appears. In this
example the host procedural language is C and the C library
function printf is used to print the string. The end of the expression
is indicated by the first blank or tab character. If the action is
merely a single C expression, it can just be given on the right side
of the line; if it is compound, or takes more than a line, it should be
enclosed in braces. As a slightly more useful example, suppose it is
desired to change a number of words from British to American
spelling. Lex rules such as
colour printf("color");
mechanise printf("mechanize");
petrol printf("gas");
would be a start. These rules are not quite enough, since the word
petroleum would become gaseum; a way of dealing with this will
be described later.
integer
"\[]^-?.*+|()$/{}%<>
xyz"++"
"xyz++"
xyz\+\+
[a-z0-9<>_]
indicates the character class containing all the lower case letters,
the digits, the angle brackets, and underline. Ranges may be given
in either order. Using - between any pair of characters which are
not both upper case letters, both lower case letters, or both digits is
implementation dependent and will get a warning message. (E.g.,
[0-z] in ASCII is many more characters than it is in EBCDIC). If it
is desired to include the character - in a character class, it should be
first or last; thus
[-+0-9]
[^abc]
control characters; or
[\40-\176]
ab?c
a*
a+
[a-z]+
(ab|cd)
ab|cd
(ab|cd+)?(ef)*
matches such strings as abefef, efefef, cdef, or cddd; but not abc,
abcd, or abcdef.
%%
Var.c
/* This is LEx Tool Program */
#include<stdio.h>
main()
{
int a,b;
}
/* is a COMMENT
*/ is a COMMENT
FUNCTION
main(
)
BLOCK BEGINS
int is a KEYWORD
a IDENTIFIER,
b IDENTIFIER;
BLOCK ENDS
#include<stdio.h>
#include<string.h>
/*
 * Eliminates immediate left recursion from a single production.
 *
 * Input (stdin): one production written as  A->alt1|alt2|...  where A is a
 * single-character non-terminal and the alternatives contain no whitespace.
 *
 * For  A -> A a1 | ... | b1 | ...  the program prints
 *     A' -> a1 A'      for every left-recursive alternative
 *     A  -> b1 A'      for every other alternative
 *     A' -> ε
 * If no alternative starts with A, it reports that the grammar has no
 * left recursion.
 *
 * Returns 0 on success, 1 when the input is not of the expected form.
 */
int main(void)
{
    char l[50], r[50], productions[25][64];
    int i = 0, j, flag = 0;
    size_t consumed = 0, rlen;

    printf("Enter the Productions:");
    /* Field widths keep the reads inside l[] and r[]. */
    if (scanf(" %1s->%49s", l, r) != 2) {
        printf(" Invalid production; expected the form A->alpha|beta\n");
        return 1;
    }
    printf(" %s", r);

    rlen = strlen(r);
    /* Leave one slot free for the final A'->ε production. */
    while (consumed < rlen && i < 24) {
        const char *alt = r + consumed;
        size_t alt_len = strcspn(alt, "|");   /* length of this alternative */

        if (alt_len > 0) {
            if (alt[0] == l[0]) {
                /* A -> A alpha   becomes   A' -> alpha A' */
                flag = 1;
                snprintf(productions[i++], sizeof productions[0],
                         "%s'->%.*s%s'", l, (int)(alt_len - 1), alt + 1, l);
            } else {
                /* A -> beta   becomes   A -> beta A' */
                snprintf(productions[i++], sizeof productions[0],
                         "%s->%.*s%s'", l, (int)alt_len, alt, l);
            }
        }
        consumed += alt_len + 1;              /* skip the alternative and its '|' */
    }

    if (flag == 1) {
        snprintf(productions[i++], sizeof productions[0], "%s'->ε", l);
        printf("the productions after eliminating left recursion are:\n");
        for (j = 0; j < i; j++)
            printf("%s \n ", productions[j]);
    } else {
        printf(" The Given Grammar has no Left Recursion");
    }
    return 0;
}
OUTPUT:
Enter the Productions:
E->E+T
Initial Example:
P ---> E
E ---> E + T | E - T | T
T ---> T * S | T / S | S
S ---> F ^ S | F
F ---> ( E ) | char
The above grammar won't work for recursive descent
because of the left recursion in the second and third rules.
(The recursive function for E would immediately
call E recursively, resulting in infinite
recursion.)
P ---> E '$'
E ---> T {('+'|'-') T}
T ---> S {('*'|'/') S}
S ---> F '^' S | F
F ---> '(' E ')' | char
E ---> T '+' E | T
This eliminates the left recursion, and leaves the language the
same, but it changes the semantics of the language. With this
change, the operator '+' would associate from right to left,
instead of from left to right, so this method is not acceptable.
#include<stdio.h>
//#include<conio.h>
#include<string.h>
char input[100];
int i,l;
int E(void);    /* start-symbol parser, defined later in this file */

/*
 * Driver for the recursive-descent parser.
 *
 * Prints the grammar, reads one whitespace-free string into the global
 * `input`, runs E() from the current value of the global index `i`, and
 * reports whether the whole string was consumed.  A single trailing '$'
 * end marker is accepted but not required.
 *
 * Returns 0 after a verdict has been printed, 1 if no input could be read.
 */
int main(void)
{
    printf("recursive decent parsing for the grammar");
    printf("\n E->TEP|\nEP->+TEP|@|\nT->FTP|\nTP->*FTP|@|\nF->(E)|ID\n");
    printf("enter the string to check:");
    /* Width keeps the read inside input[100]. */
    if (scanf("%99s", input) != 1)
        return 1;

    /* Accept only if E() succeeded and nothing but an optional '$' remains. */
    if (E() && (input[i] == '\0' ||
                (input[i] == '$' && input[i + 1] == '\0')))
        printf("\n string is accepted\n");
    else
        printf("\n string is not accepted\n");
    return 0;
}
int T(void);    /* defined elsewhere in this file */
int EP(void);   /* defined elsewhere in this file */

/*
 * E -> T EP
 *
 * Returns 1 when T() and then EP() both succeed, 0 otherwise.
 * EP() is not called when T() fails.
 */
int E(void)
{
    return T() && EP();
}
EP(){
if(input[i]=='+'){
i++;
if(T()){
if(EP())
return(1);
else
return(0);
OUTPUT:
$ cc rdp.c
$ ./a.out
recursive decent parsing for the grammar
E->TEP|
EP->+TEP|@|
T->FTP|
TP->*FTP|@|
F->(E)|ID
enter the string to check:(i+i)*i
string is accepted
push(S);
read_next_token();
repeat
X = pop();
if (X is a terminal or '$')
if (X == current_token)
read_next_token();
else error();
else if (M[X,current_token] == "X ::= Y1 Y2 ... Yk")
{ push(Yk);
...
push(Y1);
}
else error();
until X == '$';
1) E ::= T E' $
2) E' ::= + T E'
3) | - T E'
4) |
5) T ::= F T'
6) T' ::= * F T'
7) | / F T'
8) |
9) F ::= num
10) | id
FIRST[F] is of course {num,id}. This means
that FIRST[T]=FIRST[F]={num,id}. In
addition, FIRST[E]=FIRST[T]={num,id}.
Similarly, FIRST[T'] is {*,/} and FIRST[E'] is {+,-}.
To summarize, we have:
FIRST FOLLOW
E {num,id} {$}
E' {+,-} {$}
T {num,id} {+,-,$}
T' {*,/} {+,-,$}
F {num,id} {+,-,*,/,$}
Now, given the above table, we can easily construct the parsing
table. For each t ∈ FIRST[a], add X ::= a to M[X, t]. If a can be
reduced to the empty sequence, then for each t ∈ FOLLOW[X],
add X ::= a to M[X, t].
num id + - * / $
E 1 1
E' 2 3 4
T 5 5
T' 8 8 6 7 8
F 9 10
We will parse now the string x-2*y$ using the above parse table:
0) G := S $
1) S ::= ( L )
2) S ::= a
3) L ::= S L'
4) L' ::= , S L'
5) L' ::=
The first/follow tables are:
FIRST FOLLOW
G (a
S (a ,)$
L (a )
L' , )
( ) a , $
G 0 0
S 1 2
L 3 3
L' 5 4
OUTPUT:
$ cc preparsing.c
$ ./a.out
enter input string:i+i*i$
input is parsed
The parser applies the rule found in the table by matching the top-
most symbol on the stack (row) with the current symbol in the
input stream (column).
When the parser starts, the stack already contains two symbols:
[ S, $ ]
1. S → F
2. S → ( S + F )
3. F → a
    (   )   a   +   $
S   2   -   1   -   -
F   -   -   3   -   -
[ (, S, +, F, ), $ ]
Since the '(' from the input stream did not match the top-
most symbol, 'S', from the stack, it was not removed, and
remains the next-available input symbol for the following
step.
In the second step, the parser removes the '(' from its input
stream and from its stack, since they match. The stack now
becomes:
[ S, +, F, ), $ ]
[ F, +, F, ), $ ]
The parser now has an 'a' on its input stream and an 'F' as its
stack top. The parsing table instructs it to apply rule (3) from
the grammar and write the rule number 3 to the output
stream. The stack becomes:
[ a, +, F, ), $ ]
In the next two steps the parser reads the 'a' and '+' from the
input stream and, since they match the next two items on the
stack, also removes them from the stack. This results in:
[ F, ), $ ]
In the next three steps the parser will replace 'F' on the stack
by 'a', write the rule number 3 to the output stream and
remove the 'a' and ')' from both the stack and the input
stream. The parser thus ends with '$' on both its stack and its
input stream.
In this case the parser will report that it has accepted the
input string and write the following list of rule numbers to
the output stream:
[ 2, 1, 3, 3 ]
This is indeed a list of rules for a leftmost derivation of
the input string, which is:
S→(S+F)→(F+F)→(a+F)→(a+a)
OUTPUT:
$ cc ll1parsing.c
$ ./a.out
stack input
+i*i$
i*i$
*i$
i$
$
SUCCESS
SLR(1)
where the S stands for simple. SLR(1) parsers use the
same LR(0) configurating sets and have the same table structure
and parser operation. The difference comes in assigning table
actions, where we are going to use one token of lookahead to help
arbitrate among the conflicts. If we think back to the kind of
conflicts we encountered in LR(0) parsing, it was the reduce
actions that cause us grief. A state in an LR(0) parser
can have at most one reduce action and cannot have both shift and
reduce instructions. Since a reduce is indicated for any completed
item, this dictates that each completed item must be in a state by
itself. But let's revisit the assumption that if the item is complete,
the parser must choose to reduce. Is that always appropriate? If
we peeked at the next upcoming token, it may tell us something
that invalidates that reduction. If the sequence on top of the stack
could be reduced to the non-terminal A, what tokens do we expect
to find as the next input? What tokens would tell us that the
reduction is not appropriate?
Perhaps Follow(A) could be useful here!
COMPILER DESIGN LAB 2021 – 2022 45
The simple improvement that SLR(1) makes on the basic LR(0)
parser is to reduce only if the next input token is a member of the
follow set of the non-terminal being reduced.
When filling in the table, we don't assume a reduce on all inputs as
we did in LR(0); we selectively choose the reduction only when
the next input symbol is a member of the follow set. To be more
precise, here is the algorithm for SLR(1) table construction (note
all steps are the same as for LR(0) table construction except for 2a).
Let's consider those changes at the end of the LR(0) handout to the
simplified expression grammar that would have made it no longer
LR(0). Here is the version with the addition
of array access:
E' –> E
E –> E + T | T
T –> (E) | id | id[E]
Here are the first two LR(0) configurating sets entered if id is the
first token of the input.
In an LR(0) parser, the set on the right has a shift-reduce conflict.
However, an SLR(1)
will compute Follow(T) = { + ) ] $ } and only enter the reduce
action on those tokens. The
input [ will shift and there is no conflict. Thus this grammar is
SLR(1) even though it is
not LR(0).
All LR(0) grammars are SLR(1) but the reverse is not true, as the
two extensions to our
/*
 * GOTO table of the SLR parser: gotot[state][non-terminal].
 * Columns are E, T, F (the order reduce() uses when it selects a column);
 * -1 means there is no transition.
 */
int gotot[12][3] = {
    { 1,  2,  3},   /* state 0  */
    {-1, -1, -1},   /* state 1  */
    {-1, -1, -1},   /* state 2  */
    {-1, -1, -1},   /* state 3  */
    { 8,  2,  3},   /* state 4  */
    {-1, -1, -1},   /* state 5  */
    {-1,  9,  3},   /* state 6  */
    {-1, -1, 10},   /* state 7  */
    {-1, -1, -1},   /* state 8  */
    {-1, -1, -1},   /* state 9  */
    {-1, -1, -1},   /* state 10 */
    {-1, -1, -1}    /* state 11 */
};
/*
 * Parse stacks of the SLR driver.
 *   a[] / top  : stack of integers pushed with push() (parser states)
 *   b[] / btop : stack of characters pushed with pushb() (grammar symbols)
 * Both hold at most 10 entries; -1 in top/btop means "empty".
 */
int a[10];
char b[10];
int top=-1,btop=-1,i;

/* Pushes k onto the state stack; silently ignored when the stack is full. */
void push(int k)
{
    if (top < 9)
        a[++top] = k;
}

/* Pushes k onto the symbol stack; silently ignored when the stack is full. */
void pushb(char k)
{
    if (btop < 9)
        b[++btop] = k;
}

/*
 * Returns the value on top of the state stack.
 * The stack must be non-empty; no check is made here.
 */
char TOS()
{
    return a[top];
}

/* Removes the top of the state stack; does nothing when it is empty. */
void pop()
{
    if (top >= 0)
        top--;
}

/*
 * Removes the top of the symbol stack and clears the vacated slot;
 * does nothing when the stack is empty.
 */
void popb()
{
    if (btop >= 0)
        b[btop--] = '\0';
}
void display()
{
for(i=0;i<top;i++)
printf("%d%c",a[i],b[i]);
}
/*
 * Prints two tabs, then the characters of p starting at index m up to the
 * terminating NUL, then a newline.
 */
void display1(char p[],int m)
{
    const char *rest = p + m;

    fputs("\t\t", stdout);
    while (*rest != '\0')
        putchar(*rest++);
    putchar('\n');
}
/* Writes the fixed message "syntax error" to standard output. */
void error()
{
    fputs("syntax error", stdout);
}
void reduce(int p)
{
int k,ad;
char src,*dest;
switch(p)
{
case 1:dest="E+T";
src='E';
break;
case 2:dest="T";
src='E';
COMPILER DESIGN LAB 2021 – 2022 51
break;
case 3:dest="T*F";
src='T';
break;
case 4:dest="F";
src='T';
break;
case 5:dest="(E)";
src='F';
break;
case 6:dest="i";
src='F';
break;
default :dest="\0";
src='\0';
break;
}
for(k=0;k<strlen(dest);k++)
{
pop();
popb();
}
pushb(src);
switch(src)
{
case 'E':ad=0;
break;
case 'T':ad=1;
break;
case 'F':ad=2;
break;
default:ad=-1;
break;
}
push(gotot[TOS()][ad]);
}
/*
 * SLR parser driver.
 *
 * Reads one whitespace-free string (at most 19 characters), then repeatedly
 * looks up axn[state][input class] and shifts, reduces or accepts,
 * printing the stack and the remaining input after every shift and reduce.
 *
 * Input classes: lower-case letter = 0, '+' = 1, '*' = 2, '(' = 3,
 * ')' = 4, '$' = 5.  Any other character is a syntax error.
 *
 * Action codes read from axn: [0]==100 shift to state [1],
 * [0]==101 reduce by production [1], [1]==102 accept.
 * NOTE(review): the accept test reads element [1] as in the original;
 * the axn table is not visible here, confirm it matches.
 */
int main()
{
    int j, st, ic;
    char ip[20] = "\0", an;

    printf("enter any string");
    /* Width keeps the read inside ip[20]; on failure parse the empty string. */
    if (scanf("%19s", ip) != 1)
        ip[0] = '\0';
    push(0);
    display();
    printf("\t%s\n", ip);

    for (j = 0; ip[j] != '\0';) {
        st = TOS();
        an = ip[j];
        if (an >= 'a' && an <= 'z') ic = 0;
        else if (an == '+') ic = 1;
        else if (an == '*') ic = 2;
        else if (an == '(') ic = 3;
        else if (an == ')') ic = 4;
        else if (an == '$') ic = 5;
        else {
            error();
            break;
        }

        if (axn[st][ic][0] == 100) {
            /* shift: consume the input symbol and enter the new state */
            pushb(an);
            push(axn[st][ic][1]);
            display();
            j++;
            display1(ip, j);
        } else if (axn[st][ic][0] == 101) {
            /* reduce: input position is unchanged */
            reduce(axn[st][ic][1]);
            display();
            display1(ip, j);
        } else if (axn[st][ic][1] == 102) {
            printf("given string is accepted");
            break;
        } else {
            /* No action for this state/input pair: stop instead of looping forever. */
            error();
            break;
        }
    }
    return(0);
}
OUTPUT:
$ cc slr.c
$ ./a.out
enter any stringi+i*i
i+i*i
0i +i*i
0i5 +i*i
0i53 +i*i
YACC Concepts:
Return values:
In addition to specifying the return code, the lex parser can return a
symbol that is put on top of the stack, so that yacc can access it.
This symbol is returned in the variable yylval. By default, this is
defined as an int, so the lex program would have extern int yylval;
%%
[0-9]+ {yylval=atoi(yytext); return NUMBER;}
COMPILER DESIGN LAB 2021 – 2022 56
If more than just integers need to be returned, the specifications in
the yacc code become more complicated. Suppose we want to
return double values, and integer indices in a table.
The following three actions are needed.
1. The possible return values need to be stated:
%union {int ival; double dval;}
2. These types need to be connected to the possible return tokens:
%token <ival> INDEX
%token <dval> NUMBER
3. The types of non-terminals need to be given:
%type <dval> expr
%type <dval> mulex
%type <dval> term
The generated .h file will now have
#define INDEX 258
#define NUMBER 259
typedef union {int ival; double dval;} YYSTYPE;
extern YYSTYPE yylval;
Rules section:
The rules section contains the grammar of the language you want
to parse. This looks like
name1 : THING something OTHERTHING {action}
| othersomething THING {other action}
name2 : .....
This is the general form of context-free grammars, with a set of
actions associated with each matching right-hand side. It is a good
convention to keep non-terminals (names that can be expanded
further) in lower case and terminals (the symbols that are finally
matched) in upper case.
The terminal symbols get matched with return codes from the lex
tokenizer. They are typically defines coming from %token
definitions in the yacc program or character values;
1+2/3*6
5
such as:
#include<stdio.h>
#include<string.h>
void pm();
void plus();
void div();
int i,ch,j,l;
char ex[10],ex1[10],exp1[10],ex2[10];
main()
{
while(1)
{
printf("\n 1.Assignment\n 2.Arithmatic\n 3.exit\n ENTER THE
CHOICE:");
scanf("%d",&ch);
switch(ch)
{
case 1:printf("\n enter the expression with assignment operator:");
scanf("%s",ex1);
l=strlen(ex1);
ex2[0]='\0';
i=0;
while(ex1[i]!='=')
{
i++;
}
strncat(ex2,ex1,i);
strrev(ex1);
exp1[0]='\0';
strncat(exp1,ex1,l-(i+1));
strrev(exp1);
printf("3 address code:\n temp=%s \n %s=temp\n",exp1,ex2);
break;
COMPILER DESIGN LAB 2021 – 2022 61
case 2:printf("\n enter the expression with arithmatic operator:");
scanf("%s",ex);
strcpy(ex1,ex);
l=strlen(ex1);
exp1[0]='\0';
for(i=0;i<l;i++)
{
if(ex1[i]=='+'||ex1[i]=='-')
{
if(ex1[i+2]=='/'||ex1[i+2]=='*')
{
pm();
break;
}
else
{
plus();
break;
}
}
else if(ex1[i]=='/'||ex1[i]=='*')
{
div();
break;
}
}
break;
}
}
break;
case 3:exit(0);
}
}
}
void pm()
COMPILER DESIGN LAB 2021 – 2022 62
{
strrev(exp1);
j=l-i-1;
strncat(exp1,ex1,j);
strrev(exp1);
printf("3 address code:\n temp=%s\n temp1=%c%c temp\
n",exp1,ex1[j+2],ex1[j]);
}
/*
 * Handles input whose first operator, at ex1[i], is '*' or '/'.
 *
 * Appends the first i+2 characters of ex1 (operand, operator, operand) to
 * exp1 and prints
 *     temp=<that sub-expression>
 *     temp1=temp<next operator><next operand>
 * The temp1 line is omitted when nothing follows the sub-expression.
 * Assumes single-character operands.
 *
 * NOTE(review): this name clashes with div() from <stdlib.h>; do not
 * include that header in this file without renaming.
 */
void div(void)
{
    char op;

    strncat(exp1, ex1, (size_t)(i + 2));
    op = ex1[i + 2];            /* operator after the first sub-expression */
    if (op == '\0') {
        printf("3 address code:\n temp=%s\n", exp1);
        return;
    }
    printf("3 address code:\n temp=%s\n temp1=temp%c%c\n",
           exp1, op, ex1[i + 3]);
}
/*
 * Handles input whose first operator, at ex1[i], is '+' or '-' and is not
 * followed by a '*' or '/' sub-expression.
 *
 * Appends the first i+2 characters of ex1 (operand, operator, operand) to
 * exp1 and prints
 *     temp=<that sub-expression>
 *     temp1=temp<next operator><next operand>
 * The temp1 line is omitted when nothing follows the sub-expression.
 * Assumes single-character operands.
 */
void plus(void)
{
    char op;

    strncat(exp1, ex1, (size_t)(i + 2));
    op = ex1[i + 2];            /* operator after the first sub-expression */
    if (op == '\0') {
        printf("3 address code:\n temp=%s\n", exp1);
        return;
    }
    printf("3 address code:\n temp=%s\n temp1=temp%c%c\n",
           exp1, op, ex1[i + 3]);
}
1.Assignment
2.Arithmetic
3.Exit
Enter the choice:2
Enter the exp with arithmetic operator:a*b+c
3 address code:
temp=a*b
temp1=temp+c
1.Assignment
2.Arithmetic
3.Exit
Enter the choice:3
Input.txt
/t3 t2 t2
uminus t2 t2
print t2
+t1 t3 t4
print t4
OUTPUT:
$ cc codegen.c
$ ./a.out
LOADt2,R0
LOAD t2,R1
DIV R1,R0
STORE R0,uminus
OUTt2
LOADt3,R0
LOAD t4,R1
ADD R1,R0
STORE R0,print
LOADt2,R0
LOAD t2,R1
DIV R1,R0
STORE R0,uminus
OUTt2
LOADt3,R0
LOAD t4,R1
ADD R1,R0
STORE R0,print
Concepts:
Constant Folding
Example:
/* Before constant folding: both operands of + are literals. */
int f (void)
{
return 3 + 5;
}
/* After constant folding: 3 + 5 has been replaced by its value. */
int f (void)
{
return 8;
}
Notes:
int New_Index = 0;
int main() {
FILE *In_file, *Out_file;
char Buffer[100], ch;
int i = 0;
In_file = fopen("code.txt", "r");
Out_file = fopen("output.txt", "w");
while(1) {
ch = fgetc(In_file);
i = 0;
while(1) {
if(ch == '\n') break;
fflush(Out_file);
strcpy(temp, "");
for(i=0; i<n; i++) {
strcat(temp, Token[i]);
if(Token[i+1][0]!=',' || Token[i+1][0]!=';')
COMPILER DESIGN LAB 2021 – 2022 74
strcat(temp, "");
}
strcat(temp, "\n\0");
fwrite(&temp, strlen(temp), 1, Out_file);
}
Token[i][j++] = str[k++];
Token[i++][j] = '\0';
if(str[k] == '=' || str[k] == '/' || str[k] == '+' || str[k] == '-' ||
str[k] == '*' || str[k] == ',' || str[k] == ';')
{
Token[i][0] = str[k++];
Token[i++][1] = '\0';
}
if(str[k] == '\0')
break;
}
return i;
}
Input.txt
#include<stdio.h>
main()
{
float pi=3.14,r,a;
a=pi*r*r;
printf("a=%f",a);
COMPILER DESIGN LAB 2021 – 2022 75
return 0;
}
OUTPUT:
$ cc codeop.c
$ ./a.out
$ vi output.txt
Output.txt
#include<stdio.h>
main()
{
Float pi=3.14,r,a;
a=3.14*r*r;
printf("a=%f",a);
return0;
}