Compilers part 1 and 2

Introduction

In this coursework we were asked to parse a c-like grammar using a reccursive descent parser. My implementation of this did not actually work, so I don't think I did all that well. The following does not include the symbol table or any look-ahead. If after reading this you know why it does not work, please let me know, i would be very interested.

Index

  1. Introduction
  2. Index
  3. Grammar
  4. Lexer
  5. Header
  6. Parser

Grammar

The grammar we were asked to parse is as follows. We were allowed to change the grammar so long as it did not have any affect on the resulting language.

    goal            =   expression ;
    goal            =   ifstatement ;
    
    expression      =   term + term 
    expression      =   term - term 
    expression      =   term
    
    term            =   factor * factor
    term            =   factor / factor
    term            =   factor
    
    factor          =   variable
    factor          =   integer
    factor          =   string
    factor          =   ( expression )
    
    ifstatement     =   if ( expression ) goal else goal
    ifstatement     =   if ( expression ) goal 

Lexer


%{
#include "header1.h"
%}

%% 

\/\/.*$                   { T_type = T_COMMENT;           }

if                        { T_type = T_KEY_IF;            }
else                      { T_type = T_KEY_ELSE;          }
while                     { T_type = T_KEY_WHILE;         }
int                       { T_type = T_KEY_INT;           }
void                      { T_type = T_KEY_VOID;          }
return                    { T_type = T_KEY_RETURN;        }

[a-zA-Z_][a-zA-Z0-9_]*    { T_type = T_VARIABLE;          }

[0-9]+                    { T_type = T_INTEGER;           }

\"[^\"]*\"                { T_type = T_STRING;            }

\+                        { T_type = T_PLUS;              }
\-                        { T_type = T_MINUS;             }
\(                        { T_type = T_BRA;               }
\)                        { T_type = T_KET;               }
\,                        { T_type = T_COMMA;             }
\;                        { T_type = T_SEMICOLON;         }
\=                        { T_type = T_EQUALS;            }
\*                        { T_type = T_STAR;              }
\/                        { T_type = T_FORWARDSLASH;      }
\|\|                      { T_type = T_OR;                }
&&                { T_type = T_AND;               }

[ \t]+                    { ;                             }


#ifndef HEADER1

#define HEADER1

#include 

#define T_COMMENT 257

#define T_KEY_IF 258
#define T_KEY_ELSE 259
#define T_KEY_WHILE 260
#define T_KEY_INT 261
#define T_KEY_VOID 262
#define T_KEY_RETURN 263

#define T_VARIABLE 264
#define T_INTEGER 265
#define T_STRING 266

#define T_PLUS 267
#define T_MINUS 268
#define T_BRA 269
#define T_KET 270
#define T_COMMA 271
#define T_SEMICOLON 272
#define T_EQUALS 273
#define T_STAR 274
#define T_FORWARDSLASH 275
#define T_OR 276
#define T_AND 277

typedef struct {
    char * lexeme;
    int type;
} token;

struct tree_node {
    char * lexeme;
    int type;
    struct tree_node * left;
    struct tree_node * right;
    struct tree_node * parent;
};

int NT_type;

#endif

Parser


#include 
#include 
#include 
#include "header1.h"

typedef struct tree_node Node;

Node * goal(void);
Node * expression(void);
Node * term(void);
Node * factor(void);
Node * ifstatement(void);

void error(char * msg);

Node * create_node(char * v, int d, Node * l, Node * r);
void print_tree(Node * tree, int indent);

int check_for(int x);
int check_move(int x);

int nextSym;
char * yytext;


/* Main method */
int main(void) {
    
    Node * tree;
    
    printf("\nStart of main()\n");

    printf("moving to the first token\n nextSym was %d\n", nextSym);
    nextSym = yylex();
    printf("nextSym is now %d\n", nextSym);

    
    tree = goal();
    
    if (tree != NULL) {
        printf("Successful parse\n");
        print_tree(tree, 0); 
        /* free_tree(tree); */
        return 1;
    } else {
        printf("Un-successful parse\n");
        return 0;
    }
}

/* Checks and doesnt move on */
int check_for(int x) {
    if(x == nextSym) {
        return nextSym;
    } else {
        return 0;
    }
}

/* Checks and moves on */
/* Check for x */
int check_move(int x) {
    if(!check_for(x)) {
        return 0;
    } else {
        nextSym = yylex();
        return nextSym;
    }
}

Node * create_node(char * v, int t, Node * l, Node * r) {
    Node * temp;
    /* Grab enough memory to hold an Node, assign it to temp */
    temp = (Node *)malloc(sizeof(Node));
    temp->type = t;
    temp->lexeme = v;
    temp->parent = NULL;
    temp->left = l;
    temp->right = r;
    if (temp->right != NULL) {
        temp->right->parent = temp;
    }
    if (temp->left != NULL) {
        temp->left->parent = temp;
    }
    return temp;
}

void print_tree(Node * tree, int indent) {
    if (tree != NULL) {
        int i;      
        if (indent != NULL) {
            for(i=0; i<(indent); i++) {
                printf(" ");
            }
            printf(" |\n");
            for(i=0; i<(indent); i++) {
                printf(" ");
            }
            printf(" \\_%d\n", tree->type);
        } else {
            printf("   %d\n", tree->type);
        }
        indent = indent + 2;
        print_tree(tree->left, indent);
        print_tree(tree->right, indent);
    }
}

void free_tree(Node * tree) {
    if (tree != NULL) {
        /* printf("freeing node [%d]\n", tree->data); */
        free_tree(tree->left);
        free_tree(tree->right);
        free(tree);
    }
}


/* Takes an int */
/* why does it take an int? why not a string or something usefull*/
void error(char * msg) {
    printf("\n*** ERROR: %s\n", msg);
    exit(1);

    
    /*

********** GOAL

error 1 = trying to parse a 'goal' - code does not constitute an if statement

error 2 = trying to parse a 'goal' - code does not constitute an expression

********** EXPRESSION

error 3 = trying to parse an 'expression' - just parsed a 'plus' - the next piece of code does not constitute a term

error 4 = trying to parse an 'expression' - just parsed a 'minus' - the next piece of code does not constitute a term

error 5 = error 5 = trying to parse an 'expression' code does not constitute a term

********** TERM

error 6 = trying to parse a 'term' - just parsed a 'factor' and a 'star' - the rest of the code does not constitute a factor

error 7 = trying to parse a 'term' - just parsed a 'factor' and a 'forwardslash' - the rest of the code does not constitute a factor

error 8 = trying to parse a 'term' - code does not constitute a factor

********** FACTOR

error 9 = trying to parse a 'factor' - just recieved a opening bracket, and parsed an expression - was expecting a closing bracket

error 10 = rying to parse a 'factor' - just recieved a opening bracket - the next piece of code does not constitute an expression

error 11 = trying to parse a 'factor' - was expecting either a 'variable', 'integer', 'sring' or 'opening bracket'

********** IFSTATEMENT

error 12 = trying to parse an 'ifstatement' - just recieved an 'if' keyword and an 'opening bracket' and haved parsed an 'expression', I then recieved a 'closing bracket' and a 'goal' and an 'else' keyword - the next piece of code does not constitute another goal

error 13 = trying to parse an 'ifstatement' - just recieved an 'if' keyword and an 'opening bracket' and haved parsed an 'expression', I then recieved a 'closing bracket' - the next piece of code does not constitute a goal

error 14 = trying to parse an 'ifstatement' - just recieved an 'if' keyword and an 'opening bracket' and haved parsed an 'expression' - was expecting a 'closing bracket'

error 15 = trying to parse an 'ifstatement' - just recieved an 'if' keyword and an 'opening bracket' - the next piece of code does not constitute an 'expression'

error 16 = trying to parse an 'ifstatement' - just recieved an 'if' keyword - was expecting an 'opening bracket'

error 17 = trying to parse an 'ifstatement' - the first token was not an 'if' keyword

   */

   
}

Node * goal(void) {
    Node * goalTree;
    printf("\nStart of goal()\n nextSym is %d\n", nextSym);
    nextSym = yylex();
    
    if (T_KEY_IF == nextSym) {
        /* if the token being delt with is an 'if' keyword */
        printf("the token being delt with is an 'if' keyword\n");

        goalTree = ifstatement();
        
        if (goalTree != NULL) {
            printf("\nback in goal()\n its an if statement");
            printf("\n not calling yylex - just returning 1 instead\n");
            /*
            printf("moving to the first token\n");
            printf("nextSym was %d\n", nextSym);
            nextSym = yylex();
            printf("nextSym is now %d\n", nextSym);
            */
            return goalTree;
        } else {
            /* error 1 = trying to parse a 'goal' - code does not constitute an if statement */
            printf("I got an if but there was no if statement\n");
            error("error 1 = trying to parse a 'goal' - code does not constitute an if statement");
            return NULL;
        }
    } else {
        printf("now I am checking for an expression\n");
        
        /* nextSym = yylex(); */
        goalTree = expression();
        
        if (goalTree != NULL) {
            printf("\nback in goal()\n It was an expression so I move onto the next token");
            printf("\n nextSym was %d\n", nextSym);
            nextSym = yylex();
            printf("nextSym is now %d\n", nextSym);
            return goalTree;
        } else {
            /* error 2 = trying to parse a 'goal' - code does not constitute an expression */
            printf("It wasn't an if or an expression so it wasnt a goal\n");
            error("error 2 = trying to parse a 'goal' - code does not constitute an expression");
            return NULL;
        }
    }
    /*
    goal            =   expression ;
    goal            =   ifstatement ;
    */
}

Node * expression(void) {
    
    Node * expressionTree;
    Node * expressionTreeOne;
    Node * expressionTreeTwo;
    
    printf("\nStart of expression()\n nextSym is %d\n", nextSym);
    printf("looking for a term\n");
    
    expressionTree = term();
    
    if (expressionTree != NULL) {
        printf("\nback in expression()\n term found");
        printf("\n moving to the first token\n nextSym was %d\n", nextSym);
        nextSym = yylex();
        printf("nextSym is now %d\n", nextSym);
        if (T_PLUS == nextSym) {
            /* if the token being delt with is a '+' */
            printf("term + :: now looking for the next term [moving to next token]\n");
            nextSym = yylex();
            
            expressionTreeOne = expressionTree;
            expressionTreeTwo = term();
            
            if (expressionTreeTwo != NULL) {
                printf("\nback in expression()\n the next token was a term");
                printf("\n not calling yylex - just returning 1 instead\n");
                /*
                printf("moving to the next token\n");
                printf("nextSym was %d\n", nextSym);
                nextSym = yylex();
                printf("nextSym is now %d\n", nextSym);
                */
                
                expressionTree = create_node("+", T_PLUS, expressionTreeOne, expressionTreeTwo);
                
                return expressionTree;
            } else {
                printf("didnt find a term after my plus\n");
                error("error 3 = trying to parse an 'expression' - just parsed a 'plus' rest is not a term");
                return NULL;
            }
        } else if (T_MINUS == nextSym) {
            /* if the token being delt with is a '-' */
            printf("term - :: now looking for the next term\n moving to the next token");
            printf("\n nextSym was %d\n", nextSym);
            nextSym = yylex();
            printf("nextSym is now %d\n", nextSym);
            
            expressionTreeOne = expressionTree;
            expressionTreeTwo = term();
            
            if (expressionTreeTwo != NULL) {
                printf("\nback in expression()\n the next token was a term");
                printf("\n not calling yylex - just returning 1 instead\n");
                /*
                printf("moving to the next token\n");
                printf("nextSym was %d\n", nextSym);
                nextSym = yylex();
                printf("nextSym is now %d\n", nextSym);
                */
                
                expressionTree = create_node("-", T_MINUS, expressionTreeOne, expressionTreeTwo);
                return expressionTree;
            } else {
                printf("didnt find a term after my minus\n");
                error("error 4 = trying to parse an 'expression' - just parsed a 'minus' rest not a term");
                return NULL;
            }
        } else {
            printf("not calling yylex - just returning 1 instead\n");
            /*
            printf("moving to the next token\n");
            printf("nextSym was %d\n", nextSym);
            nextSym = yylex();
            printf("nextSym is now %d\n", nextSym);
            */
            return expressionTree;
        }
    } else {
        /* error 5 = trying to parse an 'expression' code does not constitute a term */
        error("error 5 = trying to parse an 'expression' code does not constitute a term");
        return NULL;
    }

    /*
    expression      =   term + term 
    expression      =   term - term 
    expression      =   term
    */
    
}

Node * term(void) {
    Node * termTree;
    Node * termTreeOne;
    Node * termTreeTwo;
    
    printf("\nStart of term()\n");
    printf("nextSym is %d\n", nextSym);
    printf("looking for the first factor\n");

    termTree = factor();
    
    if (termTree != NULL) {
        printf("\nback in term()\n Just parsed a factor\n moving to the first token");
        printf("\n nextSym was %d\n", nextSym);
        nextSym = yylex();
        printf("nextSym is now %d\n", nextSym);
        printf("now need either a star or a forwardslash or it is just a factor\n");
                
        if (T_STAR == nextSym) {
            /* if the token being delt with is a '*' */
            printf("parsed the star\n");
            printf("moving to the next token\n");
            printf("nextSym was %d\n", nextSym);
            nextSym = yylex();
            printf("nextSym is now %d\n", nextSym);
            printf("need another factor\n");
            
            termTreeOne = termTree;
            termTreeTwo = factor();
            
            if (termTreeTwo != NULL) {
                printf("\nback in term()\n");
                printf("parsed a factor\n");
                printf("not calling yylex - just returning 1 instead\n");
                /*
                printf("moving to the next token\n");
                printf("nextSym was %d\n", nextSym);
                nextSym = yylex();
                printf("nextSym is now %d\n", nextSym);
                */
                
                termTree = create_node("*", T_STAR, termTreeOne, termTreeTwo);
                
                return termTree;
            } else {
                printf("no second factor!\n");
                error("error 6 = trying to parse a 'term', 'factor', 'star' - the rest not a factor");
                return NULL;
            }
        } else if (T_FORWARDSLASH == nextSym) {
            /* if the token being delt with is a '/' */
            printf("parsed the forwardslash\n");
            printf("moving to the next token\n");
            printf("nextSym was %d\n", nextSym);
            nextSym = yylex();
            printf("nextSym is now %d\n", nextSym);
            printf("now I need another factor\n");
            
            termTreeOne = termTree;
            termTreeTwo = factor();
            
            if (termTreeTwo != NULL) {
                printf("\nback in term()\n");
                printf("parsed a factor!\n");
                printf("not calling yylex - just returning 1 instead\n");
                /*
                printf("moving to the next token\n");
                printf("nextSym was %d\n", nextSym);
                nextSym = yylex();
                printf("nextSym is now %d\n", nextSym);
                */
                termTree = create_node("/", T_FORWARDSLASH, termTreeOne, termTreeTwo);
                
                return termTree;
            } else {
                printf("no second factor!\n");
                error("error 7 = trying to parse a 'term', 'factor', 'forwardslash' - the rest not a factor");
                return NULL;
            }
        } else {
            printf("parsed a factor!\n");
            printf("not calling yylex - just returning 1 instead\n");
            return termTree;
        }
    } else {
        /* error 8 = trying to parse a 'term' - code does not constitute a factor */
        error ("error 8 = trying to parse a 'term' - code does not constitute a factor");
        return NULL;
    }
    
    /*
    term            =   factor * factor
    term            =   factor / factor
    term            =   factor
    */
    
}

Node * factor(void) {
    
    Node * factorTree;
    
    printf("\nStart of factor()\n");
    printf("nextSym is %d\n", nextSym);
    
    /*
    printf("moving to the next token\n");
    printf("nextSym was %d\n", nextSym);
    nextSym = yylex();
    printf("nextSym is now %d\n", nextSym);
    */

    if (T_VARIABLE == nextSym) {
        printf("parsed a variable\n");
        /* if the token being delt with is a variable */
        printf("not calling yylex - just returning 1 instead\n");
        /*
        printf("moving to the first token\n");
        printf("nextSym was %d\n", nextSym);
        nextSym = yylex();
        printf("nextSym is now %d\n", nextSym);
        */
        
        factorTree = create_node(yytext, nextSym, NULL, NULL);
        
        return factorTree;
        
    } else if (T_INTEGER == nextSym) {
        printf("parsed an integer\n");
        /* if the token being delt with is an integer */
        /*
        printf("not calling yylex - just returning 1 instead\n");
        */
        printf("moving to the next token\n");
        printf("nextSym was %d\n", nextSym);
        nextSym = yylex();
        printf("nextSym is now %d\n", nextSym);
        
        
        factorTree = create_node(yytext, nextSym, NULL, NULL);
        printf("leaving factor\n");
        return factorTree;
        
    } else if (T_STRING == nextSym) {
        printf("have found a string\n");
        /* if the token being delt with is a string */
        printf("not calling yylex - just returning 1 instead\n");
        /*
        printf("moving to the first token\n");
        printf("nextSym was %d\n", nextSym);
        nextSym = yylex();
        printf("nextSym is now %d\n", nextSym);
        */
        
        factorTree = create_node(yytext, nextSym, NULL, NULL);
        
        return factorTree;
        
    } else if (nextSym == T_BRA) {
        printf("I have an opening bracket\n");
        /* if the token being delt with is a '(' parse it and call for the next token*/
        printf("Found a ( in factor()\n");
        printf("moving to the next token\n");
        printf("nextSym was %d\n", nextSym);
        nextSym = yylex();
        printf("nextSym is now %d\n", nextSym);
        if (expression() != NULL) {
            printf("\nback in factor()\n");
            printf("parsed an expression\n");
            /* if it was an expression */
            nextSym = yylex();
            
            factorTree = create_node(yytext, nextSym, NULL, NULL);
            
            if (T_KET == nextSym) {
                printf("closing bracket\n");
                /* if the token being dealt with is a ')' */
                printf("not calling yylex - just returning 1 instead\n");
                /*
                printf("moving to the first token\n");
                printf("nextSym was %d\n", nextSym);
                nextSym = yylex();
                printf("nextSym is now %d\n", nextSym);
                */
                return factorTree;
            } else {
                printf("nextSym is now %d\n", nextSym);
                error("error 9 = trying to parse a 'factor', had opening bracket, expression - need a ')'");
                return NULL;
            }
        } else {
            error("error 10 = rying to parse a 'factor' - had a opening bracket rest not an expression");
            return NULL;
        }
    } else {
        printf("wanted either a 'variable', 'integer', 'sring' or 'opening bracket'\n");
        printf("just spat out a number %d\n", nextSym);
        error("error 11 = trying to parse a 'factor' - need either a 'variable', 'integer', 'sring' or '('");
        return NULL;
    }
    
    /*
    factor          =   variable
    factor          =   integer
    factor          =   string
    factor          =   ( expression )
    */
}

Node * ifstatement(void) {
    
    Node * ifTree;
    Node * ifTreeOne;
    Node * ifTreeTwo;
    Node * ifTreeThree;
    Node * ifTreeFour;
    
    ifTree = NULL;
    ifTreeOne = NULL;
    ifTreeTwo = NULL;
    ifTreeThree = NULL;
    ifTreeFour = NULL;
    
    printf("\nStart of ifstatement()\n");
    printf("nextSym is %d\n", nextSym);
    
    if (T_KEY_IF != nextSym) {
        /* if the token being delt with is not an 'if' keyword */
        error("error 17 = trying to parse an 'ifstatement' - the first token  not an 'if' keyword");
        return NULL;
    }
    nextSym = yylex();

    if (T_BRA != nextSym) {
        /* if the token being delt with is not a '(' */
        error("error 16 = trying to parse an 'ifstatement' - had an 'if' keyword -need a '('");
        return NULL;
    }
    nextSym = yylex();

    ifTreeOne = expression();
    if (ifTreeOne == NULL) {
        /* if the tokens being delt with do not constitute an expression */
        error("error 15 = trying to parse an 'ifstatement' - just had a 'if' and an '(' - need an 'expression'");
        return NULL;
    }
    nextSym = yylex();

    if (T_KET != nextSym) {
        /* if the token being delt with is not a ')' */
        error("error 14 = trying to parse 'ifstatement': had an 'if' and a '(' and an 'expression' need a ')'");
        return NULL;
    }

    ifTreeTwo = goal();
    if (ifTreeTwo != NULL) {
        /* if the tokens being delt with do not constitute a goal */
        error("error 13 = in 'ifstatement': had 'if' &'(' & 'expression' & ')' need a goal");
        return NULL;
    }
    nextSym = yylex();

    /* Now have "if ( expression ) goal" */

    if (T_KEY_ELSE == nextSym) {
        /* if the token being delt with is an 'else' keyword */
        
        
        ifTreeFour = goal();
        
        if (ifTreeFour != NULL) {
            /* if the tokens being delt with constitute a goal */
            /* ifstatement     =   if ( expression ) goal else goal */
            
            ifTreeThree = ifTreeTwo;
            ifTreeTwo = create_node("else", T_KEY_ELSE, ifTreeThree, ifTreeFour);
            ifTree = create_node("if", T_KEY_IF, ifTreeOne, ifTreeTwo);
            
            return ifTree;
            
        } else {
            /* cant have 'ifstatement     =   if ( expression ) goal else' */
            error("error 12 = trying to parse an 'ifstatement' - in case 2, need a second goal");
            return NULL;
        }
        
    } else {
        /* 'ifstatement     =   if ( expression ) goal'  */
        ifTree = create_node("if", T_KEY_IF, ifTreeOne, ifTreeTwo);
        return ifTree;
    }
 
    
    /*
    ifstatement     =   if ( expression ) goal else goal
    ifstatement     =   if ( expression ) goal 
    */
}