|
Recursive descent is a way to implement a top-down parser.
The idea is to define a function for each nonterminal in the grammar.
The function for nonterminal N uses the lookahead to make a prediction about which production for N to use, then reads each thing on the right-hand side of that production.
A function match(t) is created to read a token. First, it checks whether the lookahead is t, then it gets the next token, updating the lookahead.
For example, if the right-hand side of the production is if ( E ) S then the parser does the following.
match(TOK_IF); match('('); E(); match(')'); S();
The following example not only parses simple expressions but also does some semantic processing: it evaluates the expression.
#include <stdio.h> #include <ctype.h> #include <stdlib.h> #include <limits.h> /* This is an example of a * recursive descent parser. * It evaluates an expression * using the following * grammar. * * Expr -> Term ExprRest * * ExprRest -> (empty) * | + Expr * | - Expr * * Term -> Factor TermRest * * TermRest -> (empty) * | * Term * | / Term * * Factor -> number * | ( Expr ) * * where a number is an * integer and all operations * are done using integer * arithmetic. */ #define MAX_DIGITS 10 #define TOK_NUMBER 256 typedef int TOKEN; TOKEN lookahead = 0; int attribute = 0; int nextch = EOF; void syntaxerror(void); void advance(void); TOKEN lex(void); void match(TOKEN t); int Expr(void); int ExprRest(int n); int Term(void); int TermRest(int n); int Factor(void); /*============================ * syntaxerror(): * Report a syntax error. * * Error reporting is very * crude here. *============================*/ void syntaxerror(void) { printf("Syntax error"); exit(1); } /*============================ * advance(): * Skip over white space, * then read a character. * * Store the character that * was read into nextch. *============================*/ void advance(void) { nextch = getchar(); while(isspace(nextch)) { nextch = getchar(); } } /*=========================== * lex(): * A simple ad-hoc lexer * * Read a token and store it * into lookahead. *===========================*/ TOKEN lex(void) { if(nextch == EOF) { return EOF; } if(nextch == '+' || nextch == '-' || nextch == '*' || nextch == '/' || nextch == '(' || nextch == ')') { int tok = nextch; advance(); return tok; } if(isdigit(nextch)) { int v = 0; while(isdigit(nextch)) { v = 10*v + (nextch - '0'); advance(); } attribute = (int) v; return TOK_NUMBER; } syntaxerror(); return 0; } /*=========================== * match(t): * Check that the current * lookahead is t. * * If so, get the next token * (which becomes the next * lookahead token). * * If there is a mismatch, * it is a syntax error. *===========================*/ void match(TOKEN t) { if(lookahead == t) { lookahead = lex(); } else { syntaxerror(); } } /*=========================== * Expr(): * Read an expression * and return its value. * * Expr -> Term ExprRest *===========================*/ int Expr(void) { int x = Term(); int y = ExprRest(x); return y; } /*=========================== * ExprRest(n): * * ExprRest * -> (empty) * Return n * | + Expr * Return n + val(Expr) * | - Expr * Return n - val(Expr) *===========================*/ int ExprRest(int n) { int x; if(lookahead == '+') { match('+'); x = Expr(); return n + x; } if(lookahead == '-') { match('-'); x = Expr(); return n - x; } return n; } /*=========================== * Term(): * Read a term and * return its value. * * Term -> Factor TermRest *===========================*/ int Term(void) { int x = Factor(); int y = TermRest(x); return y; } /*=========================== * TermRest(n): * * TermRest * -> (empty) * Return n * | * Term * Return n * val(Term) * | / Term * Return n / val(Term) *===========================*/ int TermRest(int n) { int x; if(lookahead == '*') { match('*'); x = Term(); return n * x; } if(lookahead == '/') { match('/'); x = Term(); return n / x; } return n; } /*=========================== * Factor(): * Read a factor and return * its value. * * Factor -> number * | ( Expr ) *===========================*/ int Factor(void) { int x; if(lookahead == '(') { match('('); x = Expr(); match(')'); return x; } if(lookahead == TOK_NUMBER) { x = attribute; match(TOK_NUMBER); return x; } syntaxerror(); return 0; } /*=========================== * main(): * Read an expression and * print its value, or print * "syntax error" if the * expression is poorly formed. * * The expression ends at the * end of file. *===========================*/ int main() { int val; /* Prime the lexer. */ advance(); lookahead = lex(); /* Evaluate the expression. */ val = Expr(); if(lookahead != EOF) { printf("%d\n", val); } else { syntaxerror(); } return 0; }
|