8.8. Recursive Descent


Recursive descent

Recursive descent is a way to implement a top-down parser.

The idea is to define a function for each nonterminal in the grammar.

The function for nonterminal N uses the lookahead to make a prediction about which production for N to use, then reads each thing on the right-hand side of that production.

A function match(t) is created to read a token. First, it checks whether the lookahead is t, then it gets the next token, updating the lookahead.

For example, if the right-hand side of the production is if ( E ) S then the parser does the following.

   match(TOK_IF);
   match('(');
   E();
   match(')');
   S();


An example

The following example not only parses simple expressions but also does some semantic processing: it evaluates the expression.

#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <limits.h>

/* This is an example of a
 * recursive descent parser.
 * It evaluates an expression
 * using the following
 * grammar.
 *
 * Expr     -> Term ExprRest
 *
 * ExprRest -> (empty)
 *          |  + Expr
 *          |  - Expr
 *
 * Term     -> Factor TermRest
 *
 * TermRest -> (empty)
 *          |  * Term
 *          |  / Term
 *
 * Factor   -> number
 *          |  ( Expr )
 *
 * where a number is an 
 * integer and all operations 
 * are done using integer
 * arithmetic.
 */

#define MAX_DIGITS 10
#define TOK_NUMBER 256

typedef int TOKEN;

TOKEN lookahead = 0;
int   attribute = 0;
int   nextch    = EOF;

void syntaxerror(void);
void advance(void);
TOKEN lex(void);
void match(TOKEN t);
int Expr(void);
int ExprRest(int n);
int Term(void);
int TermRest(int n);
int Factor(void);

/*============================
 * syntaxerror():
 * Report a syntax error.
 *
 * Error reporting is very
 * crude here.
 *============================*/

void syntaxerror(void)
{
  printf("Syntax error");
  exit(1);
}


/*============================
 * advance(): 
 * Skip over white space, 
 * then read a character.
 *
 * Store the character that 
 * was read into nextch.
 *============================*/

void advance(void)
{
  nextch = getchar();
  while(isspace(nextch)) {
    nextch = getchar();
  }
}

/*===========================
 * lex(): 
 * A simple ad-hoc lexer
 *
 * Read a token and store it
 * into lookahead.
 *===========================*/

TOKEN lex(void)
{
  if(nextch == EOF) {
    return EOF;
  }

  if(nextch == '+' ||
     nextch == '-' ||
     nextch == '*' ||
     nextch == '/' ||
     nextch == '(' ||
     nextch == ')') {

    int tok = nextch;
    advance();
    return tok;
  }

  if(isdigit(nextch)) {
    int v = 0;

    while(isdigit(nextch)) {
      v = 10*v + (nextch - '0');
      advance();
    }
    attribute = (int) v;
    return TOK_NUMBER;
  }

  syntaxerror();
  return 0;
}

/*===========================
 * match(t):
 * Check that the current
 * lookahead is t.
 *
 * If so, get the next token
 * (which becomes the next
 * lookahead token).
 *
 * If there is a mismatch,
 * it is a syntax error.
 *===========================*/

void match(TOKEN t)
{
  if(lookahead == t) {
    lookahead = lex();
  }
  else {
    syntaxerror();
  }
}

/*===========================
 * Expr():
 * Read an expression
 * and return its value.
 *
 * Expr -> Term ExprRest
 *===========================*/

int Expr(void)
{
  int x = Term();
  int y = ExprRest(x);
  return y;
}
  
/*===========================
 * ExprRest(n): 
 *
 * ExprRest 
 *   -> (empty)
 *         Return n
 *   |  + Expr
 *         Return n + val(Expr)
 *   |  - Expr
 *         Return n - val(Expr)
 *===========================*/

int ExprRest(int n)
{
  int x;

  if(lookahead == '+') {
    match('+');
    x = Expr();
    return n + x;
  }

  if(lookahead == '-') {
    match('-');
    x = Expr();
    return n - x;
  }

  return n;
}
  
/*===========================
 * Term(): 
 * Read a term and
 * return its value.
 *
 * Term -> Factor TermRest
 *===========================*/

int Term(void)
{
  int x = Factor();
  int y = TermRest(x);
  return y;
}  

/*===========================
 * TermRest(n): 
 *
 * TermRest 
 *   -> (empty)
 *         Return n
 *   |  * Term
 *         Return n * val(Term)
 *   |  / Term
 *         Return n / val(Term)
 *===========================*/

int TermRest(int n)
{
  int x;

  if(lookahead == '*') {
    match('*');
    x = Term();
    return n * x;
  }

  if(lookahead == '/') {
    match('/');
    x = Term();
    return n / x;
  }

  return n;
}
  
/*===========================
 * Factor():
 * Read a factor and return
 * its value.
 *
 * Factor   -> number
 *          |  ( Expr )
 *===========================*/

int Factor(void)
{
  int x;

  if(lookahead == '(') {
    match('(');
    x = Expr();
    match(')');
    return x;
  }

  if(lookahead == TOK_NUMBER) {
    x = attribute;
    match(TOK_NUMBER);
    return x;
  }

  syntaxerror();
  return 0;
}
  
/*===========================
 * main():
 * Read an expression and
 * print its value, or print 
 * "syntax error" if the
 * expression is poorly formed.
 *
 * The expression ends at the
 * end of file.
 *===========================*/

int main()
{
  int val;

  /* Prime the lexer. */

  advance();
  lookahead = lex();

  /* Evaluate the expression. */

  val = Expr();
  if(lookahead != EOF) {
    printf("%d\n", val);
  }
  else {
    syntaxerror();
  }

  return 0;
}