/************************************************************************
 * File:    table.cc                                                    *
 *                                                                      *
 * Purpose: Implements tables via hashing.  Start file for CSCI 3510    *
 *          programming assignment.                                     *
 *                                                                      *
 * Author:  Karl Abrahamson                                             *
 *                                                                      *
 * Date:    January 27, 1998                                            *
 ************************************************************************/

/************************************************************************
 *                    Introduction to Tables                            *
 *                    ----------------------                            *
 *                                                                      *
 * A table associates values with keys.  You provide the table with     *
 * a key, and it tells you the value that is stored with the key,       *
 * or tells you that the key is not in the table.  An example of a      *
 * table is a telephone book.  You give the telephone book a name,      *
 * and it tells you a telephone number associated with that name.       *
 * You might have a table such as the following.                        *
 *                                                                      *
 *      key        value                                                *
 *     -----      -------                                               *
 *      "John"     "555-3545"                                           *
 *      "Mindy"    "555-6921"                                           *
 *                                                                      *
 * If you ask for the value associated with key "John", you get         *
 * value "555-3545".  If you ask for the value associated with          *
 * "Max", you are told that "Max" is not in the table.                  *
 *                                                                      *
 *                     Representing Tables                              *
 *                     -------------------                              *
 *                                                                      *
 * How to represent tables efficiently, and to do lookups               *
 * efficiently, is a well-studied area of computing.  Hundreds of       *
 * different of methods have been proposed.                             *
 *                                                                      *
 * One efficient method is called hashing.  We start with a function    *
 * h(k) called a hash function, which takes a key and produces an       *
 * integer.  We store an array A, of some size s, with the keys and     *
 * associated values kept in it.  Key k is stored (preferably) in       *
 * A[h(k) mod s].                                                       *
 *                                                                      *
 * For example, suppose that h("John") = 20 and h("Mindy") = 42.        *
 * Suppose that we store our table above in a hash table of size 5.     *
 * Then key "John" would be stored in A[20 mod 5] (that is, in A[0]),   *
 * and "Mindy" would be stored in A[42 mod 5] (that is, in A[2]).       *
 * The array would look like this.                                      *
 *                                                                      *
 *   index      key         value                                       *
 *   -----      ---         -----                                       *
 *     0        "John"      "555-3545"                                  *
 *     1          -          -                                          *
 *     2        "Mindy"     "555-6921"                                  *
 *     3          -          -                                          *
 *     4          -          -                                          *
 *                                                                      *
 * Value "-" in the key field indicates a missing, or null, key.  Some  *
 * standard representation of this null value must be decided on.  For  *
 * strings, an obvious null value is a null pointer.			*
 *                                                                      *
 * As long as all of the keys in the table have different hash values,  *
 * this is very simple.  To find key k, you look in A[h(k) mod  s].     *
 * Unfortunately, things are not always this nice.  Sometimes, two      *
 * different keys have the same hash value.  In that case, what we do   *
 * is put a key in the first empty array cell after the one where       *
 * it prefers to be.  If we hit the end of the array, we wrap around    *
 * back to the beginning.                                               *
 *                                                                      *
 * If you have a decent hash function, and do not try to fill the table *
 * too full (as a rule of thumb, don't use more than about 3/4 of       *
 * the cells), then hash tables are very efficient -- more efficient    *
 * than any other approach, on the average.                             *
 ************************************************************************/

#include <string.h>
#include <stdlib.h>
#include "table.h"
#define PRIVATE static

//**************************************************************//
//                      dupstring                               //
//                      dupKey                                  //
//                      dupVal                                  //
//                      deleteKey                               //
//                      deleteVal                               //
//**************************************************************//
// dupstring(s) returns a copy of s, in the heap.               //
//                                                              //
// Macros dupKey, dupVal, deleteKey and deleteVal are provided  //
// in case the key or value types change.                       //
//                                                              //
// dupKey and dupVal should be designed to duplicate a key or a //
// value, respectively, so that its memory is owned by the hash //
// table.  That way, the hash table is insulated from any       //
// changes that are made to the memory occupied by the keys or  //
// values that are given to it.  When a key k is added, what is //
// actually put in the table is dupKey(k).  
//                                                              //
// For example, if the key is a string, then dupKey should use  //
// dupstring to duplicate it.  But if the key is an integer,    //
// then dupKey would just return the integer, since there is no //
// need to duplicate an integer.                                //
//                                                              //
// deleteKey and deleteVal are used to delete the memory	//
// occupied by a key or value, respectively.  They		//
// should just do a delete in the case of a string, but         //
// should do nothing in the case of an integer.			//
// When a key k is deleted, deleteKey(k) is called.		//
//**************************************************************//

char* dupstring(char* s)
{
  char* d = new char[strlen(s) + 1];
  strcpy(d, s);
  return d;
}

#define dupKey(k)    dupstring(k)
#define dupVal(v)    dupstring(v)
#define deleteKey(k) delete[](k)
#define deleteVal(v) delete[](v)


//**************************************************************//
//                      equalKeys                               //
//**************************************************************//
// equalKeys(k1,k2) is 1 if keys k1 and k2 are the same, and    //
// is 0 if they are different.  				//
//								//
// The following implementation is for the case where the keys  //
// are strings.                     				//
//**************************************************************//

PRIVATE int equalKeys(HashKeyType k1, HashKeyType k2)
{
  return strcmp(k1, k2) == 0;
}


//**************************************************************//
//                      hash                                    //
//**************************************************************//
// hash(key) is the hash value associated with key.             //
//								//
// The following implementation is for the case where the keys  //
// are strings.                     				//
//**************************************************************//

PRIVATE long hash(HashKeyType key)
{
  //------------------------------------------------------------//
  // This hash function is computed by breaking the string into //
  // four-byte segments, and EXCLUSIVE-ORing the segments       //
  // together.                                                  //
  //------------------------------------------------------------//

  long   h;   // Accumulates hash value.
  long   b;   // Accumulates four bytes of the string.
  char*  s;   // Scans through the string.
  char   c;   // Holds *s, the current character.
  int    i;

  if(key == NULL) return 0;

  h = 0;
  s = key;
  c = *s;

  while(c != 0) {

    //----------------------------------------------------------//
    // Shift four bytes of s into b, but stop at the end of s.  //
    // Then exclusive-or into h.                                //
    //                                                          //
    // Note: x << k is x shifted left by k bits.                //
    //       x ^ y is bitwise the exclusive-or of x and y.      //
    //                                                          //
    // The bytes are taken to be 7 bits.                        //
    //----------------------------------------------------------//

    b = 0;
    i = 0;
    while(i < 4 && c != 0) {
      b = (b << 7) + c;
      c = *(++s);
      i++;
    }
    h = h ^ b;
  }

  return h;
}


//**************************************************************//
//                      hashLocate                              //
//**************************************************************//
// hashLocate(t,key) returns a pointer to the cell in table t   //
// that holds the given key.  If key is not in table t, then    //
// the location that the key should occupy, if it is inserted,  //
// is returned.                                                 //
//                                                              //
//			IMPORTANT NOTE				//
//								//
// It is important to hashLocate that the table have at least   //
// one empty cell.  Otherwise, hashLocate will get into an      //
// infinite loop when searching for something that is not in    //
// the table.                                                   //
//**************************************************************//

PRIVATE HashCellPtrType hashLocate(HashTableType& tbl, HashKeyType key)
{
  HashCellPtrType cells = tbl.cells;          // The cell array.
  int             size  = HASH_TABLE_SIZE;    // The size of array cells.
  int             currentIndex;               // Possible result index.
  HashCellPtrType currentCell;                // Possible result pointer.
  
  currentIndex = hash(key) % size;
  currentCell  = cells + currentIndex;

  while(currentCell->key != NullKey) {

    //----------------------------------------//
    // If the key is found, return this cell. //
    //----------------------------------------//

    if(equalKeys(key, currentCell->key)) return currentCell;

    //----------------------------------------------------------//
    // Move to the next cell, wrapping around if at the end of  //
    // the table.                                               //
    //----------------------------------------------------------//

    currentIndex++;
    if(currentIndex >= size) {
      currentIndex -= size;
    }
    currentCell = cells + currentIndex;
  }

  //---------------------------------------------------------
  // If we get out of the loop, currentCell points to an empty
  // cell where this cell would be inserted, if an insertion
  // is being done.  Return that pointer.
  //---------------------------------------------------------

  return currentCell;
}


//**************************************************************//
//                      hashInsert                              //
//**************************************************************//
// hashInsert(t,key,val) inserts the given key, with associated //
// value val, into table t.                                     //
//                                                              //
// If key is already present in the table, then hashInsert      //
// replaces the value associated with key by val.               //
//                                                              //
// If there is no more room in the table, then hashInsert does  //
// not do the insertion -- it leaves the table unchanged.       //
//**************************************************************//

void hashInsert(HashTableType& tbl, HashKeyType key, HashValueType val)
{
  //------------------------------------------------------------//
  // If an insertion would overload the table, just give up,    //
  // and do not do the insertion.  Note that we cannot use      //
  // all of the cells in the table, since hashLocate requires   //
  // that there be at least one empty cell.                     //
  //------------------------------------------------------------//

  if(hashFull(tbl)) return;

  //---------------------------------------//
  // Locate the place to do the insertion. //
  //---------------------------------------//

  HashCellPtrType insertionPoint = hashLocate(tbl, key);

  //------------------------------------------------------------//
  // If the insertion cell is empty, then install both the      //
  // key and the value.  If the insertion cell is occupied,     //
  // then we are modifying an entry; delete the value and       //
  // change it to the new value.                                //
  //------------------------------------------------------------//

  if(insertionPoint->key == NullKey) {
    insertionPoint->key = dupKey(key);
    tbl.load++;
  }
  else {
    deleteVal(insertionPoint->val);
  }
  insertionPoint->val = dupVal(val);
}


//**************************************************************//
//                      hashLookup                              //
//**************************************************************//
// If key is in table t, then hashLookup(t,key,val) sets val    //
// to the value associated with key in table t, and returns 1.  //
//                                                              //
// If key is NOT in table t, then hashLookup(t,key,val) returns //
// 0, and does not alter val.                                   //
//**************************************************************//

int hashLookup(HashTableType& tbl, HashKeyType key, HashValueType& val)
{
  HashCellPtrType lookupPoint = hashLocate(tbl, key);

  if(lookupPoint->key != NullKey) {
    val = lookupPoint->val;
    return 1;
  }
  else return 0;
}


//**************************************************************//
//                      hashFull                                //
//**************************************************************//
// hashFull(t) returns 1 if table t is full (and hence cannot   //
// accept any more insertions), and 0 if table t is not full.   //
//**************************************************************//

int hashFull(HashTableType& tbl)
{
  return tbl.load == HASH_TABLE_SIZE - 1;
}


//**************************************************************//
//                      hashEmpty                               //
//**************************************************************//
// hashEmpty(t) removes all items from table t.                 //
//**************************************************************//

void hashEmpty(HashTableType& tbl)
{
  int i;

  tbl.load = 0;

  for(i = 0; i < HASH_TABLE_SIZE; i++) {

    //-------------------------------------------------------//
    // If the i-th cell is occupied, then delete its key and //
    // value, and set it to an empty cell by setting its key //
    // to NullKey.					     //
    //-------------------------------------------------------//

    HashCellPtrType thisCell = tbl.cells + i;
    if(thisCell->key != NullKey) {
      deleteKey(thisCell->key);
      deleteVal(thisCell->val);
      thisCell->key = NullKey;
    }
  }
}