/************************************************************************ * File: table.cc * * * * Purpose: Implements tables via hashing. Start file for CSCI 3510 * * programming assignment. * * * * Author: Karl Abrahamson * * * * Date: January 27, 1998 * ************************************************************************/ /************************************************************************ * Introduction to Tables * * ---------------------- * * * * A table associates values with keys. You provide the table with * * a key, and it tells you the value that is stored with the key, * * or tells you that the key is not in the table. An example of a * * table is a telephone book. You give the telephone book a name, * * and it tells you a telephone number associated with that name. * * You might have a table such as the following. * * * * key value * * ----- ------- * * "John" "555-3545" * * "Mindy" "555-6921" * * * * If you ask for the value associated with key "John", you get * * value "555-3545". If you ask for the value associated with * * "Max", you are told that "Max" is not in the table. * * * * Representing Tables * * ------------------- * * * * How to represent tables efficiently, and to do lookups * * efficiently, is a well-studied area of computing. Hundreds of * * different of methods have been proposed. * * * * One efficient method is called hashing. We start with a function * * h(k) called a hash function, which takes a key and produces an * * integer. We store an array A, of some size s, with the keys and * * associated values kept in it. Key k is stored (preferably) in * * A[h(k) mod s]. * * * * For example, suppose that h("John") = 20 and h("Mindy") = 42. * * Suppose that we store our table above in a hash table of size 5. * * Then key "John" would be stored in A[20 mod 5] (that is, in A[0]), * * and "Mindy" would be stored in A[42 mod 5] (that is, in A[2]). * * The array would look like this. * * * * index key value * * ----- --- ----- * * 0 "John" "555-3545" * * 1 - - * * 2 "Mindy" "555-6921" * * 3 - - * * 4 - - * * * * Value "-" in the key field indicates a missing, or null, key. Some * * standard representation of this null value must be decided on. For * * strings, an obvious null value is a null pointer. * * * * As long as all of the keys in the table have different hash values, * * this is very simple. To find key k, you look in A[h(k) mod s]. * * Unfortunately, things are not always this nice. Sometimes, two * * different keys have the same hash value. In that case, what we do * * is put a key in the first empty array cell after the one where * * it prefers to be. If we hit the end of the array, we wrap around * * back to the beginning. * * * * If you have a decent hash function, and do not try to fill the table * * too full (as a rule of thumb, don't use more than about 3/4 of * * the cells), then hash tables are very efficient -- more efficient * * than any other approach, on the average. * ************************************************************************/ #include #include #include "table.h" #define PRIVATE static //**************************************************************// // dupstring // // dupKey // // dupVal // // deleteKey // // deleteVal // //**************************************************************// // dupstring(s) returns a copy of s, in the heap. // // // // Macros dupKey, dupVal, deleteKey and deleteVal are provided // // in case the key or value types change. // // // // dupKey and dupVal should be designed to duplicate a key or a // // value, respectively, so that its memory is owned by the hash // // table. That way, the hash table is insulated from any // // changes that are made to the memory occupied by the keys or // // values that are given to it. When a key k is added, what is // // actually put in the table is dupKey(k). // // // For example, if the key is a string, then dupKey should use // // dupstring to duplicate it. But if the key is an integer, // // then dupKey would just return the integer, since there is no // // need to duplicate an integer. // // // // deleteKey and deleteVal are used to delete the memory // // occupied by a key or value, respectively. They // // should just do a delete in the case of a string, but // // should do nothing in the case of an integer. // // When a key k is deleted, deleteKey(k) is called. // //**************************************************************// char* dupstring(char* s) { char* d = new char[strlen(s) + 1]; strcpy(d, s); return d; } #define dupKey(k) dupstring(k) #define dupVal(v) dupstring(v) #define deleteKey(k) delete[](k) #define deleteVal(v) delete[](v) //**************************************************************// // equalKeys // //**************************************************************// // equalKeys(k1,k2) is 1 if keys k1 and k2 are the same, and // // is 0 if they are different. // // // // The following implementation is for the case where the keys // // are strings. // //**************************************************************// PRIVATE int equalKeys(HashKeyType k1, HashKeyType k2) { return strcmp(k1, k2) == 0; } //**************************************************************// // hash // //**************************************************************// // hash(key) is the hash value associated with key. // // // // The following implementation is for the case where the keys // // are strings. // //**************************************************************// PRIVATE long hash(HashKeyType key) { //------------------------------------------------------------// // This hash function is computed by breaking the string into // // four-byte segments, and EXCLUSIVE-ORing the segments // // together. // //------------------------------------------------------------// long h; // Accumulates hash value. long b; // Accumulates four bytes of the string. char* s; // Scans through the string. char c; // Holds *s, the current character. int i; if(key == NULL) return 0; h = 0; s = key; c = *s; while(c != 0) { //----------------------------------------------------------// // Shift four bytes of s into b, but stop at the end of s. // // Then exclusive-or into h. // // // // Note: x << k is x shifted left by k bits. // // x ^ y is bitwise the exclusive-or of x and y. // // // // The bytes are taken to be 7 bits. // //----------------------------------------------------------// b = 0; i = 0; while(i < 4 && c != 0) { b = (b << 7) + c; c = *(++s); i++; } h = h ^ b; } return h; } //**************************************************************// // hashLocate // //**************************************************************// // hashLocate(t,key) returns a pointer to the cell in table t // // that holds the given key. If key is not in table t, then // // the location that the key should occupy, if it is inserted, // // is returned. // // // // IMPORTANT NOTE // // // // It is important to hashLocate that the table have at least // // one empty cell. Otherwise, hashLocate will get into an // // infinite loop when searching for something that is not in // // the table. // //**************************************************************// PRIVATE HashCellPtrType hashLocate(HashTableType& tbl, HashKeyType key) { HashCellPtrType cells = tbl.cells; // The cell array. int size = HASH_TABLE_SIZE; // The size of array cells. int currentIndex; // Possible result index. HashCellPtrType currentCell; // Possible result pointer. currentIndex = hash(key) % size; currentCell = cells + currentIndex; while(currentCell->key != NullKey) { //----------------------------------------// // If the key is found, return this cell. // //----------------------------------------// if(equalKeys(key, currentCell->key)) return currentCell; //----------------------------------------------------------// // Move to the next cell, wrapping around if at the end of // // the table. // //----------------------------------------------------------// currentIndex++; if(currentIndex >= size) { currentIndex -= size; } currentCell = cells + currentIndex; } //--------------------------------------------------------- // If we get out of the loop, currentCell points to an empty // cell where this cell would be inserted, if an insertion // is being done. Return that pointer. //--------------------------------------------------------- return currentCell; } //**************************************************************// // hashInsert // //**************************************************************// // hashInsert(t,key,val) inserts the given key, with associated // // value val, into table t. // // // // If key is already present in the table, then hashInsert // // replaces the value associated with key by val. // // // // If there is no more room in the table, then hashInsert does // // not do the insertion -- it leaves the table unchanged. // //**************************************************************// void hashInsert(HashTableType& tbl, HashKeyType key, HashValueType val) { //------------------------------------------------------------// // If an insertion would overload the table, just give up, // // and do not do the insertion. Note that we cannot use // // all of the cells in the table, since hashLocate requires // // that there be at least one empty cell. // //------------------------------------------------------------// if(hashFull(tbl)) return; //---------------------------------------// // Locate the place to do the insertion. // //---------------------------------------// HashCellPtrType insertionPoint = hashLocate(tbl, key); //------------------------------------------------------------// // If the insertion cell is empty, then install both the // // key and the value. If the insertion cell is occupied, // // then we are modifying an entry; delete the value and // // change it to the new value. // //------------------------------------------------------------// if(insertionPoint->key == NullKey) { insertionPoint->key = dupKey(key); tbl.load++; } else { deleteVal(insertionPoint->val); } insertionPoint->val = dupVal(val); } //**************************************************************// // hashLookup // //**************************************************************// // If key is in table t, then hashLookup(t,key,val) sets val // // to the value associated with key in table t, and returns 1. // // // // If key is NOT in table t, then hashLookup(t,key,val) returns // // 0, and does not alter val. // //**************************************************************// int hashLookup(HashTableType& tbl, HashKeyType key, HashValueType& val) { HashCellPtrType lookupPoint = hashLocate(tbl, key); if(lookupPoint->key != NullKey) { val = lookupPoint->val; return 1; } else return 0; } //**************************************************************// // hashFull // //**************************************************************// // hashFull(t) returns 1 if table t is full (and hence cannot // // accept any more insertions), and 0 if table t is not full. // //**************************************************************// int hashFull(HashTableType& tbl) { return tbl.load == HASH_TABLE_SIZE - 1; } //**************************************************************// // hashEmpty // //**************************************************************// // hashEmpty(t) removes all items from table t. // //**************************************************************// void hashEmpty(HashTableType& tbl) { int i; tbl.load = 0; for(i = 0; i < HASH_TABLE_SIZE; i++) { //-------------------------------------------------------// // If the i-th cell is occupied, then delete its key and // // value, and set it to an empty cell by setting its key // // to NullKey. // //-------------------------------------------------------// HashCellPtrType thisCell = tbl.cells + i; if(thisCell->key != NullKey) { deleteKey(thisCell->key); deleteVal(thisCell->val); thisCell->key = NullKey; } } }