/*
 * Playing around with ideas from python's hash table implementation.
 * Minimal error checking, simplest cases only, minimal functionality
 *
 * See
 *  - http://effbot.org/zone/python-hash.htm
 *  - "Beautiful Code", chap 18
 *  - http://svn.python.org/view/python/trunk/Objects/dictnotes.txt?view=markup
 *  - http://svn.python.org/view/python/trunk/Objects/dictobject.c?view=markup
 *
 * Jim M, April 2011
 */

#define DEBUG 0

// NOTE(review): the header names were missing from the three #include lines.
// <stdio.h> (printf) and <stdlib.h> (malloc/calloc/free) are required by the
// code below; <string.h> is assumed for the third -- confirm.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// One entry in the hash table
typedef struct Pair *Pair;
struct Pair {
  char* key;   // borrowed pointer: the caller keeps ownership of the string
  long hash;   // hash stored for speed.
  long value;
};

// The hash table itself
typedef struct HashTable *HashTable;
struct HashTable {
  long mask;    // number of slots - 1; the slot count is a power of two
  Pair* pairs;  // array of (mask + 1) slots; NULL marks an empty slot
};

// function declarations
long hash(char* string);
// NOTE(review): findslot is declared but not defined in the visible source;
// findindex is the probe routine that put/get use -- confirm and drop findslot.
long findslot(long hash, HashTable h);
long findindex(long hash, HashTable h);
void put(char* key, long value, HashTable h);
long get(char* key, HashTable h);
HashTable newHashTable(long size);
void printTable(HashTable h);

// Make a new hash table with at least `size` empty slots.
//
// The slot count is rounded up to the next power of two (minimum 1) because
// indexes are reduced with `mask & n`, which only reaches every slot when
// mask is of the form 2^k - 1.
//
// Returns NULL if memory cannot be allocated.
HashTable newHashTable(long size){
  size_t slots = 1;
  while (size > 0 && slots < (size_t)size){
    slots <<= 1;
  }

  HashTable table = malloc(sizeof *table);
  if (!table){
    return NULL;
  }
  // Each slot holds a Pair (a pointer), not a struct Pair.
  table->pairs = calloc(slots, sizeof *table->pairs);
  if (!table->pairs){
    free(table);
    return NULL;
  }
  table->mask = (long)(slots - 1);
  return table;
}

// limit an index to a slot in the table
// using the size of the table to convert n to a number < size
long chop(HashTable h, long n){
  return h->mask & n;
}

// Print every slot of the hash table, one per line: either "empty" or the
// (key, value) stored there.
void printTable(HashTable h){
  long size = h->mask + 1;
  for (long i = 0; i < size; i++){
    Pair entry = h->pairs[i];
    if (!entry){
      printf(" %li empty \n", i);
    }
    else {
      printf(" %li (%s, %li) \n", i, entry->key, entry->value);
    }
  }
}

// Make a new Pair holding key, value and the key's hash.
// The key pointer is stored as-is (the string is not copied).
// Returns NULL if memory cannot be allocated.
Pair newPair(char* key, long value){
  Pair p = malloc(sizeof *p);
  if (!p){
    return NULL;
  }
  p->key = key;
  p->value = value;
  p->hash = hash(key);
  return p;
}

// Return first index which is empty or matches given hash.
long findindex(long hash, HashTable h){ long index = hash; long perturb = hash; while (h->pairs[chop(h,index)] && h->pairs[chop(h,index)]->hash != hash){ if (DEBUG) printf(" findex collision at index=%li \n", index); // index = 5*index + perturb + 1; index = (index<<2) + index + perturb + 1; perturb >>= 5; } if (DEBUG) printf(" findex success at %li \n", index); index = chop(h, index); if (DEBUG) printf(" findex chopped to %li \n", index); return index; } // Put a key,value pair into the a table void put(char* key, long value, HashTable h){ long index = findindex(hash(key), h); if (DEBUG) printf(" index = %li \n", index); Pair p = newPair(key, value); if (DEBUG) printf(" pair (key,value) = (%s, %li) \n", key, value); h->pairs[index] = p; } // Return the value from (key,value) in the table. long get(char* key, HashTable h){ long index = findindex(hash(key), h); if (DEBUG) printf(" get index = %li \n", index); return h->pairs[index]->value; } long hash(char* string){ // Hash function that python uses for strings. long value, i=0; if (!string[0]) return 0; value = string[i] << 7; if (DEBUG) printf("debug: value = %lu \n", value); while (string[i]){ value *= 1000003; value ^= string[i]; if (DEBUG) printf("debug: value = %lu \n", value); i++; } value = value ^ i; if (DEBUG) printf("debug: value = %lu \n", value); if (value == -1) return -2; return value; } int main(){ Pair p; HashTable h = newHashTable(32); int i; char* strings[8] = {"aa", "bb", "cc", "dd", "ee", "ff", "gg", "hh"}; long values[8] = {1, 2, 3, 4, 5, 6, 7, 8}; int n_strings = 8; // test hash function printf(" test : hash(\"%s\") = %li \n", "foo", hash("foo")); for (i=0; i