/* IS2610 asn 8 - in memory hashing due 6PM Thursday Nov 17, 2005 In an effort to cover the most useful data structures we will be doing hashing from book chapt 14 this week. I hope you all find this one to be easy going. There is no memory allocation and, as I mentioned, it should not be possible to make a my_hash() that does not work at all!!! You should count your collisions during your linear probing collision resolution. A collision is when you find a slot already occupied with a word other than the one you are looking for. You do not count the final successful insertion as a collision. Just as we did in previous assignments you will simply increase the count on repeats of the same word. Once your code in hashwalk() and the part in main() are correct then, other than the run times, you should get the result shown below. Then go ahead and make up your own my_hash() function to see if you can make it run faster or with fewer collisions than you have with the book_hash() function. It is not difficult to make one that will give a faster CPU time even if you are not able to reduce the number of collisions. However, since I will not grade on the basis of speed or number of collisions please make sure you do your own best hash function design and do not simply take one from the multitude of book or web resources! [awetzel@europa /tmp]$ gcc -DHASHFUN=zero_hash asn8s.c [awetzel@europa /tmp]$ time a.out < vbgle10.txt 334939357 collisions 12816 unique words from a total of 210333 words the most frequent word is used 16930 times user 0m19.130s [awetzel@europa /tmp]$ gcc -DHASHFUN=dumb_hash asn8s.c [awetzel@europa /tmp]$ time a.out < vbgle10.txt 263342701 collisions 12816 unique words from a total of 210333 words the most frequent word is used 16930 times user 0m16.020s [awetzel@europa /tmp]$ gcc -DHASHFUN=book_hash asn8s.c [awetzel@europa /tmp]$ time a.out < vbgle10.txt 42947 collisions 12816 unique words from a total of 210333 words the most frequent word is used 16930 times user 0m0.190s [awetzel@europa /tmp]$ gcc -DHASHFUN=my_hash asn8s.c [awetzel@europa /tmp]$ time a.out < vbgle10.txt 41938 collisions 12816 unique words from a total of 210333 words the most frequent word is used 16930 times user 0m0.120s */ #ifndef HASHFUN #define HASHFUN my_hash #endif #include #include #define HTSIZE 19223 // A prime that is nearly 1.5 * 12816 #define MAXWLEN 24 // This is longer than the longest word in Voyage struct hentry { // Your hashtable is just an array of hentry structs int count; char word[MAXWLEN]; } hashtable[HTSIZE]; int ncollisions; // count your number of collisions here int my_hash(char *); int book_hash(char *); int main(int, char **); int get_word(char *); void hashwalk(); int nunique, ntotal, most_frequent_entry; char word_buff[100]; int main(int argc, char *argv[]) { while(get_word(word_buff) > 0) { /* **** YOUR CODE TO USE THE HASH FUN AND RESOLVE COLLISIONS */ } printf("%d collisions\n", ncollisions); // count your collisions above hashwalk(); // this will collect your stats to print below printf("%d unique words from a total of %d words\n", nunique, ntotal); printf("the most frequent word <%s> is used %d times\n", hashtable[most_frequent_entry].word, hashtable[most_frequent_entry].count); return(0); } // This is the ultimate in bad hashing!!!! int zero_hash(char *wp) { return(0); } // This one is also very bad int dumb_hash(char *wp) { unsigned int v = 0; while(*wp) v += *wp++; return(v % HTSIZE); } // This one from the book is pretty good for for most things int book_hash(char *wp) { int h, a=31415, b=27183; for (h = 0; *wp != '\0' ;wp++, a = a*b % (HTSIZE-1)) h = (a*h + *wp) % HTSIZE; return h; } // Now your hash function should be better yet!!! int my_hash(char *wp) { unsigned int v = 0; /* unsigned to avoid shifting problems */ /* **** A REPLACEMENT HASH FUNCTION OF YOUR OWN DESIGN **** */ } void hashwalk() { /* **** YOUR CODE TO GATHER THE USUAL WORD STATS **** */ } #include // Leave this routine EXACTLY as it stands int get_word(char *s) { int c; do { c = getchar(); if(c == EOF) return(0); } while(!isalpha(c) && !isdigit(c)); do { if(isupper(c)) c = tolower(c); *s++ = c; c = getchar(); } while(isalpha(c) || isdigit(c)); *s = 0; return(1); }