• Main Page
  • Namespaces
  • Classes
  • Files
  • File List
  • File Members

hash.c

Go to the documentation of this file.
00001 //--------------------------------------------------------------------------
00002 //
00003 // File:           hash.c
00004 //
00005 // Purpose:        Hash table implementation
00006 //
00007 // Author:         Jerry Coffin
00008 //
00009 // Description:    Public domain code by Jerry Coffin, with improvements by
00010 //                 HenkJan Wolthuis.
00011 //                 Date last modified: 05-Jul-1997
00012 //
00013 // Revisions:      18-09-2002 -- modified by Pavel Sakov
00014 //
00015 //--------------------------------------------------------------------------
00016 
00017 #include <string.h>
00018 #include <stdlib.h>
00019 #include <assert.h>
00020 #include "hash.h"
00021 
00022 #define INT_PER_DOUBLE    2
00023 
// One node of a bucket chain.  Every slot of the hash table holds the head
// of a singly linked list of these nodes.
//
typedef struct ht_bucket
{
    void            * key;      // the table's own copy of the key (made by the `cp' callback)
    void            * data;     // caller-supplied payload; stored as is, never copied
    int             id;         // value of the table's `naccum' counter when last stored
    struct ht_bucket* next;     // next node in the same chain, NULL at the tail
} ht_bucket;
00033 
00034 //* Hash table structure.
00035 // Note that more nodes than `size' can be inserted in the table,
00036 // but performance degrades as this happens.
00037 //
00038 struct hashtable
00039 {
00040     int         size;           // table size
00041     int         n;              // current number of entries
00042     int         naccum;         // number of inserted entries
00043     int         nhash;          // number of used table elements
00044     ht_keycp    cp;
00045     ht_keyeq    eq;
00046     ht_key2hash hash;
00047     ht_bucket   ** table;
00048 };
00049 
00050 // Creates a hashtable of specified size.
00051 //
00052 hashtable* ht_create( int size, ht_keycp cp, ht_keyeq eq, ht_key2hash hash )
00053 {
00054     hashtable* table = malloc( sizeof ( hashtable ) );
00055     ht_bucket** bucket;
00056     int      i;
00057 
00058     assert( sizeof ( double ) == INT_PER_DOUBLE * sizeof ( int ) );
00059     //
00060     // (used in d1hash() and d2hash())
00061     //
00062 
00063     if ( table == NULL )
00064         return NULL;
00065 
00066     if ( size <= 0 )
00067     {
00068         free( table );
00069         return NULL;
00070     }
00071 
00072     table->size  = size;
00073     table->table = malloc( sizeof ( ht_bucket* ) * size );
00074     bucket       = table->table;
00075 
00076     if ( bucket == NULL )
00077     {
00078         free( table );
00079         return NULL;
00080     }
00081 
00082     for ( i = 0; i < size; ++i )
00083         bucket[i] = NULL;
00084     table->n      = 0;
00085     table->naccum = 0;
00086     table->nhash  = 0;
00087     table->eq     = eq;
00088     table->cp     = cp;
00089     table->hash   = hash;
00090 
00091     return table;
00092 }
00093 
00094 // Destroys a hash table.
00095 // (Take care of deallocating data by ht_process() prior to destroying the
00096 // table if necessary.)
00097 //
00098 // @param table Hash table to be destroyed
00099 //
00100 void ht_destroy( hashtable* table )
00101 {
00102     int i;
00103 
00104     if ( table == NULL )
00105         return;
00106 
00107     for ( i = 0; i < table->size; ++i )
00108     {
00109         ht_bucket* bucket;
00110 
00111         for ( bucket = ( table->table )[i]; bucket != NULL; )
00112         {
00113             ht_bucket* prev = bucket;
00114 
00115             free( bucket->key );
00116             bucket = bucket->next;
00117             free( prev );
00118         }
00119     }
00120 
00121     free( table->table );
00122     free( table );
00123 }
00124 
00125 // Inserts a new entry into the hash table.
00126 //
00127 // @param table The hash table
00128 // @param key Ponter to entry's key
00129 // @param data Pointer to associated data
00130 // @return Pointer to the old data associated with the key, NULL if the key
00131 //         wasn't in the table previously
00132 //
00133 void* ht_insert( hashtable* table, void* key, void* data )
00134 {
00135     unsigned int val = table->hash( key ) % table->size;
00136     ht_bucket    * bucket;
00137 
00138     //
00139     // NULL means this bucket hasn't been used yet.  We'll simply allocate
00140     // space for our new bucket and put our data there, with the table
00141     // pointing at it.
00142     //
00143     if ( ( table->table )[val] == NULL )
00144     {
00145         bucket = malloc( sizeof ( ht_bucket ) );
00146         if ( bucket == NULL )
00147             return NULL;
00148 
00149         bucket->key  = table->cp( key );
00150         bucket->next = NULL;
00151         bucket->data = data;
00152         bucket->id   = table->naccum;
00153 
00154         ( table->table )[val] = bucket;
00155         table->n++;
00156         table->naccum++;
00157         table->nhash++;
00158 
00159         return bucket->data;
00160     }
00161 
00162     //
00163     // This spot in the table is already in use.  See if the current string
00164     // has already been inserted, and if so, return corresponding data.
00165     //
00166     for ( bucket = ( table->table )[val]; bucket != NULL; bucket = bucket->next )
00167         if ( table->eq( key, bucket->key ) == 1 )
00168         {
00169             void* old_data = bucket->data;
00170 
00171             bucket->data = data;
00172             bucket->id   = table->naccum;
00173             table->naccum++;
00174 
00175             return old_data;
00176         }
00177 
00178     //
00179     // This key must not be in the table yet.  We'll add it to the head of
00180     // the list at this spot in the hash table.  Speed would be slightly
00181     // improved if the list was kept sorted instead.  In this case, this
00182     // code would be moved into the loop above, and the insertion would take
00183     // place as soon as it was determined that the present key in the list
00184     // was larger than this one.
00185     //
00186     bucket = (ht_bucket *) malloc( sizeof ( ht_bucket ) );
00187     if ( bucket == NULL )
00188         return 0;
00189     bucket->key  = table->cp( key );
00190     bucket->data = data;
00191     bucket->next = ( table->table )[val];
00192     bucket->id   = table->naccum;
00193 
00194     ( table->table )[val] = bucket;
00195     table->n++;
00196     table->naccum++;
00197 
00198     return data;
00199 }
00200 
00201 // Returns a pointer to the data associated with a key.  If the key has
00202 // not been inserted in the table, returns NULL.
00203 //
00204 // @param table The hash table
00205 // @param key The key
00206 // @return The associated data or NULL
00207 //
00208 void* ht_find( hashtable* table, void* key )
00209 {
00210     unsigned int val = table->hash( key ) % table->size;
00211     ht_bucket    * bucket;
00212 
00213     if ( ( table->table )[val] == NULL )
00214         return NULL;
00215 
00216     for ( bucket = ( table->table )[val]; bucket != NULL; bucket = bucket->next )
00217         if ( table->eq( key, bucket->key ) == 1 )
00218             return bucket->data;
00219 
00220     return NULL;
00221 }
00222 
00223 // Deletes an entry from the table.  Returns a pointer to the data that
00224 // was associated with the key so that the calling code can dispose it
00225 // properly.
00226 //
00227 // @param table The hash table
00228 // @param key The key
00229 // @return The associated data or NULL
00230 //
00231 void* ht_delete( hashtable* table, void* key )
00232 {
00233     unsigned int val = table->hash( key ) % table->size;
00234     ht_bucket    * prev;
00235     ht_bucket    * bucket;
00236     void         * data;
00237 
00238     if ( ( table->table )[val] == NULL )
00239         return NULL;
00240 
00241     //
00242     // Traverse the list, keeping track of the previous node in the list.
00243     // When we find the node to delete, we set the previous node's next
00244     // pointer to point to the node after ourself instead.  We then delete
00245     // the key from the present node, and return a pointer to the data it
00246     // contains.
00247     //
00248     for ( prev = NULL, bucket = ( table->table )[val]; bucket != NULL; prev = bucket, bucket = bucket->next )
00249     {
00250         if ( table->eq( key, bucket->key ) == 1 )
00251         {
00252             data = bucket->data;
00253             if ( prev != NULL )
00254                 prev->next = bucket->next;
00255             else
00256             {
00257                 //
00258                 // If 'prev' still equals NULL, it means that we need to
00259                 // delete the first node in the list. This simply consists
00260                 // of putting our own 'next' pointer in the array holding
00261                 // the head of the list.  We then dispose of the current
00262                 // node as above.
00263                 //
00264                 ( table->table )[val] = bucket->next;
00265                 table->nhash--;
00266             }
00267             free( bucket->key );
00268             free( bucket );
00269             table->n--;
00270 
00271             return data;
00272         }
00273     }
00274 
00275     //
00276     // If we get here, it means we didn't find the item in the table. Signal
00277     // this by returning NULL.
00278     //
00279     return NULL;
00280 }
00281 
00282 // For each entry, calls a specified function with corresponding data as a
00283 // parameter.
00284 //
00285 // @param table The hash table
00286 // @param func The action function
00287 //
00288 void ht_process( hashtable* table, void ( *func )( void* ) )
00289 {
00290     int i;
00291 
00292     for ( i = 0; i < table->size; ++i )
00293         if ( ( table->table )[i] != NULL )
00294         {
00295             ht_bucket* bucket;
00296 
00297             for ( bucket = ( table->table )[i]; bucket != NULL; bucket = bucket->next )
00298                 func( bucket->data );
00299         }
00300 }
00301 
00302 //
00303 // functions for string keys
00304 //
00305 
// Hashes a NUL-terminated string: each character is XOR-ed into the running
// value, which is then shifted left by one bit.
//
// The string is read one character at a time.  Reading it through an
// `unsigned int' pointer would fetch bytes beyond the terminator, so that
// equal strings stored in different buffers could get different hash values.
//
// @param key Pointer to a NUL-terminated string
// @return The hash value; 0 for the empty string
//
static unsigned int strhash( void* key )
{
    const unsigned char* str     = key;
    unsigned int       hashvalue = 0;

    while ( *str != 0 )
    {
        hashvalue  ^= *str;
        hashvalue <<= 1;
        str++;
    }

    return hashvalue;
}
00320 
// Returns a heap-allocated copy of a NUL-terminated string.
// Written with strlen()/malloc()/memcpy() because strdup() is not part of
// ISO C before C23.
//
// @param key Pointer to a NUL-terminated string
// @return The copy, to be released with free(), or NULL if out of memory
//
static void* strcp( void* key )
{
    size_t nbytes = strlen( key ) + 1;     // including the terminator
    char   * copy = malloc( nbytes );

    if ( copy != NULL )
        memcpy( copy, key, nbytes );

    return copy;
}
00325 
// Compares two NUL-terminated strings.
//
// @return 1 if the strings are equal, 0 otherwise
//
static int streq( void* key1, void* key2 )
{
    const char* lhs = key1;
    const char* rhs = key2;

    return strcmp( lhs, rhs ) == 0;
}
00330 
00331 // functions for double keys
00332 
// Hashes a double by adding up the unsigned int words of its object
// representation.
//
// The value is copied with memcpy() rather than read through an
// `unsigned int' pointer, which the aliasing rules do not allow.  -0.0 is
// folded to +0.0 first: the two compare equal, so they must hash equal.
//
// @param key Pointer to a double
// @return The hash value
//
static unsigned int d1hash( void* key )
{
    enum { NWORDS = sizeof ( double ) / sizeof ( unsigned int ) };
    double       value;
    unsigned int words[NWORDS];
    unsigned int hashvalue = 0;
    int          i;

    memcpy( &value, key, sizeof value );
    if ( value == 0.0 )
        value = 0.0;            // true for -0.0 as well; clears its sign bit

    memcpy( words, &value, sizeof words );
    for ( i = 0; i < NWORDS; ++i )
        hashvalue += words[i];

    return hashvalue;
}
00343 
// Returns a heap-allocated copy of a double key.
//
// @param key Pointer to a double
// @return The copy, to be released with free(), or NULL if out of memory
//
static void* d1cp( void* key )
{
    double* newkey = malloc( sizeof *newkey );

    // do not write through the pointer if the allocation failed
    if ( newkey != NULL )
        *newkey = *(double *) key;

    return newkey;
}
00352 
// Compares two double keys with the == operator.
//
// @return 1 if the doubles compare equal, 0 otherwise
//
int d1eq( void* key1, void* key2 )
{
    const double* lhs = key1;
    const double* rhs = key2;

    return *lhs == *rhs;
}
00357 
00358 //
00359 // functions for double[2] keys
00360 //
00361 
00362 #include "math.h"
00363 
// Hashes a double[2] key from the unsigned int words of its object
// representation.  The words of the first double are added with weight 1,
// those of the second double with weights 3, 7, ..., so that (a,b) and
// (b,a) generally get different hash values.
//
// The key is copied with memcpy() rather than read through an
// `unsigned int' pointer, which the aliasing rules do not allow.  -0.0 is
// folded to +0.0 first: the two compare equal, so they must hash equal.
//
// @param key Pointer to two consecutive doubles
// @return The hash value
//
static unsigned int d2hash( void* key )
{
    enum { NWORDS = sizeof ( double ) / sizeof ( unsigned int ) };
    double       xy[2];
    unsigned int words[2 * NWORDS];
    unsigned int hashvalue = 0;
    int          i;

    memcpy( xy, key, sizeof xy );
    for ( i = 0; i < 2; ++i )
        if ( xy[i] == 0.0 )
            xy[i] = 0.0;        // true for -0.0 as well; clears its sign bit

    memcpy( words, xy, sizeof words );
    for ( i = 0; i < NWORDS; ++i )
    {
        hashvalue += words[i];
        // weights 3 and 7 when a double spans two words
        hashvalue += words[NWORDS + i] * ( ( 4u << i ) - 1u );
    }

    return hashvalue;
}
00378 
// Returns a heap-allocated copy of a double[2] key.
//
// @param key Pointer to two consecutive doubles
// @return The copy, to be released with free(), or NULL if out of memory
//
static void* d2cp( void* key )
{
    double* newkey = malloc( 2 * sizeof *newkey );

    // do not write through the pointer if the allocation failed
    if ( newkey != NULL )
        memcpy( newkey, key, 2 * sizeof *newkey );

    return newkey;
}
00388 
// Compares two double[2] keys element by element with the == operator.
//
// @return 1 if both elements compare equal, 0 otherwise
//
static int d2eq( void* key1, void* key2 )
{
    const double* lhs = key1;
    const double* rhs = key2;

    if ( lhs[0] != rhs[0] )
        return 0;

    return lhs[1] == rhs[1];
}
00393 
00394 hashtable* ht_create_d1( int size )
00395 {
00396     return ht_create( size, d1cp, d1eq, d1hash );
00397 }
00398 
00399 hashtable* ht_create_d2( int size )
00400 {
00401     return ht_create( size, d2cp, d2eq, d2hash );
00402 }
00403 
00404 hashtable* ht_create_str( int size )
00405 {
00406     return ht_create( size, strcp, streq, strhash );
00407 }
00408 
00409 #ifdef HT_TEST
00410 
00411 #include <stdio.h>
00412 #include <limits.h>
00413 
00414 #define BUFSIZE    1024
00415 
// ht_process() callback: prints the double that `data' points to, converted
// to int, in double quotes.
//
static void print_double( void* data )
{
    const double* value = data;

    printf( " \"%d\"", (int) *value );
}
00420 
// ht_process() callback: prints the string that `data' points to, in double
// quotes.
//
static void print_string( void* data )
{
    const char* text = data;

    printf( " \"%s\"", text );
}
00425 
00426 int main()
00427 {
00428     double   points[] = {
00429         922803.7855, 7372394.688,   0,
00430         922849.2037, 7372307.027,   1,
00431         922894.657,  7372219.306,   2,
00432         922940.1475, 7372131.528,   3,
00433         922985.6777, 7372043.692,   4,
00434         923031.2501, 7371955.802,   5,
00435         923076.8669, 7371867.857,   6,
00436         923122.5307, 7371779.861,   7,
00437         923168.2439, 7371691.816,   8,
00438         923214.0091, 7371603.722,   9,
00439         923259.8288, 7371515.583,  10,
00440         922891.3958, 7372440.117,  11,
00441         922936.873,  7372352.489,  12,
00442         922982.3839, 7372264.804,  13,
00443         923027.9308, 7372177.064,  14,
00444         923073.5159, 7372089.268,  15,
00445         923119.1415,  7372001.42,  16,
00446         923164.8099, 7371913.521,  17,
00447         923210.5233, 7371825.572,  18,
00448         923256.2841, 7371737.575,  19,
00449         923302.0946, 7371649.534,  20,
00450         923347.9572,  7371561.45,  21,
00451         922978.9747, 7372485.605,  22,
00452         923024.5085, 7372398.009,  23,
00453         923070.0748, 7372310.358,  24,
00454         923115.6759, 7372222.654,  25,
00455         923161.3136, 7372134.897,  26,
00456         923206.9903,  7372047.09,  27,
00457         923252.7079, 7371959.233,  28,
00458         923298.4686,  7371871.33,  29,
00459         923344.2745, 7371783.381,  30,
00460         923390.1279, 7371695.389,  31,
00461         923436.0309, 7371607.357,  32,
00462         923066.5232, 7372531.148,  33,
00463         923112.1115, 7372443.583,  34,
00464         923157.7311, 7372355.966,  35,
00465         923203.3842, 7372268.296,  36,
00466         923249.0725, 7372180.577,  37,
00467         923294.7981, 7372092.808,  38,
00468         923340.5628, 7372004.993,  39,
00469         923386.3686, 7371917.132,  40,
00470         923432.2176, 7371829.229,  41,
00471         923478.1116, 7371741.284,  42,
00472         923524.0527, 7371653.302,  43,
00473         923154.0423, 7372576.746,  44,
00474         923199.6831, 7372489.211,  45,
00475         923245.3541, 7372401.625,  46,
00476         923291.0572, 7372313.989,  47,
00477         923336.7941, 7372226.305,  48,
00478         923382.5667, 7372138.574,  49,
00479         923428.3766, 7372050.798,  50,
00480         923474.2256, 7371962.978,  51,
00481         923520.1155, 7371875.118,  52,
00482         923566.0481, 7371787.218,  53,
00483         923612.0252, 7371699.282,  54,
00484         923241.533,  7372622.396,  55,
00485         923287.2244, 7372534.889,  56,
00486         923332.9449, 7372447.334,  57,
00487         923378.6963, 7372359.731,  58,
00488         923424.4801, 7372272.081,  59,
00489         923470.2979, 7372184.385,  60,
00490         923516.1513, 7372096.646,  61,
00491         923562.0418, 7372008.866,  62,
00492         923607.9709, 7371921.046,  63,
00493         923653.9402, 7371833.188,  64,
00494         923699.9514, 7371745.296,  65,
00495         923328.9962, 7372668.095,  66,
00496         923374.7365, 7372580.617,  67,
00497         923420.5049, 7372493.091,  68,
00498         923466.303,  7372405.519,  69,
00499         923512.1321, 7372317.901,  70,
00500         923557.9936,  7372230.24,  71,
00501         923603.8889, 7372142.536,  72,
00502         923649.8192, 7372054.793,  73,
00503         923695.786,  7371967.011,  74,
00504         923741.7905, 7371879.193,  75,
00505         923787.8341, 7371791.342,  76,
00506         923416.4327, 7372713.844,  77,
00507         923462.2204, 7372626.393,  78,
00508         923508.0353, 7372538.895,  79,
00509         923553.8787, 7372451.353,  80,
00510         923599.7517, 7372363.766,  81,
00511         923645.6555, 7372276.137,  82,
00512         923691.5914, 7372188.467,  83,
00513         923737.5603, 7372100.757,  84,
00514         923783.5634, 7372013.011,  85,
00515         923829.6017, 7371925.231,  86,
00516         923875.6763, 7371837.419,  87,
00517         923503.8433,  7372759.64,  88,
00518         923549.6771, 7372672.214,  89,
00519         923595.5372, 7372584.744,  90,
00520         923641.4246,  7372497.23,  91,
00521         923687.3404, 7372409.673,  92,
00522         923733.2855, 7372322.074,  93,
00523         923779.2608, 7372234.436,  94,
00524         923825.2672, 7372146.759,  95,
00525         923871.3056, 7372059.047,  96,
00526         923917.3766, 7371971.301,  97,
00527         923963.4812, 7371883.524,  98,
00528         923591.2288, 7372805.481,  99,
00529         923637.1076, 7372718.081, 100,
00530         923683.0118, 7372630.638, 101,
00531         923728.9423, 7372543.151, 102,
00532         923774.8998, 7372455.622, 103,
00533         923820.8852, 7372368.052, 104,
00534         923866.8991, 7372280.443, 105,
00535         923912.9422, 7372192.797, 106,
00536         923959.015,  7372105.116, 107,
00537         924005.118,  7372017.402, 108,
00538         924051.2518, 7371929.657, 109,
00539         923678.5898, 7372851.367, 110,
00540         923724.5126, 7372763.992, 111,
00541         923770.46,   7372676.574, 112,
00542         923816.4328, 7372589.113, 113,
00543         923862.4314, 7372501.611, 114,
00544         923908.4564, 7372414.069, 115,
00545         923954.5083, 7372326.488, 116,
00546         924000.5875,  7372238.87, 117,
00547         924046.6941, 7372151.218, 118,
00548         924092.8286, 7372063.533, 119,
00549         924138.9911, 7371975.818, 120
00550     };
00551 
00552     int      size = sizeof ( points ) / sizeof ( double ) / 3;
00553     hashtable* ht;
00554     int      i;
00555 
00556     //
00557     // double[2] key
00558     //
00559 
00560     printf( "\n1. Testing a table with key of double[2] type\n\n" );
00561 
00562     printf( "  creating a table..." );
00563     ht = ht_create_d2( size );
00564     printf( "done\n" );
00565 
00566     printf( "  inserting %d values from a file...", size );
00567     for ( i = 0; i < size; ++i )
00568         ht_insert( ht, &points[i * 3], &points[i * 3 + 2] );
00569     printf( "done\n" );
00570 
00571     printf( "  stats:\n" );
00572     printf( "    %d entries, %d table elements, %d filled elements\n", ht->n, ht->size, ht->nhash );
00573     printf( "    %f entries per hash value in use\n", (double) ht->n / ht->nhash );
00574 
00575     printf( "  finding and printing each 10th data:\n" );
00576     for ( i = 0; i < size; i += 10 )
00577     {
00578         double* point = &points[i * 3];
00579         double* data  = ht_find( ht, point );
00580 
00581         if ( data != NULL )
00582             printf( "    i = %d; data = \"%d\"\n", i, (int) *data );
00583         else
00584             printf( "    i = %d; data = <none>\n", i );
00585     }
00586 
00587     printf( "  removing every 3rd element..." );
00588     for ( i = 0; i < size; i += 3 )
00589     {
00590         double* point = &points[i * 3];
00591         ht_delete( ht, point );
00592     }
00593     printf( "done\n" );
00594 
00595     printf( "  stats:\n" );
00596     printf( "    %d entries, %d table elements, %d filled elements\n", ht->n, ht->size, ht->nhash );
00597     printf( "    %f entries per hash value in use\n", (double) ht->n / ht->nhash );
00598 
00599     printf( "  finding and printing each 10th data:\n" );
00600     for ( i = 0; i < size; i += 10 )
00601     {
00602         double* point = &points[i * 3];
00603         double* data  = ht_find( ht, point );
00604 
00605         if ( data != NULL )
00606             printf( "    i = %d; data = \"%d\"\n", i, (int) *data );
00607         else
00608             printf( "    i = %d; data = <none>\n", i );
00609     }
00610 
00611     printf( "  printing all data by calling ht_process():\n " );
00612     ht_process( ht, print_double );
00613 
00614     printf( "\n  destroying the hash table..." );
00615     ht_destroy( ht );
00616     printf( "done\n" );
00617 
00618     //
00619     // char* key
00620     //
00621 
00622     printf( "\n2. Testing a table with key of char* type\n\n" );
00623 
00624     printf( "  creating a table..." );
00625     ht = ht_create_str( size );
00626     printf( "done\n" );
00627 
00628     printf( "  inserting %d elements with deep copy of each data string...", size );
00629     for ( i = 0; i < size; ++i )
00630     {
00631         char key[BUFSIZE];
00632         char str[BUFSIZE];
00633         char * data;
00634 
00635         sprintf( key, "%d-th key", i );
00636         sprintf( str, "%d-th data", i );
00637         data = strdup( str );
00638         ht_insert( ht, key, data );
00639     }
00640     printf( "done\n" );
00641 
00642     printf( "  stats:\n" );
00643     printf( "    %d entries, %d table elements, %d filled elements\n", ht->n, ht->size, ht->nhash );
00644     printf( "    %f entries per hash value in use\n", (double) ht->n / ht->nhash );
00645 
00646     printf( "  finding and printing each 10th data:\n" );
00647     for ( i = 0; i < size; i += 10 )
00648     {
00649         char key[BUFSIZE];
00650         char * data;
00651 
00652         sprintf( key, "%d-th key", i );
00653         data = ht_find( ht, key );
00654         if ( data != NULL )
00655             printf( "    i = %d; data = \"%s\"\n", i, data );
00656         else
00657             printf( "    i = %d; data = <none>\n", i );
00658     }
00659 
00660     printf( "  removing every 3rd element..." );
00661     for ( i = 0; i < size; i += 3 )
00662     {
00663         char key[BUFSIZE];
00664 
00665         sprintf( key, "%d-th key", i );
00666         free( ht_delete( ht, key ) );
00667     }
00668     printf( "done\n" );
00669 
00670     printf( "  stats:\n" );
00671     printf( "    %d entries, %d table elements, %d filled elements\n", ht->n, ht->size, ht->nhash );
00672     printf( "    %f entries per hash value in use\n", (double) ht->n / ht->nhash );
00673 
00674     printf( "  finding and printing each 10th data:\n" );
00675     for ( i = 0; i < size; i += 10 )
00676     {
00677         char key[BUFSIZE];
00678         char * data;
00679 
00680         sprintf( key, "%d-th key", i );
00681         data = ht_find( ht, key );
00682         if ( data != NULL )
00683             printf( "    i = %d; data = \"%s\"\n", i, data );
00684         else
00685             printf( "    i = %d; data = <none>\n", i );
00686     }
00687 
00688     printf( "  printing all data by calling ht_process():\n " );
00689     ht_process( ht, print_string );
00690 
00691     printf( "\n  freeing the remaining data by calling ht_process()..." );
00692     ht_process( ht, free );
00693     printf( "done\n" );
00694 
00695     printf( "  destroying the hash table..." );
00696     ht_destroy( ht );
00697     printf( "done\n" );
00698 
00699     return 0;
00700 }
00701 
00702 #endif                          // HT_TEST

Generated on Wed Oct 12 2011 20:42:21 for PLplot by  doxygen 1.7.1