Drizzled Public API Documentation

my_hash.cc
00001 /* -*- mode: c++; c-basic-offset: 2; indent-tabs-mode: nil; -*-
00002  *  vim:expandtab:shiftwidth=2:tabstop=2:smarttab:
00003  *
00004  *  Copyright (C) 2008 Sun Microsystems, Inc.
00005  *
00006  *  This program is free software; you can redistribute it and/or modify
00007  *  it under the terms of the GNU General Public License as published by
00008  *  the Free Software Foundation; version 2 of the License.
00009  *
00010  *  This program is distributed in the hope that it will be useful,
00011  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  *  GNU General Public License for more details.
00014  *
00015  *  You should have received a copy of the GNU General Public License
00016  *  along with this program; if not, write to the Free Software
00017  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
00018  */
00019 
00020 /* The hash functions used for saving keys */
00021 /* One of key_length or key_length_offset must be given */
00022 /* Key length of 0 isn't allowed */
00023 
00024 #include <config.h>
00025 #include <drizzled/my_hash.h>
00026 #include <drizzled/charset.h>
00027 #include <drizzled/charset_info.h>
00028 #include <vector>
00029 
00030 namespace drizzled {
00031 
00032 const uint32_t NO_RECORD= UINT32_MAX;
00033 
00034 const int LOWFIND= 1;
00035 const int LOWUSED= 2;
00036 const int HIGHFIND= 4;
00037 const int HIGHUSED= 8;
00038 
00039 static uint32_t hash_mask(uint32_t hashnr, uint32_t buffmax, uint32_t maxlength);
00040 static void movelink(HASH_LINK *array, uint32_t pos, uint32_t next_link, uint32_t newlink);
00041 static int hashcmp(const HASH *hash, HASH_LINK *pos, const unsigned char *key, size_t length);
00042 
00043 static uint32_t calc_hash(const HASH *hash, const unsigned char *key, size_t length)
00044 {
00045   uint32_t nr1=1, nr2=4;
00046   hash->charset->coll->hash_sort(hash->charset, key,length, &nr1, &nr2);
00047   return nr1;
00048 }
00049 
00050 #define dynamic_element(array,array_index,type) ((type)((array)->buffer) +(array_index))
00051 
00052 bool
00053 _hash_init(HASH *hash,uint32_t growth_size, const CHARSET_INFO * const charset,
00054            uint32_t size, size_t key_offset, size_t key_length,
00055            hash_get_key get_key,
00056            hash_free_key free_element, uint32_t flags)
00057 {
00058   hash->records=0;
00059   if (my_init_dynamic_array_ci(&hash->array, sizeof(HASH_LINK), size,
00060                                growth_size))
00061   {
00062     /* Allow call to hash_free */
00063     hash->free=0;
00064     return true;
00065   }
00066   hash->key_offset=key_offset;
00067   hash->key_length=key_length;
00068   hash->blength=1;
00069   hash->get_key=get_key;
00070   hash->free=free_element;
00071   hash->flags=flags;
00072   hash->charset=charset;
00073   return false;
00074 }
00075 
00076 
00077 /*
00078   Call hash->free on all elements in hash.
00079 
00080   SYNOPSIS
00081   hash_free_elements()
00082   hash   hash table
00083 
00084   NOTES:
00085   Sets records to 0
00086 */
00087 
00088 static inline void hash_free_elements(HASH *hash)
00089 {
00090   if (hash->free)
00091   {
00092     HASH_LINK *data=dynamic_element(&hash->array,0,HASH_LINK*);
00093     HASH_LINK *end= data + hash->records;
00094     while (data < end)
00095       (*hash->free)((data++)->data);
00096   }
00097   hash->records=0;
00098 }
00099 
00100 
00101 /*
00102   Free memory used by hash.
00103 
00104   SYNOPSIS
00105   hash_free()
00106   hash   the hash to delete elements of
00107 
00108   NOTES: Hash can't be reused without calling hash_init again.
00109 */
00110 
00111 void hash_free(HASH *hash)
00112 {
00113   hash_free_elements(hash);
00114   hash->free= 0;
00115   delete_dynamic(&hash->array);
00116 }
00117 
00118 /* some helper functions */
00119 
00120 /*
00121   This function is char* instead of unsigned char* as HPUX11 compiler can't
00122   handle inline functions that are not defined as native types
00123 */
00124 
00125 static inline char*
00126 hash_key(const HASH *hash, const unsigned char *record, size_t *length,
00127          bool first)
00128 {
00129   if (hash->get_key)
00130     return (char*) (*hash->get_key)(record,length,first);
00131   *length=hash->key_length;
00132   return (char*) record+hash->key_offset;
00133 }
00134 
00135 /* Calculate pos according to keys */
00136 
00137 static uint32_t hash_mask(uint32_t hashnr,uint32_t buffmax,uint32_t maxlength)
00138 {
00139   if ((hashnr & (buffmax-1)) < maxlength) return (hashnr & (buffmax-1));
00140   return (hashnr & ((buffmax >> 1) -1));
00141 }
00142 
00143 static uint32_t hash_rec_mask(const HASH *hash, HASH_LINK *pos,
00144                               uint32_t buffmax, uint32_t maxlength)
00145 {
00146   size_t length;
00147   unsigned char *key= (unsigned char*) hash_key(hash,pos->data,&length,0);
00148   return hash_mask(calc_hash(hash,key,length),buffmax,maxlength);
00149 }
00150 
00151 
00152 
00153 static
00154 inline
00155 unsigned int rec_hashnr(HASH *hash,const unsigned char *record)
00156 {
00157   size_t length;
00158   unsigned char *key= (unsigned char*) hash_key(hash,record,&length,0);
00159   return calc_hash(hash,key,length);
00160 }
00161 
00162 
00163 unsigned char* hash_search(const HASH *hash, const unsigned char *key,
00164                            size_t length)
00165 {
00166   HASH_SEARCH_STATE state;
00167   return hash_first(hash, key, length, &state);
00168 }
00169 
00170 /*
00171   Search after a record based on a key
00172 
00173   NOTE
00174   Assigns the number of the found record to HASH_SEARCH_STATE state
00175 */
00176 
00177 unsigned char* hash_first(const HASH *hash, const unsigned char *key,
00178                           size_t length,
00179                           HASH_SEARCH_STATE *current_record)
00180 {
00181   HASH_LINK *pos;
00182   uint32_t flag,idx;
00183 
00184   flag=1;
00185   if (hash->records)
00186   {
00187     idx=hash_mask(calc_hash(hash,key,length ? length : hash->key_length),
00188                   hash->blength,hash->records);
00189     do
00190     {
00191       pos= dynamic_element(&hash->array,idx,HASH_LINK*);
00192       if (!hashcmp(hash,pos,key,length))
00193       {
00194         *current_record= idx;
00195         return (pos->data);
00196       }
00197       if (flag)
00198       {
00199         /* Reset flag */
00200         flag=0;
00201         if (hash_rec_mask(hash,pos,hash->blength,hash->records) != idx)
00202           /* Wrong link */
00203           break;
00204       }
00205     }
00206     while ((idx=pos->next) != NO_RECORD);
00207   }
00208   *current_record= NO_RECORD;
00209   return(0);
00210 }
00211 
00212 /* Get next record with identical key */
00213 /* Can only be called if previous calls was hash_search */
00214 
00215 unsigned char* hash_next(const HASH *hash, const unsigned char *key,
00216                          size_t length,
00217                          HASH_SEARCH_STATE *current_record)
00218 {
00219   HASH_LINK *pos;
00220   uint32_t idx;
00221 
00222   if (*current_record != NO_RECORD)
00223   {
00224     HASH_LINK *data=dynamic_element(&hash->array,0,HASH_LINK*);
00225     for (idx=data[*current_record].next; idx != NO_RECORD ; idx=pos->next)
00226     {
00227       pos=data+idx;
00228       if (!hashcmp(hash,pos,key,length))
00229       {
00230         *current_record= idx;
00231         return pos->data;
00232       }
00233     }
00234     *current_record= NO_RECORD;
00235   }
00236   return 0;
00237 }
00238 
00239 
00240 /* Change link from pos to new_link */
00241 
00242 static void movelink(HASH_LINK *array, uint32_t find,
00243                      uint32_t next_link, uint32_t newlink)
00244 {
00245   HASH_LINK *old_link;
00246   do
00247   {
00248     old_link=array+next_link;
00249   }
00250   while ((next_link=old_link->next) != find);
00251   old_link->next= newlink;
00252   return;
00253 }
00254 
00255 /*
00256   Compare a key in a record to a whole key. Return 0 if identical
00257 
00258   SYNOPSIS
00259   hashcmp()
00260   hash   hash table
00261   pos    position of hash record to use in comparison
00262   key    key for comparison
00263   length length of key
00264 
00265   NOTES:
00266   If length is 0, comparison is done using the length of the
00267   record being compared against.
00268 
00269   RETURN
00270   = 0  key of record == key
00271   != 0 key of record != key
00272 */
00273 
00274 static int hashcmp(const HASH *hash, HASH_LINK *pos, const unsigned char *key,
00275                    size_t length)
00276 {
00277   size_t rec_keylength;
00278   unsigned char *rec_key= (unsigned char*) hash_key(hash, pos->data,
00279                                                     &rec_keylength,1);
00280   return ((length && length != rec_keylength) ||
00281           my_strnncoll(hash->charset, rec_key, rec_keylength,
00282                        key, rec_keylength));
00283 }
00284 
00285 
00286 /* Write a hash-key to the hash-index */
00287 
00288 bool my_hash_insert(HASH *info,const unsigned char *record)
00289 {
00290   int flag;
00291   size_t idx;
00292   uint32_t halfbuff,hash_nr,first_index;
00293   unsigned char *ptr_to_rec=NULL,*ptr_to_rec2=NULL;
00294   HASH_LINK *data,*empty,*gpos=NULL,*gpos2=NULL,*pos;
00295 
00296   if (HASH_UNIQUE & info->flags)
00297   {
00298     unsigned char *key= (unsigned char*) hash_key(info, record, &idx, 1);
00299     if (hash_search(info, key, idx))
00300       /* Duplicate entry */
00301       return(true);
00302   }
00303 
00304   flag=0;
00305   if (!(empty=(HASH_LINK*) alloc_dynamic(&info->array)))
00306     /* No more memory */
00307     return(true);
00308 
00309   data=dynamic_element(&info->array,0,HASH_LINK*);
00310   halfbuff= info->blength >> 1;
00311 
00312   idx= first_index= info->records-halfbuff;
00313   /* If some records */
00314   if (idx != info->records)
00315   {
00316     do
00317     {
00318       pos=data+idx;
00319       hash_nr=rec_hashnr(info,pos->data);
00320       /* First loop; Check if ok */
00321       if (flag == 0)
00322         if (hash_mask(hash_nr,info->blength,info->records) != first_index)
00323           break;
00324       if (!(hash_nr & halfbuff))
00325       {
00326         /* Key will not move */
00327         if (!(flag & LOWFIND))
00328         {
00329           if (flag & HIGHFIND)
00330           {
00331             flag=LOWFIND | HIGHFIND;
00332             /* key shall be moved to the current empty position */
00333             gpos=empty;
00334             ptr_to_rec=pos->data;
00335             /* This place is now free */
00336             empty=pos;
00337           }
00338           else
00339           {
00340             /* key isn't changed */
00341             flag=LOWFIND | LOWUSED;
00342             gpos=pos;
00343             ptr_to_rec=pos->data;
00344           }
00345         }
00346         else
00347         {
00348           if (!(flag & LOWUSED))
00349           {
00350             /* Change link of previous LOW-key */
00351             gpos->data=ptr_to_rec;
00352             gpos->next= (uint32_t) (pos-data);
00353             flag= (flag & HIGHFIND) | (LOWFIND | LOWUSED);
00354           }
00355           gpos=pos;
00356           ptr_to_rec=pos->data;
00357         }
00358       }
00359       else
00360       {
00361         /* key will be moved */
00362         if (!(flag & HIGHFIND))
00363         {
00364           flag= (flag & LOWFIND) | HIGHFIND;
00365           /* key shall be moved to the last (empty) position */
00366           gpos2 = empty; empty=pos;
00367           ptr_to_rec2=pos->data;
00368         }
00369         else
00370         {
00371           if (!(flag & HIGHUSED))
00372           {
00373             /* Change link of previous hash-key and save */
00374             gpos2->data=ptr_to_rec2;
00375             gpos2->next=(uint32_t) (pos-data);
00376             flag= (flag & LOWFIND) | (HIGHFIND | HIGHUSED);
00377           }
00378           gpos2=pos;
00379           ptr_to_rec2=pos->data;
00380         }
00381       }
00382     }
00383     while ((idx=pos->next) != NO_RECORD);
00384 
00385     if ((flag & (LOWFIND | LOWUSED)) == LOWFIND)
00386     {
00387       gpos->data=ptr_to_rec;
00388       gpos->next=NO_RECORD;
00389     }
00390     if ((flag & (HIGHFIND | HIGHUSED)) == HIGHFIND)
00391     {
00392       gpos2->data=ptr_to_rec2;
00393       gpos2->next=NO_RECORD;
00394     }
00395   }
00396   /* Check if we are at the empty position */
00397 
00398   idx=hash_mask(rec_hashnr(info,record),info->blength,info->records+1);
00399   pos=data+idx;
00400   if (pos == empty)
00401   {
00402     pos->data=(unsigned char*) record;
00403     pos->next=NO_RECORD;
00404   }
00405   else
00406   {
00407     /* Check if more records in same hash-nr family */
00408     empty[0]=pos[0];
00409     gpos=data+hash_rec_mask(info,pos,info->blength,info->records+1);
00410     if (pos == gpos)
00411     {
00412       pos->data=(unsigned char*) record;
00413       pos->next=(uint32_t) (empty - data);
00414     }
00415     else
00416     {
00417       pos->data=(unsigned char*) record;
00418       pos->next=NO_RECORD;
00419       movelink(data,(uint32_t) (pos-data),(uint32_t) (gpos-data),(uint32_t) (empty-data));
00420     }
00421   }
00422   if (++info->records == info->blength)
00423     info->blength+= info->blength;
00424   return(0);
00425 }
00426 
00427 
00428 /******************************************************************************
00429  ** Remove one record from hash-table. The record with the same record
00430  ** ptr is removed.
00431  ** if there is a free-function it's called for record if found
00432  *****************************************************************************/
00433 
00434 bool hash_delete(HASH *hash,unsigned char *record)
00435 {
00436   uint32_t blength,pos2,pos_hashnr,lastpos_hashnr,idx,empty_index;
00437   HASH_LINK *data,*lastpos,*gpos,*pos,*pos3,*empty;
00438   if (!hash->records)
00439     return(1);
00440 
00441   blength=hash->blength;
00442   data=dynamic_element(&hash->array,0,HASH_LINK*);
00443   /* Search after record with key */
00444   pos=data+ hash_mask(rec_hashnr(hash,record),blength,hash->records);
00445   gpos = 0;
00446 
00447   while (pos->data != record)
00448   {
00449     gpos=pos;
00450     if (pos->next == NO_RECORD)
00451       /* Key not found */
00452       return(1);
00453 
00454     pos=data+pos->next;
00455   }
00456 
00457   if ( --(hash->records) < hash->blength >> 1) hash->blength>>=1;
00458   lastpos=data+hash->records;
00459 
00460   /* Remove link to record */
00461   empty=pos; empty_index=(uint32_t) (empty-data);
00462   if (gpos)
00463     /* unlink current ptr */
00464     gpos->next=pos->next;
00465   else if (pos->next != NO_RECORD)
00466   {
00467     empty=data+(empty_index=pos->next);
00468     pos->data=empty->data;
00469     pos->next=empty->next;
00470   }
00471 
00472   /* last key at wrong pos or no next link */
00473   if (empty == lastpos)
00474     goto exit;
00475 
00476   /* Move the last key (lastpos) */
00477   lastpos_hashnr=rec_hashnr(hash,lastpos->data);
00478   /* pos is where lastpos should be */
00479   pos=data+hash_mask(lastpos_hashnr,hash->blength,hash->records);
00480   /* Move to empty position. */
00481   if (pos == empty)
00482   {
00483     empty[0]=lastpos[0];
00484     goto exit;
00485   }
00486   pos_hashnr=rec_hashnr(hash,pos->data);
00487   /* pos3 is where the pos should be */
00488   pos3= data+hash_mask(pos_hashnr,hash->blength,hash->records);
00489   if (pos != pos3)
00490   {         /* pos is on wrong posit */
00491     empty[0]=pos[0];      /* Save it here */
00492     pos[0]=lastpos[0];      /* This should be here */
00493     movelink(data,(uint32_t) (pos-data),(uint32_t) (pos3-data),empty_index);
00494     goto exit;
00495   }
00496   pos2= hash_mask(lastpos_hashnr,blength,hash->records+1);
00497   if (pos2 == hash_mask(pos_hashnr,blength,hash->records+1))
00498   {         /* Identical key-positions */
00499     if (pos2 != hash->records)
00500     {
00501       empty[0]=lastpos[0];
00502       movelink(data,(uint32_t) (lastpos-data),(uint32_t) (pos-data),empty_index);
00503       goto exit;
00504     }
00505     idx= (uint32_t) (pos-data);   /* Link pos->next after lastpos */
00506   }
00507   else idx= NO_RECORD;    /* Different positions merge */
00508 
00509   empty[0]=lastpos[0];
00510   movelink(data,idx,empty_index,pos->next);
00511   pos->next=empty_index;
00512 
00513 exit:
00514   pop_dynamic(&hash->array);
00515   if (hash->free)
00516     (*hash->free)((unsigned char*) record);
00517   return(0);
00518 }
00519 
00520 unsigned char *hash_element(HASH *hash,uint32_t idx)
00521 {
00522   if (idx < hash->records)
00523     return dynamic_element(&hash->array,idx,HASH_LINK*)->data;
00524   return 0;
00525 }
00526 
00527 } /* namespace drizzled */