00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016 #include <config.h>
00017
00018 #include <drizzled/charset.h>
00019 #include <drizzled/error.h>
00020 #include <drizzled/charset_info.h>
00021 #include <drizzled/internal/m_string.h>
00022 #include <drizzled/configmake.h>
00023 #include <vector>
00024
00025 #include <drizzled/visibility.h>
00026
00027 using namespace std;
00028
00029 namespace drizzled
00030 {
00031
00032
00033
00034
/*
  Registry of every buffer handed out by cs_alloc().  free_charsets()
  walks this list to release them all.
*/
static vector<unsigned char*> memory_vector;
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047 bool my_charset_same(const CHARSET_INFO *cs1, const CHARSET_INFO *cs2)
00048 {
00049 return ((cs1 == cs2) || !strcmp(cs1->csname,cs2->csname));
00050 }
00051
00052
00053 static uint
00054 get_collation_number_internal(const char *name)
00055 {
00056 for (CHARSET_INFO **cs= all_charsets;
00057 cs < all_charsets+array_elements(all_charsets)-1;
00058 cs++)
00059 {
00060 if ( cs[0] && cs[0]->name && !my_strcasecmp(&my_charset_utf8_general_ci, cs[0]->name, name))
00061 {
00062 return cs[0]->number;
00063 }
00064 }
00065 return 0;
00066 }
00067
00068 static unsigned char *cs_alloc(size_t size)
00069 {
00070 memory_vector.push_back(new unsigned char[size]);
00071 return memory_vector.back();
00072 }
00073
00074 static bool init_state_maps(CHARSET_INFO *cs)
00075 {
00076 if (!(cs->state_map= cs_alloc(256)))
00077 return 1;
00078
00079 if (!(cs->ident_map= cs_alloc(256)))
00080 return 1;
00081
00082 unsigned char *state_map= cs->state_map;
00083 unsigned char *ident_map= cs->ident_map;
00084
00085
00086 for (int i= 0; i < 256; i++)
00087 {
00088 if (my_isalpha(cs,i))
00089 state_map[i]= MY_LEX_IDENT;
00090 else if (my_isdigit(cs,i))
00091 state_map[i]= MY_LEX_NUMBER_IDENT;
00092 else if (my_mbcharlen(cs, i)>1)
00093 state_map[i]= MY_LEX_IDENT;
00094 else if (my_isspace(cs,i))
00095 state_map[i]= MY_LEX_SKIP;
00096 else
00097 state_map[i]= MY_LEX_CHAR;
00098 }
00099 state_map['_']=state_map['$']= MY_LEX_IDENT;
00100 state_map['\'']= MY_LEX_STRING;
00101 state_map['.']= MY_LEX_REAL_OR_POINT;
00102 state_map['>']=state_map['=']=state_map['!']= MY_LEX_CMP_OP;
00103 state_map['<']= MY_LEX_LONG_CMP_OP;
00104 state_map['&']=state_map['|']= MY_LEX_BOOL;
00105 state_map['#']= MY_LEX_COMMENT;
00106 state_map[';']= MY_LEX_SEMICOLON;
00107 state_map[':']= MY_LEX_SET_VAR;
00108 state_map[0]= MY_LEX_EOL;
00109 state_map['\\']= MY_LEX_ESCAPE;
00110 state_map['/']= MY_LEX_LONG_COMMENT;
00111 state_map['*']= MY_LEX_END_LONG_COMMENT;
00112 state_map['@']= MY_LEX_USER_END;
00113 state_map['`']= MY_LEX_USER_VARIABLE_DELIMITER;
00114 state_map['"']= MY_LEX_STRING_OR_DELIMITER;
00115
00116
00117
00118
00119 for (int i= 0; i < 256; i++)
00120 {
00121 ident_map[i]= state_map[i] == MY_LEX_IDENT || state_map[i] == MY_LEX_NUMBER_IDENT;
00122 }
00123
00124
00125 state_map['x']= state_map['X']= MY_LEX_IDENT_OR_HEX;
00126 state_map['b']= state_map['B']= MY_LEX_IDENT_OR_BIN;
00127 return 0;
00128 }
00129
/*
  True once init_available_charsets() has filled all_charsets;
  reset to false by free_charsets().
*/
static bool charset_initialized= false;

/*
  Table of known collations.  add_compiled_collation() stores each entry
  at the index given by its own number field; unused slots are NULL
  after init_available_charsets() has cleared the table.
*/
DRIZZLED_API CHARSET_INFO *all_charsets[256];
/* Collation that get_charset() returns directly when asked for this collation's number. */
const DRIZZLED_API CHARSET_INFO *default_charset_info = &my_charset_utf8_general_ci;
00134
00135 void add_compiled_collation(CHARSET_INFO * cs)
00136 {
00137 all_charsets[cs->number]= cs;
00138 cs->state|= MY_CS_AVAILABLE;
00139 }
00140
00141 static bool init_available_charsets(myf myflags)
00142 {
00143 bool error= false;
00144
00145
00146
00147
00148 if (charset_initialized == false)
00149 {
00150 CHARSET_INFO **cs;
00151 memset(&all_charsets, 0, sizeof(all_charsets));
00152 init_compiled_charsets(myflags);
00153
00154
00155 for (cs=all_charsets;
00156 cs < all_charsets+array_elements(all_charsets)-1 ;
00157 cs++)
00158 {
00159 if (*cs)
00160 {
00161 if (cs[0]->ctype)
00162 if (init_state_maps(*cs))
00163 *cs= NULL;
00164 }
00165 }
00166
00167 charset_initialized= true;
00168 }
00169 assert(charset_initialized);
00170
00171 return error;
00172 }
00173
00174
00175 void free_charsets()
00176 {
00177 charset_initialized= false;
00178
00179 while (not memory_vector.empty())
00180 {
00181 delete[] memory_vector.back();
00182 memory_vector.pop_back();
00183 }
00184 }
00185
00186
00187 uint32_t get_collation_number(const char *name)
00188 {
00189 init_available_charsets(MYF(0));
00190 return get_collation_number_internal(name);
00191 }
00192
00193
00194 uint32_t get_charset_number(const char *charset_name, uint32_t cs_flags)
00195 {
00196 CHARSET_INFO **cs;
00197 init_available_charsets(MYF(0));
00198
00199 for (cs= all_charsets;
00200 cs < all_charsets+array_elements(all_charsets)-1 ;
00201 cs++)
00202 {
00203 if ( cs[0] && cs[0]->csname && (cs[0]->state & cs_flags) && !my_strcasecmp(&my_charset_utf8_general_ci, cs[0]->csname, charset_name))
00204 return cs[0]->number;
00205 }
00206 return 0;
00207 }
00208
00209
00210 const char *get_charset_name(uint32_t charset_number)
00211 {
00212 init_available_charsets(MYF(0));
00213
00214 const CHARSET_INFO *cs= all_charsets[charset_number];
00215 if (cs && (cs->number == charset_number) && cs->name )
00216 return cs->name;
00217
00218 return "?";
00219 }
00220
00221
00222 static const CHARSET_INFO *get_internal_charset(uint32_t cs_number)
00223 {
00224 CHARSET_INFO *cs;
00225
00226
00227
00228
00229 if ((cs= all_charsets[cs_number]))
00230 {
00231 if (!(cs->state & MY_CS_COMPILED) && !(cs->state & MY_CS_LOADED))
00232 {
00233 assert(0);
00234 }
00235 cs= (cs->state & MY_CS_AVAILABLE) ? cs : NULL;
00236 }
00237 if (cs && !(cs->state & MY_CS_READY))
00238 {
00239 if ((cs->cset->init && cs->cset->init(cs, cs_alloc)) ||
00240 (cs->coll->init && cs->coll->init(cs, cs_alloc)))
00241 cs= NULL;
00242 else
00243 cs->state|= MY_CS_READY;
00244 }
00245
00246 return cs;
00247 }
00248
00249
00250 const CHARSET_INFO *get_charset(uint32_t cs_number)
00251 {
00252 const CHARSET_INFO *cs;
00253 if (cs_number == default_charset_info->number)
00254 return default_charset_info;
00255
00256 (void) init_available_charsets(MYF(0));
00257
00258 if (!cs_number || cs_number >= array_elements(all_charsets)-1)
00259 return NULL;
00260
00261 cs= get_internal_charset(cs_number);
00262
00263 return cs;
00264 }
00265
00266 const CHARSET_INFO *get_charset_by_name(const char *cs_name)
00267 {
00268 uint32_t cs_number;
00269 const CHARSET_INFO *cs;
00270 (void) init_available_charsets(MYF(0));
00271
00272 cs_number= get_collation_number(cs_name);
00273 cs= cs_number ? get_internal_charset(cs_number) : NULL;
00274
00275 return cs;
00276 }
00277
00278
00279 const CHARSET_INFO *get_charset_by_csname(const char *cs_name, uint32_t cs_flags)
00280 {
00281 uint32_t cs_number;
00282 const CHARSET_INFO *cs;
00283
00284 (void) init_available_charsets(MYF(0));
00285
00286 cs_number= get_charset_number(cs_name, cs_flags);
00287 cs= cs_number ? get_internal_charset(cs_number) : NULL;
00288
00289 return(cs);
00290 }
00291
00292
00293
00294
00295
00296
00297
00298
00299
00300
00301
00302
00303
00304
00305
00306
00307
00308
00309
00310
00311
00312
00313
00314
00315
00316
00317
00318 size_t escape_quotes_for_drizzle(const CHARSET_INFO *charset_info,
00319 char *to, size_t to_length,
00320 const char *from, size_t length)
00321 {
00322 const char *to_start= to;
00323 const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length);
00324 bool overflow= false;
00325 bool use_mb_flag= use_mb(charset_info);
00326 for (end= from + length; from < end; from++)
00327 {
00328 int tmp_length;
00329 if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end)))
00330 {
00331 if (to + tmp_length > to_end)
00332 {
00333 overflow= true;
00334 break;
00335 }
00336 while (tmp_length--)
00337 *to++= *from++;
00338 from--;
00339 continue;
00340 }
00341
00342
00343
00344
00345
00346 if (*from == '\'')
00347 {
00348 if (to + 2 > to_end)
00349 {
00350 overflow= true;
00351 break;
00352 }
00353 *to++= '\'';
00354 *to++= '\'';
00355 }
00356 else
00357 {
00358 if (to + 1 > to_end)
00359 {
00360 overflow= true;
00361 break;
00362 }
00363 *to++= *from;
00364 }
00365 }
00366 *to= 0;
00367 return overflow ? UINT32_MAX : to - to_start;
00368 }
00369
00370 }