Drizzled Public API Documentation

substr_functions.cc
00001 /* -*- mode: c++; c-basic-offset: 2; indent-tabs-mode: nil; -*-
00002  *  vim:expandtab:shiftwidth=2:tabstop=2:smarttab:
00003  *
00004  *  Copyright (C) 2008 Sun Microsystems, Inc.
00005  *  Copyright (C) 2010 Stewart Smith
00006  *
00007  *  This program is free software; you can redistribute it and/or modify
00008  *  it under the terms of the GNU General Public License as published by
00009  *  the Free Software Foundation; version 2 of the License.
00010  *
00011  *  This program is distributed in the hope that it will be useful,
00012  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  *  GNU General Public License for more details.
00015  *
00016  *  You should have received a copy of the GNU General Public License
00017  *  along with this program; if not, write to the Free Software
00018  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
00019  */
00020 
00021 #include <config.h>
00022 
00023 #include <algorithm>
00024 
00025 #include <drizzled/charset_info.h>
00026 #include <drizzled/function/str/strfunc.h>
00027 #include <drizzled/plugin/function.h>
00028 
00029 using namespace std;
00030 using namespace drizzled;
00031 
00032 class SubstrFunction :public Item_str_func
00033 {
00034   String tmp_value;
00035 public:
00036   SubstrFunction() :Item_str_func() {}
00037 
00038   String *val_str(String *);
00039   void fix_length_and_dec();
00040   const char *func_name() const { return "substr"; }
00041 
00042   bool check_argument_count(int n) { return n == 2 || n == 3; }
00043 };
00044 
00045 
00046 class SubstrIndexFunction :public Item_str_func
00047 {
00048   String tmp_value;
00049 public:
00050   SubstrIndexFunction() :Item_str_func() {}
00051 
00052   String *val_str(String *);
00053   void fix_length_and_dec();
00054   const char *func_name() const { return "substring_index"; }
00055 
00056   bool check_argument_count(int n) { return n == 3; }
00057 };
00058 
00059 String *SubstrFunction::val_str(String *str)
00060 {
00061   assert(fixed == 1);
00062   String *res  = args[0]->val_str(str);
00063   /* must be int64_t to avoid truncation */
00064   int64_t start= args[1]->val_int();
00065   /* Assumes that the maximum length of a String is < INT32_MAX. */
00066   /* Limit so that code sees out-of-bound value properly. */
00067   int64_t length= arg_count == 3 ? args[2]->val_int() : INT32_MAX;
00068   int64_t tmp_length;
00069 
00070   if ((null_value=(args[0]->null_value || args[1]->null_value ||
00071        (arg_count == 3 && args[2]->null_value))))
00072     return 0;
00073 
00074   /* Negative or zero length, will return empty string. */
00075   if ((arg_count == 3) && (length <= 0) &&
00076       (length == 0 || !args[2]->unsigned_flag))
00077     return &my_empty_string;
00078 
00079   /* Assumes that the maximum length of a String is < INT32_MAX. */
00080   /* Set here so that rest of code sees out-of-bound value as such. */
00081   if ((length <= 0) || (length > INT32_MAX))
00082     length= INT32_MAX;
00083 
00084   /* if "unsigned_flag" is set, we have a *huge* positive number. */
00085   /* Assumes that the maximum length of a String is < INT32_MAX. */
00086   if ((!args[1]->unsigned_flag && (start < INT32_MIN || start > INT32_MAX)) ||
00087       (args[1]->unsigned_flag && ((uint64_t) start > INT32_MAX)))
00088     return &my_empty_string;
00089 
00090   start= ((start < 0) ?
00091           static_cast<int64_t>(res->numchars() + start)
00092           : start - 1);
00093   start= res->charpos((int) start);
00094   if ((start < 0) || ((uint) start + 1 > res->length()))
00095     return &my_empty_string;
00096 
00097   length= res->charpos((int) length, (uint32_t) start);
00098   tmp_length= res->length() - start;
00099   length= min(length, tmp_length);
00100 
00101   if (!start && (int64_t) res->length() == length)
00102     return res;
00103   tmp_value.set(*res, (uint32_t) start, (uint32_t) length);
00104   return &tmp_value;
00105 }
00106 
00107 void SubstrFunction::fix_length_and_dec()
00108 {
00109   max_length=args[0]->max_length;
00110 
00111   collation.set(args[0]->collation);
00112   if (args[1]->const_item())
00113   {
00114     int32_t start= (int32_t) args[1]->val_int();
00115     if (start < 0)
00116       max_length= ((uint)(-start) > max_length) ? 0 : (uint)(-start);
00117     else
00118       max_length-= min((uint)(start - 1), max_length);
00119   }
00120   if (arg_count == 3 && args[2]->const_item())
00121   {
00122     int32_t length= (int32_t) args[2]->val_int();
00123     if (length <= 0)
00124       max_length=0;
00125     else
00126       set_if_smaller(max_length,(uint) length);
00127   }
00128   max_length*= collation.collation->mbmaxlen;
00129 }
00130 
00131 
00132 void SubstrIndexFunction::fix_length_and_dec()
00133 {
00134   max_length= args[0]->max_length;
00135 
00136   if (agg_arg_charsets(collation, args, 2, MY_COLL_CMP_CONV, 1))
00137     return;
00138 }
00139 
00140 
00141 String *SubstrIndexFunction::val_str(String *str)
00142 {
00143   assert(fixed == 1);
00144   String *res= args[0]->val_str(str);
00145   String *delimiter= args[1]->val_str(&tmp_value);
00146   int32_t count= (int32_t) args[2]->val_int();
00147   uint32_t offset;
00148 
00149   if (args[0]->null_value || args[1]->null_value || args[2]->null_value)
00150   {         // string and/or delim are null
00151     null_value=1;
00152     return 0;
00153   }
00154   null_value=0;
00155   uint32_t delimiter_length= delimiter->length();
00156   if (!res->length() || !delimiter_length || !count)
00157     return &my_empty_string;    // Wrong parameters
00158 
00159   res->set_charset(collation.collation);
00160 
00161   if (use_mb(res->charset()))
00162   {
00163     const char *ptr= res->ptr();
00164     const char *strend= ptr+res->length();
00165     const char *end= strend-delimiter_length+1;
00166     const char *search= delimiter->ptr();
00167     const char *search_end= search+delimiter_length;
00168     int32_t n=0,c=count,pass;
00169     register uint32_t l;
00170     for (pass=(count>0);pass<2;++pass)
00171     {
00172       while (ptr < end)
00173       {
00174         if (*ptr == *search)
00175         {
00176     register char *i,*j;
00177     i=(char*) ptr+1; j=(char*) search+1;
00178     while (j != search_end)
00179       if (*i++ != *j++) goto skip;
00180     if (pass==0) ++n;
00181     else if (!--c) break;
00182     ptr+= delimiter_length;
00183     continue;
00184   }
00185     skip:
00186         if ((l=my_ismbchar(res->charset(), ptr,strend))) ptr+=l;
00187         else ++ptr;
00188       } /* either not found or got total number when count<0 */
00189       if (pass == 0) /* count<0 */
00190       {
00191         c+=n+1;
00192         if (c<=0) return res; /* not found, return original string */
00193         ptr=res->ptr();
00194       }
00195       else
00196       {
00197         if (c) return res; /* Not found, return original string */
00198         if (count>0) /* return left part */
00199         {
00200     tmp_value.set(*res,0,(ulong) (ptr-res->ptr()));
00201         }
00202         else /* return right part */
00203         {
00204     ptr+= delimiter_length;
00205     tmp_value.set(*res,(ulong) (ptr-res->ptr()), (ulong) (strend-ptr));
00206         }
00207       }
00208     }
00209   }
00210   else
00211   {
00212     if (count > 0)
00213     {         // start counting from the beginning
00214       for (offset=0; ; offset+= delimiter_length)
00215       {
00216         if ((int) (offset= res->strstr(*delimiter, offset)) < 0)
00217           return res;     // Didn't find, return org string
00218         if (!--count)
00219         {
00220           tmp_value.set(*res,0,offset);
00221           break;
00222         }
00223       }
00224     }
00225     else
00226     {
00227       /*
00228         Negative index, start counting at the end
00229       */
00230       for (offset=res->length(); offset ;)
00231       {
00232         /*
00233           this call will result in finding the position pointing to one
00234           address space less than where the found substring is located
00235           in res
00236         */
00237         if ((int) (offset= res->strrstr(*delimiter, offset)) < 0)
00238           return res;     // Didn't find, return org string
00239         /*
00240           At this point, we've searched for the substring
00241           the number of times as supplied by the index value
00242         */
00243         if (!++count)
00244         {
00245           offset+= delimiter_length;
00246           tmp_value.set(*res,offset,res->length()- offset);
00247           break;
00248         }
00249       }
00250     }
00251   }
00252   /*
00253     We always mark tmp_value as const so that if val_str() is called again
00254     on this object, we don't disrupt the contents of tmp_value when it was
00255     derived from another String.
00256   */
00257   tmp_value.mark_as_const();
00258   return (&tmp_value);
00259 }
00260 
00261 plugin::Create_function<SubstrFunction> *substr_function= NULL;
00262 plugin::Create_function<SubstrIndexFunction> *substr_index_function= NULL;
00263 
00264 static int initialize(drizzled::module::Context &context)
00265 {
00266   substr_function= new plugin::Create_function<SubstrFunction>("substr");
00267   substr_index_function= new plugin::Create_function<SubstrIndexFunction>("substring_index");
00268   context.add(substr_function);
00269   context.add(substr_index_function);
00270   return 0;
00271 }
00272 
00273 DRIZZLE_DECLARE_PLUGIN
00274 {
00275   DRIZZLE_VERSION_ID,
00276   "substr_functions",
00277   "1.0",
00278   "Stewart Smith",
00279   "SUBSTR and SUBSTR",
00280   PLUGIN_LICENSE_GPL,
00281   initialize, /* Plugin Init */
00282   NULL,   /* depends */
00283   NULL    /* config options */
00284 }
00285 DRIZZLE_DECLARE_PLUGIN_END;