Drizzled Public API Documentation

drizzleimport.cc
00001 /* 
00002   Copyright (C) 2010 Vijay Samuel
00003   Copyright (C) 2010 Brian Aker
00004   Copyright (C) 2000-2006 MySQL AB
00005   Copyright (C) 2008-2009 Sun Microsystems, Inc.
00006 
00007   This program is free software; you can redistribute it and/or modify
00008   it under the terms of the GNU General Public License as published by
00009   the Free Software Foundation; version 2 of the License.
00010 
00011   This program is distributed in the hope that it will be useful,
00012   but WITHOUT ANY WARRANTY; without even the implied warranty of
00013   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014   GNU General Public License for more details.
00015 
00016   You should have received a copy of the GNU General Public License
00017   along with this program; if not, write to the Free Software
00018   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA */
00019 
00020 #define IMPORT_VERSION "4.0"
00021 
#include "client_priv.h"
#include <algorithm>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
#include <boost/program_options.hpp>
#include <pthread.h>
00029 
00030 /* Added this for string translation. */
00031 #include <drizzled/gettext.h>
00032 #include <drizzled/configmake.h>
00033 
00034 namespace po= boost::program_options;
00035 using namespace std;
00036 using namespace drizzled;
00037 
00038 extern "C" void * worker_thread(void *arg);
00039 
00040 int exitcode= 0;
00041 
00042 const char *program_name= "drizzleimport";
00043 
00044 /* Global Thread counter */
00045 uint32_t counter;
00046 pthread_mutex_t counter_mutex;
00047 pthread_cond_t count_threshhold;
00048 
00049 static void db_error(drizzle_con_st *con, drizzle_result_st *result,
00050                      drizzle_return_t ret, char *table);
00051 static char *field_escape(char *to,const char *from,uint32_t length);
00052 static char *add_load_option(char *ptr,const char *object,
00053            const char *statement);
00054 
00055 static bool verbose= false, ignore_errors= false,
00056             opt_delete= false, opt_replace= false, silent= false,
00057             ignore_unique= false, opt_low_priority= false,
00058             use_drizzle_protocol= false, opt_local_file;
00059 
00060 static uint32_t opt_use_threads;
00061 static uint32_t opt_drizzle_port= 0;
00062 static int64_t opt_ignore_lines= -1;
00063 
00064 std::string opt_columns,
00065   opt_enclosed,
00066   escaped,
00067   password,
00068   current_db,
00069   lines_terminated,
00070   current_user,
00071   opt_password,
00072   enclosed,  
00073   current_host,
00074   fields_terminated,
00075   opt_protocol;
00076 
00077 
00078 static int get_options(void)
00079 {
00080 
00081   if (! enclosed.empty() && ! opt_enclosed.empty())
00082   {
00083     fprintf(stderr, "You can't use ..enclosed.. and ..optionally-enclosed.. at the same time.\n");
00084     return(1);
00085   }
00086   if (opt_replace && ignore_unique)
00087   {
00088     fprintf(stderr, "You can't use --ignore_unique (-i) and --replace (-r) at the same time.\n");
00089     return(1);
00090   }
00091 
00092   if (tty_password)
00093     opt_password=client_get_tty_password(NULL);
00094   return(0);
00095 }
00096 
00097 
00098 
00099 static int write_to_table(char *filename, drizzle_con_st *con)
00100 {
00101   char tablename[FN_REFLEN], hard_path[FN_REFLEN],
00102        sql_statement[FN_REFLEN*16+256], *end;
00103   drizzle_result_st result;
00104   drizzle_return_t ret;
00105 
00106   internal::fn_format(tablename, filename, "", "", 1 | 2); /* removes path & ext. */
00107   if (not opt_local_file)
00108     strcpy(hard_path,filename);
00109   else
00110     internal::my_load_path(hard_path, filename, NULL); /* filename includes the path */
00111 
00112   if (opt_delete)
00113   {
00114     if (verbose)
00115       fprintf(stdout, "Deleting the old data from table %s\n", tablename);
00116 #ifdef HAVE_SNPRINTF
00117     snprintf(sql_statement, sizeof(sql_statement), "DELETE FROM %s", tablename);
00118 #else
00119     snprintf(sql_statement, sizeof(sql_statement), "DELETE FROM %s", tablename);
00120 #endif
00121     if (drizzle_query_str(con, &result, sql_statement, &ret) == NULL ||
00122         ret != DRIZZLE_RETURN_OK)
00123     {
00124       db_error(con, &result, ret, tablename);
00125       return(1);
00126     }
00127     drizzle_result_free(&result);
00128   }
00129   if (verbose)
00130   {
00131     if (opt_local_file)
00132       fprintf(stdout, "Loading data from LOCAL file: %s into %s\n",
00133         hard_path, tablename);
00134     else
00135       fprintf(stdout, "Loading data from SERVER file: %s into %s\n",
00136         hard_path, tablename);
00137   }
00138   snprintf(sql_statement, sizeof(sql_statement), "LOAD DATA %s %s INFILE '%s'",
00139     opt_low_priority ? "LOW_PRIORITY" : "",
00140     opt_local_file ? "LOCAL" : "", hard_path);
00141   end= strchr(sql_statement, '\0');
00142   if (opt_replace)
00143     end= strcpy(end, " REPLACE")+8;
00144   if (ignore_unique)
00145     end= strcpy(end, " IGNORE")+7;
00146 
00147   end+= sprintf(end, " INTO TABLE %s", tablename);
00148 
00149   if (! fields_terminated.empty() || ! enclosed.empty() || ! opt_enclosed.empty() || ! escaped.empty())
00150       end= strcpy(end, " FIELDS")+7;
00151   end= add_load_option(end, (char *)fields_terminated.c_str(), " TERMINATED BY");
00152   end= add_load_option(end, (char *)enclosed.c_str(), " ENCLOSED BY");
00153   end= add_load_option(end, (char *)opt_enclosed.c_str(),
00154            " OPTIONALLY ENCLOSED BY");
00155   end= add_load_option(end, (char *)escaped.c_str(), " ESCAPED BY");
00156   end= add_load_option(end, (char *)lines_terminated.c_str(), " LINES TERMINATED BY");
00157   if (opt_ignore_lines >= 0)
00158   {
00159     end= strcpy(end, " IGNORE ")+8;
00160     ostringstream buffer;
00161     buffer << opt_ignore_lines;
00162     end= strcpy(end, buffer.str().c_str())+ buffer.str().size();
00163     end= strcpy(end, " LINES")+6;
00164   }
00165   if (! opt_columns.empty())
00166   {
00167     end= strcpy(end, " (")+2;
00168     end= strcpy(end, (char *)opt_columns.c_str()+opt_columns.length());
00169     end= strcpy(end, ")")+1;
00170   }
00171   *end= '\0';
00172 
00173   if (drizzle_query_str(con, &result, sql_statement, &ret) == NULL ||
00174       ret != DRIZZLE_RETURN_OK)
00175   {
00176     db_error(con, &result, ret, tablename);
00177     return(1);
00178   }
00179   if (!silent)
00180   {
00181     if (strcmp(drizzle_result_info(&result), ""))
00182     {
00183       fprintf(stdout, "%s.%s: %s\n", current_db.c_str(), tablename,
00184         drizzle_result_info(&result));
00185     }
00186   }
00187   drizzle_result_free(&result);
00188   return(0);
00189 }
00190 
00191 
00192 static drizzle_con_st *db_connect(const string host, const string database,
00193                                   const string user, const string passwd)
00194 {
00195   drizzle_st *drizzle;
00196   drizzle_con_st *con;
00197   drizzle_return_t ret;
00198 
00199   if (verbose)
00200     fprintf(stdout, "Connecting to %s, using protocol %s...\n", ! host.empty() ? host.c_str() : "localhost", opt_protocol.c_str());
00201   if (!(drizzle= drizzle_create(NULL)))
00202     return 0;
00203   if (!(con= drizzle_con_add_tcp(drizzle,NULL,(char *)host.c_str(),opt_drizzle_port,(char *)user.c_str(),(char *)passwd.c_str(),
00204                                  (char *)database.c_str(), use_drizzle_protocol ? DRIZZLE_CON_EXPERIMENTAL : DRIZZLE_CON_MYSQL)))
00205   {
00206     return 0;
00207   }
00208 
00209   if ((ret= drizzle_con_connect(con)) != DRIZZLE_RETURN_OK)
00210   {
00211     ignore_errors=0;    /* NO RETURN FROM db_error */
00212     db_error(con, NULL, ret, NULL);
00213   }
00214 
00215   if (verbose)
00216     fprintf(stdout, "Selecting database %s\n", database.c_str());
00217 
00218   return con;
00219 }
00220 
00221 
00222 
00223 static void db_disconnect(const string host, drizzle_con_st *con)
00224 {
00225   if (verbose)
00226     fprintf(stdout, "Disconnecting from %s\n", ! host.empty() ? host.c_str() : "localhost");
00227   drizzle_free(drizzle_con_drizzle(con));
00228 }
00229 
00230 
00231 
00232 static void safe_exit(int error, drizzle_con_st *con)
00233 {
00234   if (ignore_errors)
00235     return;
00236   if (con)
00237     drizzle_free(drizzle_con_drizzle(con));
00238   exit(error);
00239 }
00240 
00241 
00242 
00243 static void db_error(drizzle_con_st *con, drizzle_result_st *result,
00244                      drizzle_return_t ret, char *table)
00245 {
00246   if (ret == DRIZZLE_RETURN_ERROR_CODE)
00247   {
00248     fprintf(stdout, "Error: %d, %s%s%s",
00249             drizzle_result_error_code(result),
00250             drizzle_result_error(result),
00251             table ? ", when using table: " : "", table ? table : "");
00252     drizzle_result_free(result);
00253   }
00254   else
00255   {
00256     fprintf(stdout, "Error: %d, %s%s%s", ret, drizzle_con_error(con),
00257             table ? ", when using table: " : "", table ? table : "");
00258   }
00259 
00260   safe_exit(1, con);
00261 }
00262 
00263 
00264 static char *add_load_option(char *ptr, const char *object,
00265            const char *statement)
00266 {
00267   if (object)
00268   {
00269     /* Don't escape hex constants */
00270     if (object[0] == '0' && (object[1] == 'x' || object[1] == 'X'))
00271       ptr+= sprintf(ptr, " %s %s", statement, object);
00272     else
00273     {
00274       /* char constant; escape */
00275       ptr+= sprintf(ptr, " %s '", statement); 
00276       ptr= field_escape(ptr,object,(uint32_t) strlen(object));
00277       *ptr++= '\'';
00278     }
00279   }
00280   return ptr;
00281 }
00282 
/*
** Copy a user supplied separator/terminator string so that it can be placed
** inside a single-quoted SQL literal.  Inputs such as "'", "\", "\\"
** (escaped backslash), "\t" (tab) and "\n" (newline) are supported.
**
** A "'" is doubled unless it directly follows an odd number of backslashes,
** and one more backslash is appended when the input ends in an odd number
** of them, so the closing quote written by the caller is not escaped.
**
** Exactly length bytes are read from from.  Nothing is NUL terminated; the
** return value points just past the last byte written to to, which needs
** room for up to 2 * length + 1 bytes.
*/

static char *field_escape(char *to,const char *from,uint32_t length)
{
  uint32_t odd_backslashes= 0;   /* 1 while an odd-length backslash run is open */
  uint32_t pos;

  for (pos= 0; pos < length; pos++)
  {
    const char ch= from[pos];

    *to++= ch;
    if (ch == '\\')
    {
      odd_backslashes= !odd_backslashes;
      continue;
    }

    /* A quote that is not backslash-escaped is doubled for the SQL parser. */
    if (ch == '\'' && !odd_backslashes)
      *to++= '\'';
    odd_backslashes= 0;
  }

  /* Balance a dangling backslash at the end of the input. */
  if (odd_backslashes)
    *to++= '\\';
  return to;
}
00312 
00313 void * worker_thread(void *arg)
00314 {
00315   int error;
00316   char *raw_table_name= (char *)arg;
00317   drizzle_con_st *con;
00318 
00319   if (!(con= db_connect(current_host,current_db,current_user,opt_password)))
00320   {
00321     return 0;
00322   }
00323 
00324   /*
00325     We are not currently catching the error here.
00326   */
00327   if ((error= write_to_table(raw_table_name, con)))
00328   {
00329     if (exitcode == 0)
00330     {
00331       exitcode= error;
00332     }
00333   }
00334 
00335   if (con)
00336   {
00337     db_disconnect(current_host, con);
00338   }
00339 
00340   pthread_mutex_lock(&counter_mutex);
00341   counter--;
00342   pthread_cond_signal(&count_threshhold);
00343   pthread_mutex_unlock(&counter_mutex);
00344 
00345   return 0;
00346 }
00347 
00348 
00349 int main(int argc, char **argv)
00350 {
00351 try
00352 {
00353   int error=0;
00354 
00355   po::options_description commandline_options("Options used only in command line");
00356   commandline_options.add_options()
00357 
00358   ("debug,#", po::value<string>(),
00359   "Output debug log. Often this is 'd:t:o,filename'.")
00360   ("delete,d", po::value<bool>(&opt_delete)->default_value(false)->zero_tokens(),
00361   "First delete all rows from table.")
00362   ("help,?", "Displays this help and exits.")
00363   ("ignore,i", po::value<bool>(&ignore_unique)->default_value(false)->zero_tokens(),
00364   "If duplicate unique key was found, keep old row.")
00365   ("low-priority", po::value<bool>(&opt_low_priority)->default_value(false)->zero_tokens(),
00366   "Use LOW_PRIORITY when updating the table.")
00367   ("replace,r", po::value<bool>(&opt_replace)->default_value(false)->zero_tokens(),
00368   "If duplicate unique key was found, replace old row.")
00369   ("verbose,v", po::value<bool>(&verbose)->default_value(false)->zero_tokens(),
00370   "Print info about the various stages.")
00371   ("version,V", "Output version information and exit.")
00372   ;
00373 
00374   po::options_description import_options("Options specific to the drizzleimport");
00375   import_options.add_options()
00376   ("columns,C", po::value<string>(&opt_columns)->default_value(""),
00377   "Use only these columns to import the data to. Give the column names in a comma separated list. This is same as giving columns to LOAD DATA INFILE.")
00378   ("fields-terminated-by", po::value<string>(&fields_terminated)->default_value(""),
00379   "Fields in the textfile are terminated by ...")
00380   ("fields-enclosed-by", po::value<string>(&enclosed)->default_value(""),
00381   "Fields in the importfile are enclosed by ...")
00382   ("fields-optionally-enclosed-by", po::value<string>(&opt_enclosed)->default_value(""),
00383   "Fields in the i.file are opt. enclosed by ...")
00384   ("fields-escaped-by", po::value<string>(&escaped)->default_value(""),
00385   "Fields in the i.file are escaped by ...")
00386   ("force,f", po::value<bool>(&ignore_errors)->default_value(false)->zero_tokens(),
00387   "Continue even if we get an sql-error.")
00388   ("ignore-lines", po::value<int64_t>(&opt_ignore_lines)->default_value(0),
00389   "Ignore first n lines of data infile.")
00390   ("lines-terminated-by", po::value<string>(&lines_terminated)->default_value(""),
00391   "Lines in the i.file are terminated by ...")
00392   ("local,L", po::value<bool>(&opt_local_file)->default_value(false)->zero_tokens(),
00393   "Read all files through the client.")
00394   ("silent,s", po::value<bool>(&silent)->default_value(false)->zero_tokens(),
00395   "Be more silent.")
00396   ("use-threads", po::value<uint32_t>(&opt_use_threads)->default_value(4),
00397   "Load files in parallel. The argument is the number of threads to use for loading data (default is 4.")
00398   ;
00399 
00400   const char* unix_user= getlogin();
00401 
00402   po::options_description client_options("Options specific to the client");
00403   client_options.add_options()
00404   ("host,h", po::value<string>(&current_host)->default_value("localhost"),
00405   "Connect to host.")
00406   ("password,P", po::value<string>(&password),
00407   "Password to use when connecting to server. If password is not given it's asked from the tty." )
00408   ("port,p", po::value<uint32_t>(&opt_drizzle_port)->default_value(0),
00409   "Port number to use for connection") 
00410   ("protocol", po::value<string>(&opt_protocol)->default_value("mysql"),
00411   "The protocol of connection (mysql or drizzle).")
00412   ("user,u", po::value<string>(&current_user)->default_value((unix_user ? unix_user : "")),
00413   "User for login if not current user.")
00414   ;
00415 
00416   po::options_description long_options("Allowed Options");
00417   long_options.add(commandline_options).add(import_options).add(client_options);
00418 
00419   std::string system_config_dir_import(SYSCONFDIR); 
00420   system_config_dir_import.append("/drizzle/drizzleimport.cnf");
00421 
00422   std::string system_config_dir_client(SYSCONFDIR); 
00423   system_config_dir_client.append("/drizzle/client.cnf");
00424   
00425   std::string user_config_dir((getenv("XDG_CONFIG_HOME")? getenv("XDG_CONFIG_HOME"):"~/.config"));
00426 
00427   if (user_config_dir.compare(0, 2, "~/") == 0)
00428   {
00429     char *homedir;
00430     homedir= getenv("HOME");
00431     if (homedir != NULL)
00432       user_config_dir.replace(0, 1, homedir);
00433   }
00434 
00435   po::variables_map vm;
00436 
00437   // Disable allow_guessing
00438   int style = po::command_line_style::default_style & ~po::command_line_style::allow_guessing;
00439 
00440   po::store(po::command_line_parser(argc, argv).options(long_options).
00441             style(style).extra_parser(parse_password_arg).run(), vm);
00442 
00443   std::string user_config_dir_import(user_config_dir);
00444   user_config_dir_import.append("/drizzle/drizzleimport.cnf"); 
00445 
00446   std::string user_config_dir_client(user_config_dir);
00447   user_config_dir_client.append("/drizzle/client.cnf");
00448 
00449   ifstream user_import_ifs(user_config_dir_import.c_str());
00450   po::store(parse_config_file(user_import_ifs, import_options), vm);
00451 
00452   ifstream user_client_ifs(user_config_dir_client.c_str());
00453   po::store(parse_config_file(user_client_ifs, client_options), vm);
00454 
00455   ifstream system_import_ifs(system_config_dir_import.c_str());
00456   store(parse_config_file(system_import_ifs, import_options), vm);
00457  
00458   ifstream system_client_ifs(system_config_dir_client.c_str());
00459   po::store(parse_config_file(system_client_ifs, client_options), vm);
00460 
00461   po::notify(vm);
00462   if (vm.count("protocol"))
00463   {
00464     std::transform(opt_protocol.begin(), opt_protocol.end(),
00465       opt_protocol.begin(), ::tolower);
00466 
00467     if (not opt_protocol.compare("mysql"))
00468       use_drizzle_protocol=false;
00469     else if (not opt_protocol.compare("drizzle"))
00470       use_drizzle_protocol=true;
00471     else
00472     {
00473       cout << _("Error: Unknown protocol") << " '" << opt_protocol << "'" << endl;
00474       exit(-1);
00475     }
00476   }
00477 
00478   if (vm.count("port"))
00479   {
00480     
00481     /* If the port number is > 65535 it is not a valid port
00482        This also helps with potential data loss casting unsigned long to a
00483        uint32_t. */
00484     if (opt_drizzle_port > 65535)
00485     {
00486       fprintf(stderr, _("Value supplied for port is not valid.\n"));
00487       exit(EXIT_ARGUMENT_INVALID);
00488     }
00489   }
00490 
00491   if( vm.count("password") )
00492   {
00493     if (!opt_password.empty())
00494       opt_password.erase();
00495     if (password == PASSWORD_SENTINEL)
00496     {
00497       opt_password= "";
00498     }
00499     else
00500     {
00501       opt_password= password;
00502       tty_password= false;
00503     }
00504   }
00505   else
00506   {
00507       tty_password= true;
00508   }
00509 
00510 
00511   if (vm.count("version"))
00512   {
00513     printf("%s  Ver %s Distrib %s, for %s-%s (%s)\n", program_name,
00514     IMPORT_VERSION, drizzle_version(),HOST_VENDOR,HOST_OS,HOST_CPU);
00515   }
00516   
00517   if (vm.count("help") || argc < 2)
00518   {
00519     printf("%s  Ver %s Distrib %s, for %s-%s (%s)\n", program_name,
00520     IMPORT_VERSION, drizzle_version(),HOST_VENDOR,HOST_OS,HOST_CPU);
00521     puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,\nand you are welcome to modify and redistribute it under the GPL license\n");
00522     printf("\
00523     Loads tables from text files in various formats.  The base name of the\n\
00524     text file must be the name of the table that should be used.\n\
00525     If one uses sockets to connect to the Drizzle server, the server will open and\n\
00526     read the text file directly. In other cases the client will open the text\n\
00527     file. The SQL command 'LOAD DATA INFILE' is used to import the rows.\n");
00528 
00529     printf("\nUsage: %s [OPTIONS] database textfile...", program_name);
00530     cout<<long_options;
00531     exit(0);
00532   }
00533 
00534 
00535   if (get_options())
00536   {
00537     return(1);
00538   }
00539   
00540   current_db= (*argv)++;
00541   argc--;
00542 
00543   if (opt_use_threads)
00544   {
00545     pthread_t mainthread;            /* Thread descriptor */
00546     pthread_attr_t attr;          /* Thread attributes */
00547     pthread_attr_init(&attr);
00548     pthread_attr_setdetachstate(&attr,
00549                                 PTHREAD_CREATE_DETACHED);
00550 
00551     pthread_mutex_init(&counter_mutex, NULL);
00552     pthread_cond_init(&count_threshhold, NULL);
00553 
00554     for (counter= 0; *argv != NULL; argv++) /* Loop through tables */
00555     {
00556       pthread_mutex_lock(&counter_mutex);
00557       while (counter == opt_use_threads)
00558       {
00559         struct timespec abstime;
00560 
00561         set_timespec(abstime, 3);
00562         pthread_cond_timedwait(&count_threshhold, &counter_mutex, &abstime);
00563       }
00564       /* Before exiting the lock we set ourselves up for the next thread */
00565       counter++;
00566       pthread_mutex_unlock(&counter_mutex);
00567       /* now create the thread */
00568       if (pthread_create(&mainthread, &attr, worker_thread,
00569                          (void *)*argv) != 0)
00570       {
00571         pthread_mutex_lock(&counter_mutex);
00572         counter--;
00573         pthread_mutex_unlock(&counter_mutex);
00574         fprintf(stderr,"%s: Could not create thread\n", program_name);
00575       }
00576     }
00577 
00578     /*
00579       We loop until we know that all children have cleaned up.
00580     */
00581     pthread_mutex_lock(&counter_mutex);
00582     while (counter)
00583     {
00584       struct timespec abstime;
00585 
00586       set_timespec(abstime, 3);
00587       pthread_cond_timedwait(&count_threshhold, &counter_mutex, &abstime);
00588     }
00589     pthread_mutex_unlock(&counter_mutex);
00590     pthread_mutex_destroy(&counter_mutex);
00591     pthread_cond_destroy(&count_threshhold);
00592     pthread_attr_destroy(&attr);
00593   }
00594   else
00595   {
00596     drizzle_con_st *con;
00597 
00598     if (!(con= db_connect(current_host,current_db,current_user,opt_password)))
00599     {
00600       return(1);
00601     }
00602 
00603     for (; *argv != NULL; argv++)
00604       if ((error= write_to_table(*argv, con)))
00605         if (exitcode == 0)
00606           exitcode= error;
00607     db_disconnect(current_host, con);
00608   }
00609   opt_password.empty();
00610 }
00611   catch(exception &err)
00612   {
00613     cerr<<err.what()<<endl;
00614   }
00615   return(exitcode);
00616 }