Drizzled Public API Documentation

trx0roll.cc
00001 /*****************************************************************************
00002 
00003 Copyright (C) 1996, 2009, Innobase Oy. All Rights Reserved.
00004 
00005 This program is free software; you can redistribute it and/or modify it under
00006 the terms of the GNU General Public License as published by the Free Software
00007 Foundation; version 2 of the License.
00008 
00009 This program is distributed in the hope that it will be useful, but WITHOUT
00010 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00011 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
00012 
00013 You should have received a copy of the GNU General Public License along with
00014 this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
00015 St, Fifth Floor, Boston, MA 02110-1301 USA
00016 
00017 *****************************************************************************/
00018 
00019 /**************************************************/
00026 #include "trx0roll.h"
00027 
00028 #ifdef UNIV_NONINL
00029 #include "trx0roll.ic"
00030 #endif
00031 
00032 #include "fsp0fsp.h"
00033 #include "mach0data.h"
00034 #include "trx0rseg.h"
00035 #include "trx0trx.h"
00036 #include "trx0undo.h"
00037 #include "trx0rec.h"
00038 #include "que0que.h"
00039 #include "usr0sess.h"
00040 #include "srv0start.h"
00041 #include "row0undo.h"
00042 #include "row0mysql.h"
00043 #include "lock0lock.h"
00044 #include "pars0pars.h"
00045 
/** Threshold compared against trx->pages_undone in
trx_roll_pop_top_rec_of_trx(): once the counter reaches this value,
trx_roll_try_truncate() is called (which also resets the counter). */
00048 #define TRX_ROLL_TRUNC_THRESHOLD  1
00049 
/** Transaction currently being rolled back by trx_rollback_active(), or
NULL. Set at the start of trx_rollback_active() and reset to NULL at its end;
read by trx_is_recv() and by the progress printing in
trx_roll_pop_top_rec_of_trx(). */
00051 static trx_t*   trx_roll_crash_recv_trx = NULL;
00052 
/** Value of trx->undo_no captured by trx_rollback_active() when it starts
rolling back trx_roll_crash_recv_trx; used as the denominator of the progress
percentage printed by trx_roll_pop_top_rec_of_trx(). */
00055 static undo_no_t  trx_roll_max_undo_no;
00056 
/** Progress percentage most recently printed by
trx_roll_pop_top_rec_of_trx(); reset to 0 by trx_rollback_active(). */
00058 static ulint    trx_roll_progress_printed_pct;
00059 
00060 /*******************************************************************/
00063 UNIV_INTERN
00064 int
00065 trx_general_rollback_for_mysql(
00066 /*===========================*/
00067   trx_t*    trx,  
00068   trx_savept_t* savept) 
00071 {
00072   mem_heap_t* heap;
00073   que_thr_t*  thr;
00074   roll_node_t*  roll_node;
00075 
00076   /* Tell Innobase server that there might be work for
00077   utility threads: */
00078 
00079   srv_active_wake_master_thread();
00080 
00081   trx_start_if_not_started(trx);
00082 
00083   heap = mem_heap_create(512);
00084 
00085   roll_node = roll_node_create(heap);
00086 
00087   if (savept) {
00088     roll_node->partial = TRUE;
00089     roll_node->savept = *savept;
00090   }
00091 
00092   trx->error_state = DB_SUCCESS;
00093 
00094   thr = pars_complete_graph_for_exec(roll_node, trx, heap);
00095 
00096         ut_a(thr == que_fork_start_command(static_cast<que_fork_t *>(que_node_get_parent(thr))));
00097   que_run_threads(thr);
00098 
00099   mutex_enter(&kernel_mutex);
00100 
00101   while (trx->que_state != TRX_QUE_RUNNING) {
00102 
00103     mutex_exit(&kernel_mutex);
00104 
00105     os_thread_sleep(100000);
00106 
00107     mutex_enter(&kernel_mutex);
00108   }
00109 
00110   mutex_exit(&kernel_mutex);
00111 
00112   mem_heap_free(heap);
00113 
00114   ut_a(trx->error_state == DB_SUCCESS);
00115 
00116   /* Tell Innobase server that there might be work for
00117   utility threads: */
00118 
00119   srv_active_wake_master_thread();
00120 
00121   return((int) trx->error_state);
00122 }
00123 
00124 /*******************************************************************/
00127 UNIV_INTERN
00128 int
00129 trx_rollback_for_mysql(
00130 /*===================*/
00131   trx_t*  trx)  
00132 {
00133   int err;
00134 
00135   if (trx->conc_state == TRX_NOT_STARTED) {
00136 
00137     return(DB_SUCCESS);
00138   }
00139 
00140   trx->op_info = "rollback";
00141 
00142   /* If we are doing the XA recovery of prepared transactions, then
00143   the transaction object does not have an InnoDB session object, and we
00144   set a dummy session that we use for all MySQL transactions. */
00145 
00146   err = trx_general_rollback_for_mysql(trx, NULL);
00147 
00148   trx->op_info = "";
00149 
00150   return(err);
00151 }
00152 
00153 /*******************************************************************/
00156 UNIV_INTERN
00157 int
00158 trx_rollback_last_sql_stat_for_mysql(
00159 /*=================================*/
00160   trx_t*  trx)  
00161 {
00162   int err;
00163 
00164   if (trx->conc_state == TRX_NOT_STARTED) {
00165 
00166     return(DB_SUCCESS);
00167   }
00168 
00169   trx->op_info = "rollback of SQL statement";
00170 
00171   err = trx_general_rollback_for_mysql(trx, &trx->last_sql_stat_start);
00172   /* The following call should not be needed, but we play safe: */
00173   trx_mark_sql_stat_end(trx);
00174 
00175   trx->op_info = "";
00176 
00177   return(err);
00178 }
00179 
00180 /*******************************************************************/
00182 UNIV_INTERN
00183 void
00184 trx_roll_savepoint_free(
00185 /*=====================*/
00186   trx_t*      trx,  
00187   trx_named_savept_t* savep)  
00188 {
00189   ut_a(savep != NULL);
00190   ut_a(UT_LIST_GET_LEN(trx->trx_savepoints) > 0);
00191 
00192   UT_LIST_REMOVE(trx_savepoints, trx->trx_savepoints, savep);
00193   mem_free(savep->name);
00194   mem_free(savep);
00195 }
00196 
00197 /*******************************************************************/
00200 UNIV_INTERN
00201 void
00202 trx_roll_savepoints_free(
00203 /*=====================*/
00204   trx_t*      trx,  
00205   trx_named_savept_t* savep)  
00208 {
00209   trx_named_savept_t* next_savep;
00210 
00211   if (savep == NULL) {
00212     savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
00213   } else {
00214     savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
00215   }
00216 
00217   while (savep != NULL) {
00218     next_savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
00219 
00220     trx_roll_savepoint_free(trx, savep);
00221 
00222     savep = next_savep;
00223   }
00224 }
00225 
00226 /*******************************************************************/
00235 UNIV_INTERN
00236 ulint
00237 trx_rollback_to_savepoint_for_mysql(
00238 /*================================*/
00239   trx_t*    trx,      
00240   const char* savepoint_name,   
00241   ib_int64_t* mysql_binlog_cache_pos) 
00247 {
00248   trx_named_savept_t* savep;
00249   ulint     err;
00250 
00251   savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
00252 
00253   while (savep != NULL) {
00254     if (0 == ut_strcmp(savep->name, savepoint_name)) {
00255       /* Found */
00256       break;
00257     }
00258     savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
00259   }
00260 
00261   if (savep == NULL) {
00262 
00263     return(DB_NO_SAVEPOINT);
00264   }
00265 
00266   if (trx->conc_state == TRX_NOT_STARTED) {
00267     ut_print_timestamp(stderr);
00268     fputs("  InnoDB: Error: transaction has a savepoint ", stderr);
00269     ut_print_name(stderr, trx, FALSE, savep->name);
00270     fputs(" though it is not started\n", stderr);
00271     return(DB_ERROR);
00272   }
00273 
00274   /* We can now free all savepoints strictly later than this one */
00275 
00276   trx_roll_savepoints_free(trx, savep);
00277 
00278   *mysql_binlog_cache_pos = savep->mysql_binlog_cache_pos;
00279 
00280   trx->op_info = "rollback to a savepoint";
00281 
00282   err = trx_general_rollback_for_mysql(trx, &savep->savept);
00283 
00284   /* Store the current undo_no of the transaction so that we know where
00285   to roll back if we have to roll back the next SQL statement: */
00286 
00287   trx_mark_sql_stat_end(trx);
00288 
00289   trx->op_info = "";
00290 
00291   return(err);
00292 }
00293 
00294 /*******************************************************************/
00300 UNIV_INTERN
00301 ulint
00302 trx_savepoint_for_mysql(
00303 /*====================*/
00304   trx_t*    trx,      
00305   const char* savepoint_name,   
00306   ib_int64_t  binlog_cache_pos) 
00310 {
00311   trx_named_savept_t* savep;
00312 
00313   ut_a(trx);
00314   ut_a(savepoint_name);
00315 
00316   trx_start_if_not_started(trx);
00317 
00318   savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
00319 
00320   while (savep != NULL) {
00321     if (0 == ut_strcmp(savep->name, savepoint_name)) {
00322       /* Found */
00323       break;
00324     }
00325     savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
00326   }
00327 
00328   if (savep) {
00329     /* There is a savepoint with the same name: free that */
00330 
00331     UT_LIST_REMOVE(trx_savepoints, trx->trx_savepoints, savep);
00332 
00333     mem_free(savep->name);
00334     mem_free(savep);
00335   }
00336 
00337   /* Create a new savepoint and add it as the last in the list */
00338 
00339         savep = static_cast<trx_named_savept_t *>(mem_alloc(sizeof(trx_named_savept_t)));
00340 
00341   savep->name = mem_strdup(savepoint_name);
00342 
00343   savep->savept = trx_savept_take(trx);
00344 
00345   savep->mysql_binlog_cache_pos = binlog_cache_pos;
00346 
00347   UT_LIST_ADD_LAST(trx_savepoints, trx->trx_savepoints, savep);
00348 
00349   return(DB_SUCCESS);
00350 }
00351 
00352 /*******************************************************************/
00357 UNIV_INTERN
00358 ulint
00359 trx_release_savepoint_for_mysql(
00360 /*============================*/
00361   trx_t*    trx,      
00362   const char* savepoint_name)   
00363 {
00364   trx_named_savept_t* savep;
00365 
00366   savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
00367 
00368   /* Search for the savepoint by name and free if found. */
00369   while (savep != NULL) {
00370     if (0 == ut_strcmp(savep->name, savepoint_name)) {
00371       trx_roll_savepoint_free(trx, savep);
00372       return(DB_SUCCESS);
00373     }
00374     savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
00375   }
00376 
00377   return(DB_NO_SAVEPOINT);
00378 }
00379 
00380 /*******************************************************************/
00385 UNIV_INTERN
00386 ibool
00387 trx_is_recv(
00388 /*========*/
00389   const trx_t*  trx)  
00390 {
00391   return(trx == trx_roll_crash_recv_trx);
00392 }
00393 
00394 /*******************************************************************/
00397 UNIV_INTERN
00398 trx_savept_t
00399 trx_savept_take(
00400 /*============*/
00401   trx_t*  trx)  
00402 {
00403   trx_savept_t  savept;
00404 
00405   savept.least_undo_no = trx->undo_no;
00406 
00407   return(savept);
00408 }
00409 
00410 /*******************************************************************/
00412 static
00413 void
00414 trx_rollback_active(
00415 /*================*/
00416   trx_t*  trx)  
00417 {
00418   mem_heap_t* heap;
00419   que_fork_t* fork;
00420   que_thr_t*  thr;
00421   roll_node_t*  roll_node;
00422   dict_table_t* table;
00423   ib_int64_t  rows_to_undo;
00424   const char* unit    = "";
00425   ibool   dictionary_locked = FALSE;
00426 
00427   heap = mem_heap_create(512);
00428 
00429   fork = que_fork_create(NULL, NULL, QUE_FORK_RECOVERY, heap);
00430   fork->trx = trx;
00431 
00432   thr = que_thr_create(fork, heap);
00433 
00434   roll_node = roll_node_create(heap);
00435 
00436   thr->child = roll_node;
00437   roll_node->common.parent = thr;
00438 
00439   mutex_enter(&kernel_mutex);
00440 
00441   trx->graph = fork;
00442 
00443   ut_a(thr == que_fork_start_command(fork));
00444 
00445   trx_roll_crash_recv_trx = trx;
00446   trx_roll_max_undo_no = trx->undo_no;
00447   trx_roll_progress_printed_pct = 0;
00448   rows_to_undo = trx_roll_max_undo_no;
00449 
00450   if (rows_to_undo > 1000000000) {
00451     rows_to_undo = rows_to_undo / 1000000;
00452     unit = "M";
00453   }
00454 
00455   ut_print_timestamp(stderr);
00456   fprintf(stderr,
00457     "  InnoDB: Rolling back trx with id " TRX_ID_FMT ", %lu%s"
00458     " rows to undo\n",
00459     trx->id,
00460     (ulong) rows_to_undo, unit);
00461   mutex_exit(&kernel_mutex);
00462 
00463   trx->mysql_thread_id = os_thread_get_curr_id();
00464 
00465   trx->mysql_process_no = os_proc_get_number();
00466 
00467   if (trx_get_dict_operation(trx) != TRX_DICT_OP_NONE) {
00468     row_mysql_lock_data_dictionary(trx);
00469     dictionary_locked = TRUE;
00470   }
00471 
00472   que_run_threads(thr);
00473 
00474   mutex_enter(&kernel_mutex);
00475 
00476   while (trx->que_state != TRX_QUE_RUNNING) {
00477 
00478     mutex_exit(&kernel_mutex);
00479 
00480     fprintf(stderr,
00481       "InnoDB: Waiting for rollback of trx id "
00482       TRX_ID_FMT " to end\n",
00483       trx->id);
00484     os_thread_sleep(100000);
00485 
00486     mutex_enter(&kernel_mutex);
00487   }
00488 
00489   mutex_exit(&kernel_mutex);
00490 
00491   if (trx_get_dict_operation(trx) != TRX_DICT_OP_NONE
00492       && trx->table_id != 0) {
00493 
00494     /* If the transaction was for a dictionary operation, we
00495     drop the relevant table, if it still exists */
00496 
00497     fprintf(stderr,
00498       "InnoDB: Dropping table with id %llu"
00499       " in recovery if it exists\n",
00500       (ullint) trx->table_id);
00501 
00502     table = dict_table_get_on_id_low(trx->table_id);
00503 
00504     if (table) {
00505       ulint err;
00506 
00507       fputs("InnoDB: Table found: dropping table ", stderr);
00508       ut_print_name(stderr, trx, TRUE, table->name);
00509       fputs(" in recovery\n", stderr);
00510 
00511       err = row_drop_table_for_mysql(table->name, trx, TRUE);
00512       trx_commit_for_mysql(trx);
00513 
00514       ut_a(err == (int) DB_SUCCESS);
00515     }
00516   }
00517 
00518   if (dictionary_locked) {
00519     row_mysql_unlock_data_dictionary(trx);
00520   }
00521 
00522   fprintf(stderr, "\nInnoDB: Rolling back of trx id " TRX_ID_FMT
00523     " completed\n",
00524     trx->id);
00525   mem_heap_free(heap);
00526 
00527   trx_roll_crash_recv_trx = NULL;
00528 }
00529 
00530 /*******************************************************************/
00535 UNIV_INTERN
00536 void
00537 trx_rollback_or_clean_recovered(
00538 /*============================*/
00539   ibool all)  
00541 {
00542   trx_t*  trx;
00543 
00544   mutex_enter(&kernel_mutex);
00545 
00546   if (!UT_LIST_GET_FIRST(trx_sys->trx_list)) {
00547     goto leave_function;
00548   }
00549 
00550   if (all) {
00551     fprintf(stderr,
00552       "InnoDB: Starting in background the rollback"
00553       " of uncommitted transactions\n");
00554   }
00555 
00556   mutex_exit(&kernel_mutex);
00557 
00558 loop:
00559   mutex_enter(&kernel_mutex);
00560 
00561   for (trx = UT_LIST_GET_FIRST(trx_sys->trx_list); trx;
00562        trx = UT_LIST_GET_NEXT(trx_list, trx)) {
00563     if (!trx->is_recovered) {
00564       continue;
00565     }
00566 
00567     switch (trx->conc_state) {
00568     case TRX_NOT_STARTED:
00569     case TRX_PREPARED:
00570       continue;
00571 
00572     case TRX_COMMITTED_IN_MEMORY:
00573       mutex_exit(&kernel_mutex);
00574       fprintf(stderr,
00575         "InnoDB: Cleaning up trx with id "
00576         TRX_ID_FMT "\n",
00577         trx->id);
00578       trx_cleanup_at_db_startup(trx);
00579       goto loop;
00580 
00581     case TRX_ACTIVE:
00582       if (all || trx_get_dict_operation(trx)
00583           != TRX_DICT_OP_NONE) {
00584         mutex_exit(&kernel_mutex);
00585         trx_rollback_active(trx);
00586         goto loop;
00587       }
00588     }
00589   }
00590 
00591   if (all) {
00592     ut_print_timestamp(stderr);
00593     fprintf(stderr,
00594       "  InnoDB: Rollback of non-prepared"
00595       " transactions completed\n");
00596   }
00597 
00598 leave_function:
00599   mutex_exit(&kernel_mutex);
00600 }
00601 
00602 /*******************************************************************/
00609 UNIV_INTERN
00610 os_thread_ret_t
00611 trx_rollback_or_clean_all_recovered(
00612 /*================================*/
00613   void* /*arg*/)
00616 {
00617 #ifdef UNIV_PFS_THREAD
00618   pfs_register_thread(trx_rollback_clean_thread_key);
00619 #endif /* UNIV_PFS_THREAD */
00620 
00621   trx_rollback_or_clean_recovered(TRUE);
00622 
00623   /* We count the number of threads in os_thread_exit(). A created
00624   thread should always use that to exit and not use return() to exit. */
00625 
00626   os_thread_exit(NULL);
00627 
00628   OS_THREAD_DUMMY_RETURN;
00629 }
00630 
00631 /*******************************************************************/
00634 UNIV_INTERN
00635 trx_undo_arr_t*
00636 trx_undo_arr_create(void)
00637 /*=====================*/
00638 {
00639   trx_undo_arr_t* arr;
00640   mem_heap_t* heap;
00641   ulint   i;
00642 
00643   heap = mem_heap_create(1024);
00644 
00645         arr = static_cast<trx_undo_arr_t *>(mem_heap_alloc(heap, sizeof(trx_undo_arr_t)));
00646 
00647         arr->infos = static_cast<trx_undo_inf_t *>(mem_heap_alloc(heap, sizeof(trx_undo_inf_t)
00648                                                                   * UNIV_MAX_PARALLELISM));
00649   arr->n_cells = UNIV_MAX_PARALLELISM;
00650   arr->n_used = 0;
00651 
00652   arr->heap = heap;
00653 
00654   for (i = 0; i < UNIV_MAX_PARALLELISM; i++) {
00655 
00656     (trx_undo_arr_get_nth_info(arr, i))->in_use = FALSE;
00657   }
00658 
00659   return(arr);
00660 }
00661 
00662 /*******************************************************************/
00664 UNIV_INTERN
00665 void
00666 trx_undo_arr_free(
00667 /*==============*/
00668   trx_undo_arr_t* arr)  
00669 {
00670   ut_ad(arr->n_used == 0);
00671 
00672   mem_heap_free(arr->heap);
00673 }
00674 
00675 /*******************************************************************/
00678 static
00679 ibool
00680 trx_undo_arr_store_info(
00681 /*====================*/
00682   trx_t*    trx,  
00683   undo_no_t undo_no)
00684 {
00685   trx_undo_inf_t* cell;
00686   trx_undo_inf_t* stored_here;
00687   trx_undo_arr_t* arr;
00688   ulint   n_used;
00689   ulint   n;
00690   ulint   i;
00691 
00692   n = 0;
00693   arr = trx->undo_no_arr;
00694   n_used = arr->n_used;
00695   stored_here = NULL;
00696 
00697   for (i = 0;; i++) {
00698     cell = trx_undo_arr_get_nth_info(arr, i);
00699 
00700     if (!cell->in_use) {
00701       if (!stored_here) {
00702         /* Not in use, we may store here */
00703         cell->undo_no = undo_no;
00704         cell->in_use = TRUE;
00705 
00706         arr->n_used++;
00707 
00708         stored_here = cell;
00709       }
00710     } else {
00711       n++;
00712 
00713       if (cell->undo_no == undo_no) {
00714 
00715         if (stored_here) {
00716           stored_here->in_use = FALSE;
00717           ut_ad(arr->n_used > 0);
00718           arr->n_used--;
00719         }
00720 
00721         ut_ad(arr->n_used == n_used);
00722 
00723         return(FALSE);
00724       }
00725     }
00726 
00727     if (n == n_used && stored_here) {
00728 
00729       ut_ad(arr->n_used == 1 + n_used);
00730 
00731       return(TRUE);
00732     }
00733   }
00734 }
00735 
00736 /*******************************************************************/
00738 static
00739 void
00740 trx_undo_arr_remove_info(
00741 /*=====================*/
00742   trx_undo_arr_t* arr,  
00743   undo_no_t undo_no)
00744 {
00745   trx_undo_inf_t* cell;
00746   ulint   i;
00747 
00748   for (i = 0;; i++) {
00749     cell = trx_undo_arr_get_nth_info(arr, i);
00750 
00751     if (cell->in_use
00752         && cell->undo_no == undo_no) {
00753 
00754       cell->in_use = FALSE;
00755 
00756       ut_ad(arr->n_used > 0);
00757 
00758       arr->n_used--;
00759 
00760       return;
00761     }
00762   }
00763 }
00764 
00765 /*******************************************************************/
00768 static
00769 undo_no_t
00770 trx_undo_arr_get_biggest(
00771 /*=====================*/
00772   trx_undo_arr_t* arr)  
00773 {
00774   trx_undo_inf_t* cell;
00775   ulint   n_used;
00776   undo_no_t biggest;
00777   ulint   n;
00778   ulint   i;
00779 
00780   n = 0;
00781   n_used = arr->n_used;
00782   biggest = 0;
00783 
00784   for (i = 0;; i++) {
00785     cell = trx_undo_arr_get_nth_info(arr, i);
00786 
00787     if (cell->in_use) {
00788       n++;
00789       if (cell->undo_no > biggest) {
00790 
00791         biggest = cell->undo_no;
00792       }
00793     }
00794 
00795     if (n == n_used) {
00796       return(biggest);
00797     }
00798   }
00799 }
00800 
00801 /***********************************************************************/
00803 UNIV_INTERN
00804 void
00805 trx_roll_try_truncate(
00806 /*==================*/
00807   trx_t*  trx)  
00808 {
00809   trx_undo_arr_t* arr;
00810   undo_no_t limit;
00811   undo_no_t biggest;
00812 
00813   ut_ad(mutex_own(&(trx->undo_mutex)));
00814   ut_ad(mutex_own(&((trx->rseg)->mutex)));
00815 
00816   trx->pages_undone = 0;
00817 
00818   arr = trx->undo_no_arr;
00819 
00820   limit = trx->undo_no;
00821 
00822   if (arr->n_used > 0) {
00823     biggest = trx_undo_arr_get_biggest(arr);
00824 
00825     if (biggest >= limit) {
00826 
00827       limit = biggest + 1;
00828     }
00829   }
00830 
00831   if (trx->insert_undo) {
00832     trx_undo_truncate_end(trx, trx->insert_undo, limit);
00833   }
00834 
00835   if (trx->update_undo) {
00836     trx_undo_truncate_end(trx, trx->update_undo, limit);
00837   }
00838 }
00839 
00840 /***********************************************************************/
00844 static
00845 trx_undo_rec_t*
00846 trx_roll_pop_top_rec(
00847 /*=================*/
00848   trx_t*    trx,  
00849   trx_undo_t* undo, 
00850   mtr_t*    mtr)  
00851 {
00852   page_t*   undo_page;
00853   ulint   offset;
00854   trx_undo_rec_t* prev_rec;
00855   page_t*   prev_rec_page;
00856 
00857   ut_ad(mutex_own(&(trx->undo_mutex)));
00858 
00859   undo_page = trx_undo_page_get_s_latched(undo->space, undo->zip_size,
00860             undo->top_page_no, mtr);
00861   offset = undo->top_offset;
00862 
00863   /*  fprintf(stderr, "Thread %lu undoing trx " TRX_ID_FMT
00864       " undo record " TRX_ID_FMT "\n",
00865   os_thread_get_curr_id(), trx->id, undo->top_undo_no); */
00866 
00867   prev_rec = trx_undo_get_prev_rec(undo_page + offset,
00868            undo->hdr_page_no, undo->hdr_offset,
00869            mtr);
00870   if (prev_rec == NULL) {
00871 
00872     undo->empty = TRUE;
00873   } else {
00874     prev_rec_page = page_align(prev_rec);
00875 
00876     if (prev_rec_page != undo_page) {
00877 
00878       trx->pages_undone++;
00879     }
00880 
00881     undo->top_page_no = page_get_page_no(prev_rec_page);
00882     undo->top_offset  = prev_rec - prev_rec_page;
00883     undo->top_undo_no = trx_undo_rec_get_undo_no(prev_rec);
00884   }
00885 
00886   return(undo_page + offset);
00887 }
00888 
00889 /********************************************************************/
00897 UNIV_INTERN
00898 trx_undo_rec_t*
00899 trx_roll_pop_top_rec_of_trx(
00900 /*========================*/
00901   trx_t*    trx,  
00902   undo_no_t limit,  
00903   roll_ptr_t* roll_ptr,
00904   mem_heap_t* heap) 
00905 {
00906   trx_undo_t* undo;
00907   trx_undo_t* ins_undo;
00908   trx_undo_t* upd_undo;
00909   trx_undo_rec_t* undo_rec;
00910   trx_undo_rec_t* undo_rec_copy;
00911   undo_no_t undo_no;
00912   ibool   is_insert;
00913   trx_rseg_t* rseg;
00914   ulint   progress_pct;
00915   mtr_t   mtr;
00916 
00917   rseg = trx->rseg;
00918 try_again:
00919   mutex_enter(&(trx->undo_mutex));
00920 
00921   if (trx->pages_undone >= TRX_ROLL_TRUNC_THRESHOLD) {
00922     mutex_enter(&(rseg->mutex));
00923 
00924     trx_roll_try_truncate(trx);
00925 
00926     mutex_exit(&(rseg->mutex));
00927   }
00928 
00929   ins_undo = trx->insert_undo;
00930   upd_undo = trx->update_undo;
00931 
00932   if (!ins_undo || ins_undo->empty) {
00933     undo = upd_undo;
00934   } else if (!upd_undo || upd_undo->empty) {
00935     undo = ins_undo;
00936   } else if (upd_undo->top_undo_no > ins_undo->top_undo_no) {
00937     undo = upd_undo;
00938   } else {
00939     undo = ins_undo;
00940   }
00941 
00942   if (!undo || undo->empty
00943       || limit > undo->top_undo_no) {
00944 
00945     if ((trx->undo_no_arr)->n_used == 0) {
00946       /* Rollback is ending */
00947 
00948       mutex_enter(&(rseg->mutex));
00949 
00950       trx_roll_try_truncate(trx);
00951 
00952       mutex_exit(&(rseg->mutex));
00953     }
00954 
00955     mutex_exit(&(trx->undo_mutex));
00956 
00957     return(NULL);
00958   }
00959 
00960   if (undo == ins_undo) {
00961     is_insert = TRUE;
00962   } else {
00963     is_insert = FALSE;
00964   }
00965 
00966   *roll_ptr = trx_undo_build_roll_ptr(is_insert, (undo->rseg)->id,
00967               undo->top_page_no,
00968               undo->top_offset);
00969   mtr_start(&mtr);
00970 
00971   undo_rec = trx_roll_pop_top_rec(trx, undo, &mtr);
00972 
00973   undo_no = trx_undo_rec_get_undo_no(undo_rec);
00974 
00975   ut_ad(undo_no + 1 == trx->undo_no);
00976 
00977   /* We print rollback progress info if we are in a crash recovery
00978   and the transaction has at least 1000 row operations to undo. */
00979 
00980   if (trx == trx_roll_crash_recv_trx && trx_roll_max_undo_no > 1000) {
00981 
00982     progress_pct = 100 - (ulint)
00983       ((undo_no * 100) / trx_roll_max_undo_no);
00984     if (progress_pct != trx_roll_progress_printed_pct) {
00985       if (trx_roll_progress_printed_pct == 0) {
00986         fprintf(stderr,
00987           "\nInnoDB: Progress in percents:"
00988           " %lu", (ulong) progress_pct);
00989       } else {
00990         fprintf(stderr,
00991           " %lu", (ulong) progress_pct);
00992       }
00993       fflush(stderr);
00994       trx_roll_progress_printed_pct = progress_pct;
00995     }
00996   }
00997 
00998   trx->undo_no = undo_no;
00999 
01000   if (!trx_undo_arr_store_info(trx, undo_no)) {
01001     /* A query thread is already processing this undo log record */
01002 
01003     mutex_exit(&(trx->undo_mutex));
01004 
01005     mtr_commit(&mtr);
01006 
01007     goto try_again;
01008   }
01009 
01010   undo_rec_copy = trx_undo_rec_copy(undo_rec, heap);
01011 
01012   mutex_exit(&(trx->undo_mutex));
01013 
01014   mtr_commit(&mtr);
01015 
01016   return(undo_rec_copy);
01017 }
01018 
01019 /********************************************************************/
01024 UNIV_INTERN
01025 ibool
01026 trx_undo_rec_reserve(
01027 /*=================*/
01028   trx_t*    trx,  
01029   undo_no_t undo_no)
01030 {
01031   ibool ret;
01032 
01033   mutex_enter(&(trx->undo_mutex));
01034 
01035   ret = trx_undo_arr_store_info(trx, undo_no);
01036 
01037   mutex_exit(&(trx->undo_mutex));
01038 
01039   return(ret);
01040 }
01041 
01042 /*******************************************************************/
01044 UNIV_INTERN
01045 void
01046 trx_undo_rec_release(
01047 /*=================*/
01048   trx_t*    trx,  
01049   undo_no_t undo_no)
01050 {
01051   trx_undo_arr_t* arr;
01052 
01053   mutex_enter(&(trx->undo_mutex));
01054 
01055   arr = trx->undo_no_arr;
01056 
01057   trx_undo_arr_remove_info(arr, undo_no);
01058 
01059   mutex_exit(&(trx->undo_mutex));
01060 }
01061 
01062 /*********************************************************************/
01064 UNIV_INTERN
01065 void
01066 trx_rollback(
01067 /*=========*/
01068   trx_t*    trx,  
01069   trx_sig_t*  sig,  
01070   que_thr_t** next_thr)
01076 {
01077   que_t*    roll_graph;
01078   que_thr_t*  thr;
01079   /*  que_thr_t*  thr2; */
01080 
01081   ut_ad(mutex_own(&kernel_mutex));
01082   ut_ad((trx->undo_no_arr == NULL) || ((trx->undo_no_arr)->n_used == 0));
01083 
01084   /* Initialize the rollback field in the transaction */
01085 
01086   switch (sig->type) {
01087   case TRX_SIG_TOTAL_ROLLBACK:
01088     trx->roll_limit = 0;
01089     break;
01090   case TRX_SIG_ROLLBACK_TO_SAVEPT:
01091     trx->roll_limit = (sig->savept).least_undo_no;
01092     break;
01093   case TRX_SIG_ERROR_OCCURRED:
01094     trx->roll_limit = trx->last_sql_stat_start.least_undo_no;
01095     break;
01096   default:
01097     ut_error;
01098   }
01099 
01100   ut_a(trx->roll_limit <= trx->undo_no);
01101 
01102   trx->pages_undone = 0;
01103 
01104   if (trx->undo_no_arr == NULL) {
01105     trx->undo_no_arr = trx_undo_arr_create();
01106   }
01107 
01108   /* Build a 'query' graph which will perform the undo operations */
01109 
01110   roll_graph = trx_roll_graph_build(trx);
01111 
01112   trx->graph = roll_graph;
01113   trx->que_state = TRX_QUE_ROLLING_BACK;
01114 
01115   thr = que_fork_start_command(roll_graph);
01116 
01117   ut_ad(thr);
01118 
01119   /*  thr2 = que_fork_start_command(roll_graph);
01120 
01121   ut_ad(thr2); */
01122 
01123   if (next_thr && (*next_thr == NULL)) {
01124     *next_thr = thr;
01125     /*    srv_que_task_enqueue_low(thr2); */
01126   } else {
01127     srv_que_task_enqueue_low(thr);
01128     /*    srv_que_task_enqueue_low(thr2); */
01129   }
01130 }
01131 
01132 /****************************************************************/
01138 UNIV_INTERN
01139 que_t*
01140 trx_roll_graph_build(
01141 /*=================*/
01142   trx_t*  trx)  
01143 {
01144   mem_heap_t* heap;
01145   que_fork_t* fork;
01146   que_thr_t*  thr;
01147   /*  que_thr_t*  thr2; */
01148 
01149   ut_ad(mutex_own(&kernel_mutex));
01150 
01151   heap = mem_heap_create(512);
01152   fork = que_fork_create(NULL, NULL, QUE_FORK_ROLLBACK, heap);
01153   fork->trx = trx;
01154 
01155   thr = que_thr_create(fork, heap);
01156   /*  thr2 = que_thr_create(fork, heap); */
01157 
01158   thr->child = row_undo_node_create(trx, thr, heap);
01159   /*  thr2->child = row_undo_node_create(trx, thr2, heap); */
01160 
01161   return(fork);
01162 }
01163 
01164 /*********************************************************************/
01167 static
01168 void
01169 trx_finish_error_processing(
01170 /*========================*/
01171   trx_t*  trx)  
01172 {
01173   trx_sig_t*  sig;
01174   trx_sig_t*  next_sig;
01175 
01176   ut_ad(mutex_own(&kernel_mutex));
01177 
01178   sig = UT_LIST_GET_FIRST(trx->signals);
01179 
01180   while (sig != NULL) {
01181     next_sig = UT_LIST_GET_NEXT(signals, sig);
01182 
01183     if (sig->type == TRX_SIG_ERROR_OCCURRED) {
01184 
01185       trx_sig_remove(trx, sig);
01186     }
01187 
01188     sig = next_sig;
01189   }
01190 
01191   trx->que_state = TRX_QUE_RUNNING;
01192 }
01193 
01194 /*********************************************************************/
01196 static
01197 void
01198 trx_finish_partial_rollback_off_kernel(
01199 /*===================================*/
01200   trx_t*    trx,  
01201   que_thr_t** next_thr)
01206 {
01207   trx_sig_t*  sig;
01208 
01209   ut_ad(mutex_own(&kernel_mutex));
01210 
01211   sig = UT_LIST_GET_FIRST(trx->signals);
01212 
01213   /* Remove the signal from the signal queue and send reply message
01214   to it */
01215 
01216   trx_sig_reply(sig, next_thr);
01217   trx_sig_remove(trx, sig);
01218 
01219   trx->que_state = TRX_QUE_RUNNING;
01220 }
01221 
01222 /****************************************************************/
01224 UNIV_INTERN
01225 void
01226 trx_finish_rollback_off_kernel(
01227 /*===========================*/
01228   que_t*    graph,  
01229   trx_t*    trx,  
01230   que_thr_t** next_thr)
01236 {
01237   trx_sig_t*  sig;
01238   trx_sig_t*  next_sig;
01239 
01240   ut_ad(mutex_own(&kernel_mutex));
01241 
01242   ut_a(trx->undo_no_arr == NULL || trx->undo_no_arr->n_used == 0);
01243 
01244   /* Free the memory reserved by the undo graph */
01245   que_graph_free(graph);
01246 
01247   sig = UT_LIST_GET_FIRST(trx->signals);
01248 
01249   if (sig->type == TRX_SIG_ROLLBACK_TO_SAVEPT) {
01250 
01251     trx_finish_partial_rollback_off_kernel(trx, next_thr);
01252 
01253     return;
01254 
01255   } else if (sig->type == TRX_SIG_ERROR_OCCURRED) {
01256 
01257     trx_finish_error_processing(trx);
01258 
01259     return;
01260   }
01261 
01262 #ifdef UNIV_DEBUG
01263   if (lock_print_waits) {
01264     fprintf(stderr, "Trx " TRX_ID_FMT " rollback finished\n",
01265       trx->id);
01266   }
01267 #endif /* UNIV_DEBUG */
01268 
01269   trx_commit_off_kernel(trx);
01270 
01271   /* Remove all TRX_SIG_TOTAL_ROLLBACK signals from the signal queue and
01272   send reply messages to them */
01273 
01274   trx->que_state = TRX_QUE_RUNNING;
01275 
01276   while (sig != NULL) {
01277     next_sig = UT_LIST_GET_NEXT(signals, sig);
01278 
01279     if (sig->type == TRX_SIG_TOTAL_ROLLBACK) {
01280 
01281       trx_sig_reply(sig, next_thr);
01282 
01283       trx_sig_remove(trx, sig);
01284     }
01285 
01286     sig = next_sig;
01287   }
01288 }
01289 
01290 /*********************************************************************/
01293 UNIV_INTERN
01294 roll_node_t*
01295 roll_node_create(
01296 /*=============*/
01297   mem_heap_t* heap) 
01298 {
01299   roll_node_t*  node;
01300 
01301         
01302         node = static_cast<roll_node_t *>(mem_heap_alloc(heap, sizeof(roll_node_t)));
01303   node->common.type = QUE_NODE_ROLLBACK;
01304   node->state = ROLL_NODE_SEND;
01305 
01306   node->partial = FALSE;
01307 
01308   return(node);
01309 }
01310 
01311 /***********************************************************/
01314 UNIV_INTERN
01315 que_thr_t*
01316 trx_rollback_step(
01317 /*==============*/
01318   que_thr_t*  thr)  
01319 {
01320   roll_node_t*  node;
01321   ulint   sig_no;
01322   trx_savept_t* savept;
01323 
01324         node = static_cast<roll_node_t *>(thr->run_node);
01325 
01326   ut_ad(que_node_get_type(node) == QUE_NODE_ROLLBACK);
01327 
01328   if (thr->prev_node == que_node_get_parent(node)) {
01329     node->state = ROLL_NODE_SEND;
01330   }
01331 
01332   if (node->state == ROLL_NODE_SEND) {
01333     mutex_enter(&kernel_mutex);
01334 
01335     node->state = ROLL_NODE_WAIT;
01336 
01337     if (node->partial) {
01338       sig_no = TRX_SIG_ROLLBACK_TO_SAVEPT;
01339       savept = &(node->savept);
01340     } else {
01341       sig_no = TRX_SIG_TOTAL_ROLLBACK;
01342       savept = NULL;
01343     }
01344 
01345     /* Send a rollback signal to the transaction */
01346 
01347     trx_sig_send(thr_get_trx(thr), sig_no, TRX_SIG_SELF, thr,
01348            savept, NULL);
01349 
01350     thr->state = QUE_THR_SIG_REPLY_WAIT;
01351 
01352     mutex_exit(&kernel_mutex);
01353 
01354     return(NULL);
01355   }
01356 
01357   ut_ad(node->state == ROLL_NODE_WAIT);
01358 
01359   thr->run_node = que_node_get_parent(node);
01360 
01361   return(thr);
01362 }