Drizzled Public API Documentation

srv0srv.cc
00001 /*****************************************************************************
00002 
00003 Copyright (C) 1995, 2010, Innobase Oy. All Rights Reserved.
00004 Copyright (C) 2008, 2009 Google Inc.
00005 Copyright (C) 2009, Percona Inc.
00006 
00007 Portions of this file contain modifications contributed and copyrighted by
00008 Google, Inc. Those modifications are gratefully acknowledged and are described
00009 briefly in the InnoDB documentation. The contributions by Google are
00010 incorporated with their permission, and subject to the conditions contained in
00011 the file COPYING.Google.
00012 
00013 Portions of this file contain modifications contributed and copyrighted
00014 by Percona Inc.. Those modifications are
00015 gratefully acknowledged and are described briefly in the InnoDB
00016 documentation. The contributions by Percona Inc. are incorporated with
00017 their permission, and subject to the conditions contained in the file
00018 COPYING.Percona.
00019 
00020 This program is free software; you can redistribute it and/or modify it under
00021 the terms of the GNU General Public License as published by the Free Software
00022 Foundation; version 2 of the License.
00023 
00024 This program is distributed in the hope that it will be useful, but WITHOUT
00025 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00026 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
00027 
00028 You should have received a copy of the GNU General Public License along with
00029 this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
00030 St, Fifth Floor, Boston, MA 02110-1301 USA
00031 
00032 *****************************************************************************/
00033 
00034 /**************************************************/
00060 /* Dummy comment */
00061 #include "srv0srv.h"
00062 
00063 #include <drizzled/error.h>
00064 #include <drizzled/errmsg_print.h>
00065 
00066 #include "ut0mem.h"
00067 #include "ut0ut.h"
00068 #include "os0proc.h"
00069 #include "mem0mem.h"
00070 #include "mem0pool.h"
00071 #include "sync0sync.h"
00072 #include "thr0loc.h"
00073 #include "que0que.h"
00074 #include "log0recv.h"
00075 #include "pars0pars.h"
00076 #include "usr0sess.h"
00077 #include "lock0lock.h"
00078 #include "trx0purge.h"
00079 #include "ibuf0ibuf.h"
00080 #include "buf0flu.h"
00081 #include "buf0lru.h"
00082 #include "btr0sea.h"
00083 #include "dict0load.h"
00084 #include "dict0boot.h"
00085 #include "srv0start.h"
00086 #include "row0mysql.h"
00087 #include "ha_prototypes.h"
00088 #include "trx0i_s.h"
00089 #include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */
00090 
00091 /* This is set to TRUE if the MySQL user has set it in MySQL; currently
00092 affects only FOREIGN KEY definition parsing */
00093 UNIV_INTERN ibool srv_lower_case_table_names  = FALSE;
00094 
00095 /* The following counter is incremented whenever there is some user activity
00096 in the server */
00097 UNIV_INTERN ulint srv_activity_count  = 0;
00098 
00099 /* The following is the maximum allowed duration of a lock wait. */
00100 UNIV_INTERN ulint srv_fatal_semaphore_wait_threshold = 600;
00101 
00102 /* How much data manipulation language (DML) statements need to be delayed,
00103 in microseconds, in order to reduce the lagging of the purge thread. */
00104 UNIV_INTERN ulint srv_dml_needed_delay = 0;
00105 
00106 UNIV_INTERN ibool srv_lock_timeout_active = FALSE;
00107 UNIV_INTERN ibool srv_monitor_active = FALSE;
00108 UNIV_INTERN ibool srv_error_monitor_active = FALSE;
00109 
00110 UNIV_INTERN const char* srv_main_thread_op_info = "";
00111 
00112 /* Server parameters which are read from the initfile */
00113 
00114 /* The following three are dir paths which are catenated before file
00115 names, where the file name itself may also contain a path */
00116 
00117 UNIV_INTERN char* srv_data_home = NULL;
00118 #ifdef UNIV_LOG_ARCHIVE
00119 UNIV_INTERN char* srv_arch_dir  = NULL;
00120 #endif /* UNIV_LOG_ARCHIVE */
00121 
00124 UNIV_INTERN my_bool srv_file_per_table;
00126 UNIV_INTERN ulint srv_file_format = 0;
00130 UNIV_INTERN ulint srv_max_file_format_at_startup = DICT_TF_FORMAT_MAX;
00131 
00132 #if DICT_TF_FORMAT_51
00133 # error "DICT_TF_FORMAT_51 must be 0!"
00134 #endif
00135 
00137 UNIV_INTERN ibool srv_locks_unsafe_for_binlog = FALSE;
00138 
00139 /* If this flag is TRUE, then we will use the native aio of the
00140 OS (provided we compiled Innobase with it in), otherwise we will
00141 use simulated aio we build below with threads.
00142 Currently we support native aio on windows and linux */
00143 UNIV_INTERN my_bool srv_use_native_aio = TRUE;
00144 
00145 #ifdef __WIN__
00146 /* Windows native condition variables. We use runtime loading / function
00147 pointers, because they are not available on Windows Server 2003 and
00148 Windows XP/2000.
00149 
00150 We use condition for events on Windows if possible, even if os_event
00151 resembles Windows kernel event object well API-wise. The reason is
00152 performance, kernel objects are heavyweights and WaitForSingleObject() is a
00153 performance killer causing calling thread to context switch. Besides, Innodb
00154 is preallocating large number (often millions) of os_events. With kernel event
00155 objects it takes a big chunk out of non-paged pool, which is better suited
00156 for tasks like IO than for storing idle event objects. */
00157 UNIV_INTERN ibool srv_use_native_conditions = FALSE;
00158 #endif /* __WIN__ */
00159 
00160 UNIV_INTERN ulint srv_n_data_files = 0;
00161 UNIV_INTERN char**  srv_data_file_names = NULL;
00162 /* size in database pages */
00163 UNIV_INTERN ulint*  srv_data_file_sizes = NULL;
00164 
00165 /* if TRUE, then we auto-extend the last data file */
00166 UNIV_INTERN ibool srv_auto_extend_last_data_file  = FALSE;
00167 /* if != 0, this tells the max size auto-extending may increase the
00168 last data file size */
00169 UNIV_INTERN ulint srv_last_file_size_max  = 0;
00170 /* If the last data file is auto-extended, we add this
00171 many pages to it at a time */
00172 UNIV_INTERN unsigned int srv_auto_extend_increment = 8;
00173 UNIV_INTERN ulint*  srv_data_file_is_raw_partition = NULL;
00174 
00175 /* If the following is TRUE we do not allow inserts etc. This protects
00176 the user from forgetting the 'newraw' keyword to my.cnf */
00177 
00178 UNIV_INTERN ibool srv_created_new_raw = FALSE;
00179 
00180 UNIV_INTERN char**  srv_log_group_home_dirs = NULL;
00181 
00182 UNIV_INTERN ulint srv_n_log_groups  = ULINT_MAX;
00183 UNIV_INTERN ulint srv_n_log_files   = ULINT_MAX;
00184 /* size in database pages */
00185 UNIV_INTERN ulint srv_log_file_size = ULINT_MAX;
00186 /* size in database pages */
00187 UNIV_INTERN ulint srv_log_buffer_size = ULINT_MAX;
00188 UNIV_INTERN ulong srv_flush_log_at_trx_commit = 1;
00189 
00190 /* Try to flush dirty pages so as to avoid IO bursts at
00191 the checkpoints. */
00192 UNIV_INTERN bool  srv_adaptive_flushing = TRUE;
00193 
/* Maximum number of times a caller may skip (not wait for) a mutex
before it must block on it; see MUTEX_NOWAIT below. */
00196 #define MAX_MUTEX_NOWAIT  20
00197 
/* Evaluates to nonzero while mutex_skipped is still below
MAX_MUTEX_NOWAIT, i.e. while the caller is allowed to try the mutex
in no-wait mode instead of blocking. */
00202 #define MUTEX_NOWAIT(mutex_skipped) ((mutex_skipped) < MAX_MUTEX_NOWAIT)
00203 
00206 #if defined(BUILD_DRIZZLE)
00207 const byte  srv_latin1_ordering[256]  /* The sort order table of the latin1
00208           character set. The following table is
00209           the MySQL order as of Feb 10th, 2002 */
00210 = {
00211   0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
00212 , 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
00213 , 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
00214 , 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F
00215 , 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27
00216 , 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F
00217 , 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37
00218 , 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F
00219 , 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47
00220 , 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F
00221 , 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57
00222 , 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F
00223 , 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47
00224 , 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F
00225 , 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57
00226 , 0x58, 0x59, 0x5A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
00227 , 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87
00228 , 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F
00229 , 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97
00230 , 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F
00231 , 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7
00232 , 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF
00233 , 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7
00234 , 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF
00235 , 0x41, 0x41, 0x41, 0x41, 0x5C, 0x5B, 0x5C, 0x43
00236 , 0x45, 0x45, 0x45, 0x45, 0x49, 0x49, 0x49, 0x49
00237 , 0x44, 0x4E, 0x4F, 0x4F, 0x4F, 0x4F, 0x5D, 0xD7
00238 , 0xD8, 0x55, 0x55, 0x55, 0x59, 0x59, 0xDE, 0xDF
00239 , 0x41, 0x41, 0x41, 0x41, 0x5C, 0x5B, 0x5C, 0x43
00240 , 0x45, 0x45, 0x45, 0x45, 0x49, 0x49, 0x49, 0x49
00241 , 0x44, 0x4E, 0x4F, 0x4F, 0x4F, 0x4F, 0x5D, 0xF7
00242 , 0xD8, 0x55, 0x55, 0x55, 0x59, 0x59, 0xDE, 0xFF
00243 };
00244 #else
00245 UNIV_INTERN const byte*        srv_latin1_ordering;
00246 #endif /* BUILD_DRIZZLE */
00247 
00248 
00249 /* use os/external memory allocator */
00250 UNIV_INTERN my_bool srv_use_sys_malloc  = TRUE;
00251 /* requested size in kilobytes */
00252 UNIV_INTERN ulint srv_buf_pool_size = ULINT_MAX;
00253 /* requested number of buffer pool instances */
00254 UNIV_INTERN ulint       srv_buf_pool_instances  = 1;
00255 /* previously requested size */
00256 UNIV_INTERN ulint srv_buf_pool_old_size;
00257 /* current size in kilobytes */
00258 UNIV_INTERN ulint srv_buf_pool_curr_size  = 0;
00259 /* size in bytes */
00260 UNIV_INTERN ulint srv_mem_pool_size = ULINT_MAX;
00261 UNIV_INTERN ulint srv_lock_table_size = ULINT_MAX;
00262 
00263 /* This parameter is deprecated. Use srv_n_io_[read|write]_threads
00264 instead. */
00265 UNIV_INTERN ulint srv_n_file_io_threads = ULINT_MAX;
00266 UNIV_INTERN ulint srv_n_read_io_threads = ULINT_MAX;
00267 UNIV_INTERN ulint srv_n_write_io_threads  = ULINT_MAX;
00268 
00269 /* User settable value of the number of pages that must be present
00270 in the buffer cache and accessed sequentially for InnoDB to trigger a
00271 readahead request. */
00272 UNIV_INTERN ulong srv_read_ahead_threshold  = 56;
00273 
00274 #ifdef UNIV_LOG_ARCHIVE
00275 UNIV_INTERN ibool   srv_log_archive_on  = FALSE;
00276 UNIV_INTERN ibool   srv_archive_recovery  = 0;
00277 UNIV_INTERN ib_uint64_t srv_archive_recovery_limit_lsn;
00278 #endif /* UNIV_LOG_ARCHIVE */
00279 
00280 /* This parameter is used to throttle the number of insert buffers that are
00281 merged in a batch. By increasing this parameter on a faster disk you can
00282 possibly reduce the number of I/O operations performed to complete the
00283 merge operation. The value of this parameter is used as is by the
00284 background loop when the system is idle (low load), on a busy system
00285 the parameter is scaled down by a factor of 4, this is to avoid putting
00286 a heavier load on the I/O sub system. */
00287 
00288 UNIV_INTERN ulong srv_insert_buffer_batch_size = 20;
00289 
00290 UNIV_INTERN char* srv_file_flush_method_str = NULL;
00291 UNIV_INTERN ulint srv_unix_file_flush_method = SRV_UNIX_FSYNC;
00292 UNIV_INTERN ulint srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
00293 
00294 UNIV_INTERN ulint srv_max_n_open_files    = 300;
00295 
00296 /* Number of IO operations per second the server can do */
00297 UNIV_INTERN ulong srv_io_capacity         = 200;
00298 
00299 /* The InnoDB main thread tries to keep the ratio of modified pages
00300 in the buffer pool to all database pages in the buffer pool smaller than
00301 the following number. But it is not guaranteed that the value stays below
00302 that during a time of heavy update/insert activity. */
00303 
00304 UNIV_INTERN ulong srv_max_buf_pool_modified_pct = 75;
00305 
00306 /* the number of purge threads to use from the worker pool (currently 0 or 1).*/
00307 UNIV_INTERN ulong srv_n_purge_threads = 0;
00308 
00309 /* the number of records to purge in one batch */
00310 UNIV_INTERN ulong srv_purge_batch_size = 20;
00311 
00312 /* variable counts amount of data read in total (in bytes) */
00313 UNIV_INTERN ulint srv_data_read = 0;
00314 
00315 /* here we count the amount of data written in total (in bytes) */
00316 UNIV_INTERN ulint srv_data_written = 0;
00317 
00318 /* the number of the log write requests done */
00319 UNIV_INTERN ulint srv_log_write_requests = 0;
00320 
00321 /* the number of physical writes to the log performed */
00322 UNIV_INTERN ulint srv_log_writes = 0;
00323 
00324 /* amount of data written to the log files in bytes */
00325 UNIV_INTERN ulint srv_os_log_written = 0;
00326 
00327 /* amount of writes being done to the log files */
00328 UNIV_INTERN ulint srv_os_log_pending_writes = 0;
00329 
00330 /* we increase this counter when we don't have enough space in the
00331 log buffer and have to flush it */
00332 UNIV_INTERN ulint srv_log_waits = 0;
00333 
00334 /* this variable counts the amount of times, when the doublewrite buffer
00335 was flushed */
00336 UNIV_INTERN ulint srv_dblwr_writes = 0;
00337 
00338 /* here we store the number of pages that have been flushed to the
00339 doublewrite buffer */
00340 UNIV_INTERN ulint srv_dblwr_pages_written = 0;
00341 
00342 /* in this variable we store the number of write requests issued */
00343 UNIV_INTERN ulint srv_buf_pool_write_requests = 0;
00344 
00345 /* here we store the number of times when we had to wait for a free page
00346 in the buffer pool. It happens when the buffer pool is full and we need
00347 to make a flush, in order to be able to read or create a page. */
00348 UNIV_INTERN ulint srv_buf_pool_wait_free = 0;
00349 
00350 /* variable to count the number of pages that were written from buffer
00351 pool to the disk */
00352 UNIV_INTERN ulint srv_buf_pool_flushed = 0;
00353 
00356 UNIV_INTERN ulint srv_buf_pool_reads = 0;
00357 
00358 /* structure to pass status variables to MySQL */
00359 UNIV_INTERN export_struc export_vars;
00360 
00361 /* If the following is != 0 we do not allow inserts etc. This protects
00362 the user from forgetting the innodb_force_recovery keyword to my.cnf */
00363 
00364 UNIV_INTERN ulint srv_force_recovery  = 0;
00365 /*-----------------------*/
00366 /* We are prepared for a situation that we have this many threads waiting for
00367 a semaphore inside InnoDB. innobase_start_or_create_for_mysql() sets the
00368 value. */
00369 
00370 UNIV_INTERN ulint srv_max_n_threads = 0;
00371 
00372 /* The following controls how many threads we let inside InnoDB concurrently:
00373 threads waiting for locks are not counted into the number because otherwise
00374 we could get a deadlock. MySQL creates a thread for each user session, and
00375 semaphore contention and convoy problems can occur without this restriction.
00376 Value 10 should be good if there are less than 4 processors + 4 disks in the
00377 computer. Bigger computers need bigger values. Value 0 will disable the
00378 concurrency check. */
00379 
00380 UNIV_INTERN ulong srv_thread_concurrency  = 0;
00381 
00382 /* this mutex protects srv_conc data structures */
00383 UNIV_INTERN os_fast_mutex_t srv_conc_mutex;
00384 /* number of transactions that have declared_to_be_inside_innodb set.
00385 It used to be a non-error for this value to drop below zero temporarily.
00386 This is no longer true. We'll, however, keep the lint datatype to add
00387 assertions to catch any corner cases that we may have missed. */
00388 UNIV_INTERN lint  srv_conc_n_threads  = 0;
00389 /* number of OS threads waiting in the FIFO for a permission to enter
00390 InnoDB */
00391 UNIV_INTERN ulint srv_conc_n_waiting_threads = 0;
00392 
/* Wait slot for the thread-concurrency FIFO: one OS thread waiting for
permission to enter InnoDB parks on one of these (see srv_conc_queue
and srv_conc_slots below). */
00393 typedef struct srv_conc_slot_struct srv_conc_slot_t;
00394 struct srv_conc_slot_struct{
00395   os_event_t      event;    /* event the waiting thread sleeps on */
00396   ibool       reserved; /* TRUE while this slot is in use by a waiter */
00398   ibool       wait_ended; /* TRUE once the wait has been ended --
          NOTE(review): exact signaling protocol lives in the
          srv_conc_* functions, not visible in this chunk */
00405   UT_LIST_NODE_T(srv_conc_slot_t) srv_conc_queue; /* list node linking
          this slot into the srv_conc_queue waiting list */
00406 };
00407 
00408 /* queue of threads waiting to get in */
00409 UNIV_INTERN UT_LIST_BASE_NODE_T(srv_conc_slot_t)  srv_conc_queue;
00410 /* array of wait slots */
00411 UNIV_INTERN srv_conc_slot_t* srv_conc_slots;
00412 
00413 /* Number of times a thread is allowed to enter InnoDB within the same
00414 SQL query after it has once got the ticket at srv_conc_enter_innodb */
00415 #define SRV_FREE_TICKETS_TO_ENTER srv_n_free_tickets_to_enter
00416 #define SRV_THREAD_SLEEP_DELAY srv_thread_sleep_delay
00417 /*-----------------------*/
00418 /* If the following is set to 1 then we do not run purge and insert buffer
00419 merge to completion before shutdown. If it is set to 2, do not even flush the
00420 buffer pool to data files at the shutdown: we effectively 'crash'
00421 InnoDB (but lose no committed transactions). */
00422 UNIV_INTERN ulint srv_fast_shutdown = 0;
00423 
00424 /* Generate a innodb_status.<pid> file */
00425 UNIV_INTERN ibool srv_innodb_status = FALSE;
00426 
00427 /* When estimating number of different key values in an index, sample
00428 this many index pages */
00429 UNIV_INTERN ib_uint64_t srv_stats_sample_pages = 8;
00430 
00431 UNIV_INTERN ibool srv_use_doublewrite_buf = TRUE;
00432 UNIV_INTERN ibool srv_use_checksums = TRUE;
00433 
00434 UNIV_INTERN ulong srv_replication_delay   = 0;
00435 
00436 /*-------------------------------------------*/
00437 UNIV_INTERN ulong srv_n_spin_wait_rounds  = 30;
00438 UNIV_INTERN ulong srv_n_free_tickets_to_enter = 500;
00439 UNIV_INTERN ulong srv_thread_sleep_delay = 10000;
00440 UNIV_INTERN ulong srv_spin_wait_delay = 6;
00441 UNIV_INTERN ibool srv_priority_boost  = TRUE;
00442 
00443 #ifdef UNIV_DEBUG
00444 UNIV_INTERN ibool srv_print_thread_releases = FALSE;
00445 UNIV_INTERN ibool srv_print_lock_waits    = FALSE;
00446 UNIV_INTERN ibool srv_print_buf_io    = FALSE;
00447 UNIV_INTERN ibool srv_print_log_io    = FALSE;
00448 UNIV_INTERN ibool srv_print_latch_waits   = FALSE;
00449 #endif /* UNIV_DEBUG */
00450 
00451 UNIV_INTERN ulint   srv_n_rows_inserted   = 0;
00452 UNIV_INTERN ulint   srv_n_rows_updated    = 0;
00453 UNIV_INTERN ulint   srv_n_rows_deleted    = 0;
00454 UNIV_INTERN ulint   srv_n_rows_read     = 0;
00455 
00456 static ulint  srv_n_rows_inserted_old   = 0;
00457 static ulint  srv_n_rows_updated_old    = 0;
00458 static ulint  srv_n_rows_deleted_old    = 0;
00459 static ulint  srv_n_rows_read_old   = 0;
00460 
00461 UNIV_INTERN ulint   srv_n_lock_wait_count   = 0;
00462 UNIV_INTERN ulint   srv_n_lock_wait_current_count = 0;
00463 UNIV_INTERN ib_int64_t  srv_n_lock_wait_time    = 0;
00464 UNIV_INTERN ulint   srv_n_lock_max_wait_time  = 0;
00465 
00466 UNIV_INTERN ulint   srv_truncated_status_writes = 0;
00467 
00468 /*
00469   Set the following to 0 if you want InnoDB to write messages on
00470   stderr on startup/shutdown
00471 */
00472 UNIV_INTERN ibool srv_print_verbose_log   = TRUE;
00473 UNIV_INTERN ibool srv_print_innodb_monitor  = FALSE;
00474 UNIV_INTERN ibool srv_print_innodb_lock_monitor = FALSE;
00475 UNIV_INTERN ibool srv_print_innodb_tablespace_monitor = FALSE;
00476 UNIV_INTERN ibool srv_print_innodb_table_monitor = FALSE;
00477 
00478 /* Array of English strings describing the current state of an
00479 i/o handler thread */
00480 
00481 UNIV_INTERN const char* srv_io_thread_op_info[SRV_MAX_N_IO_THREADS];
00482 UNIV_INTERN const char* srv_io_thread_function[SRV_MAX_N_IO_THREADS];
00483 
00484 UNIV_INTERN time_t  srv_last_monitor_time;
00485 
00486 UNIV_INTERN mutex_t srv_innodb_monitor_mutex;
00487 
00488 /* Mutex for locking srv_monitor_file */
00489 UNIV_INTERN mutex_t srv_monitor_file_mutex;
00490 
00491 #ifdef UNIV_PFS_MUTEX
00492 /* Key to register kernel_mutex with performance schema */
00493 UNIV_INTERN mysql_pfs_key_t kernel_mutex_key;
00494 /* Key to protect writing the commit_id to the sys header */
00495 UNIV_INTERN mysql_pfs_key_t     commit_id_mutex_key;
00496 /* Key to register srv_innodb_monitor_mutex with performance schema */
00497 UNIV_INTERN mysql_pfs_key_t srv_innodb_monitor_mutex_key;
00498 /* Key to register srv_monitor_file_mutex with performance schema */
00499 UNIV_INTERN mysql_pfs_key_t srv_monitor_file_mutex_key;
00500 /* Key to register srv_dict_tmpfile_mutex with performance schema */
00501 UNIV_INTERN mysql_pfs_key_t srv_dict_tmpfile_mutex_key;
00502 /* Key to register the mutex with performance schema */
00503 UNIV_INTERN mysql_pfs_key_t srv_misc_tmpfile_mutex_key;
00504 #endif /* UNIV_PFS_MUTEX */
00505 
00506 /* Temporary file for innodb monitor output */
00507 UNIV_INTERN FILE* srv_monitor_file;
00508 /* Mutex for locking srv_dict_tmpfile.
00509 This mutex has a very high rank; threads reserving it should not
00510 be holding any InnoDB latches. */
00511 UNIV_INTERN mutex_t srv_dict_tmpfile_mutex;
00512 /* Temporary file for output from the data dictionary */
00513 UNIV_INTERN FILE* srv_dict_tmpfile;
00514 /* Mutex for locking srv_misc_tmpfile.
00515 This mutex has a very low rank; threads reserving it should not
00516 acquire any further latches or sleep before releasing this one. */
00517 UNIV_INTERN mutex_t srv_misc_tmpfile_mutex;
00519 /* Temporary file for miscellaneous diagnostic output */
00519 UNIV_INTERN FILE* srv_misc_tmpfile;
00520 
00521 UNIV_INTERN ulint srv_main_thread_process_no  = 0;
00522 UNIV_INTERN ulint srv_main_thread_id    = 0;
00523 
00524 /* The following count work done by srv_master_thread. */
00525 
00526 /* Iterations by the 'once per second' loop. */
00527 static ulint   srv_main_1_second_loops    = 0;
00528 /* Calls to sleep by the 'once per second' loop. */
00529 static ulint   srv_main_sleeps      = 0;
00530 /* Iterations by the 'once per 10 seconds' loop. */
00531 static ulint   srv_main_10_second_loops   = 0;
00532 /* Iterations of the loop bounded by the 'background_loop' label. */
00533 static ulint   srv_main_background_loops  = 0;
00534 /* Iterations of the loop bounded by the 'flush_loop' label. */
00535 static ulint   srv_main_flush_loops   = 0;
00536 /* Log writes involving flush. */
00537 static ulint   srv_log_writes_and_flush   = 0;
00538 
00539 /* This is only ever touched by the master thread. It records the
00540 time when the last flush of log file has happened. The master
00541 thread ensures that we flush the log files at least once per
00542 second. */
00543 static time_t srv_last_log_flush_time;
00544 
00545 /* The master thread performs various tasks based on the current
00546 state of IO activity and the level of IO utilization in past
00547 intervals. Following macros define thresholds for these conditions. */
00548 #define SRV_PEND_IO_THRESHOLD (PCT_IO(3))
00549 #define SRV_RECENT_IO_ACTIVITY  (PCT_IO(5))
00550 #define SRV_PAST_IO_ACTIVITY  (PCT_IO(200))
00551 
00552 /*
00553   IMPLEMENTATION OF THE SERVER MAIN PROGRAM
00554   =========================================
00555 
00556 There is the following analogue between this database
00557 server and an operating system kernel:
00558 
00559 DB concept      equivalent OS concept
00560 ----------      ---------------------
00561 transaction   --  process;
00562 
00563 query thread    --  thread;
00564 
00565 lock      --  semaphore;
00566 
00567 transaction set to
00568 the rollback state  --  kill signal delivered to a process;
00569 
00570 kernel      --  kernel;
00571 
00572 query thread execution:
00573 (a) without kernel mutex
00574 reserved    --  process executing in user mode;
00575 (b) with kernel mutex reserved
00576       --  process executing in kernel mode;
00577 
00578 The server is controlled by a master thread which runs at
00579 a priority higher than normal, that is, higher than user threads.
00580 It sleeps most of the time, and wakes up, say, every 300 milliseconds,
00581 to check whether there is anything happening in the server which
00582 requires intervention of the master thread. Such situations may be,
00583 for example, when flushing of dirty blocks is needed in the buffer
00584 pool or old version of database rows have to be cleaned away.
00585 
00586 The threads which we call user threads serve the queries of
00587 the clients and input from the console of the server.
00588 They run at normal priority. The server may have several
00589 communications endpoints. A dedicated set of user threads waits
00590 at each of these endpoints ready to receive a client request.
00591 Each request is taken by a single user thread, which then starts
00592 processing and, when the result is ready, sends it to the client
00593 and returns to wait at the same endpoint the thread started from.
00594 
00595 So, we do not have dedicated communication threads listening at
00596 the endpoints and dealing the jobs to dedicated worker threads.
00597 Our architecture saves one thread switch per request, compared
00598 to the solution with dedicated communication threads
00599 which amounts to 15 microseconds on 100 MHz Pentium
00600 running NT. If the client
00601 is communicating over a network, this saving is negligible, but
00602 if the client resides in the same machine, maybe in an SMP machine
00603 on a different processor from the server thread, the saving
00604 can be important as the threads can communicate over shared
00605 memory with an overhead of a few microseconds.
00606 
00607 We may later implement a dedicated communication thread solution
00608 for those endpoints which communicate over a network.
00609 
00610 Our solution with user threads has two problems: for each endpoint
00611 there has to be a number of listening threads. If there are many
00612 communication endpoints, it may be difficult to set the right number
00613 of concurrent threads in the system, as many of the threads
00614 may always be waiting at less busy endpoints. Another problem
00615 is queuing of the messages, as the server internally does not
00616 offer any queue for jobs.
00617 
00618 Another group of user threads is intended for splitting the
00619 queries and processing them in parallel. Let us call these
00620 parallel communication threads. These threads are waiting for
00621 parallelized tasks, suspended on event semaphores.
00622 
00623 A single user thread waits for input from the console,
00624 like a command to shut the database.
00625 
00626 Utility threads are a different group of threads which takes
00627 care of the buffer pool flushing and other, mainly background
00628 operations, in the server.
00629 Some of these utility threads always run at a lower than normal
00630 priority, so that they are always in background. Some of them
00631 may dynamically boost their priority by the pri_adjust function,
00632 even to higher than normal priority, if their task becomes urgent.
00633 The running of utilities is controlled by high- and low-water marks
00634 of urgency. The urgency may be measured by the number of dirty blocks
00635 in the buffer pool, in the case of the flush thread, for example.
00636 When the high-water mark is exceeded, a utility starts running, until
00637 the urgency drops under the low-water mark. Then the utility thread
00638 suspends itself to wait for an event. The master thread is
00639 responsible for signaling this event when the utility thread is
00640 again needed.
00641 
00642 For each individual type of utility, some threads always remain
00643 at lower than normal priority. This is because pri_adjust is implemented
00644 so that the threads at normal or higher priority control their
00645 share of running time by calling sleep. Thus, if the load of the
00646 system suddenly drops, these threads cannot necessarily utilize
00647 the system fully. The background priority threads make up for this,
00648 starting to run when the load drops.
00649 
00650 When there is no activity in the system, also the master thread
00651 suspends itself to wait for an event making
00652 the server totally silent. The responsibility to signal this
00653 event is on the user thread which again receives a message
00654 from a client.
00655 
00656 There is still one complication in our server design. If a
00657 background utility thread obtains a resource (e.g., mutex) needed by a user
00658 thread, and there is also some other user activity in the system,
00659 the user thread may have to wait indefinitely long for the
00660 resource, as the OS does not schedule a background thread if
00661 there is some other runnable user thread. This problem is called
00662 priority inversion in real-time programming.
00663 
00664 One solution to the priority inversion problem would be to
00665 keep record of which thread owns which resource and
00666 in the above case boost the priority of the background thread
00667 so that it will be scheduled and it can release the resource.
00668 This solution is called priority inheritance in real-time programming.
00669 A drawback of this solution is that the overhead of acquiring a mutex
00670 increases slightly, maybe 0.2 microseconds on a 100 MHz Pentium, because
00671 the thread has to call os_thread_get_curr_id.
00672 This may be compared to 0.5 microsecond overhead for a mutex lock-unlock
00673 pair. Note that the thread
00674 cannot store the information in the resource, say mutex, itself,
00675 because competing threads could wipe out the information if it is
00676 stored before acquiring the mutex, and if it stored afterwards,
00677 the information is outdated for the time of one machine instruction,
00678 at least. (To be precise, the information could be stored to
00679 lock_word in mutex if the machine supports atomic swap.)
00680 
00681 The above solution with priority inheritance may become actual in the
00682 future, but at the moment we plan to implement a more coarse solution,
00683 which could be called a global priority inheritance. If a thread
00684 has to wait for a long time, say 300 milliseconds, for a resource,
00685 we just guess that it may be waiting for a resource owned by a background
00686 thread, and boost the priority of all runnable background threads
00687 to the normal level. The background threads then themselves adjust
00688 their fixed priority back to background after releasing all resources
00689 they had (or, at some fixed points in their program code).
00690 
00691 What is the performance of the global priority inheritance solution?
00692 We may weigh the length of the wait time 300 milliseconds, during
00693 which the system processes some other thread
00694 to the cost of boosting the priority of each runnable background
00695 thread, rescheduling it, and lowering the priority again.
00696 On 100 MHz Pentium + NT this overhead may be of the order 100
00697 microseconds per thread. So, if the number of runnable background
00698 threads is not very big, say < 100, the cost is tolerable.
00699 Utility threads probably will access resources used by
00700 user threads not very often, so collisions of user threads
00701 to preempted utility threads should not happen very often.
00702 
00703 The thread table contains
00704 information of the current status of each thread existing in the system,
00705 and also the event semaphores used in suspending the master thread
00706 and utility and parallel communication threads when they have nothing to do.
00707 The thread table can be seen as an analogue to the process table
00708 in a traditional Unix implementation.
00709 
00710 The thread table is also used in the global priority inheritance
00711 scheme. This brings in one additional complication: threads accessing
00712 the thread table must have at least normal fixed priority,
00713 because the priority inheritance solution does not work if a background
00714 thread is preempted while possessing the mutex protecting the thread table.
00715 So, if a thread accesses the thread table, its priority has to be
00716 boosted at least to normal. This priority requirement can be seen similar to
00717 the privileged mode used when processing the kernel calls in traditional
00718 Unix.*/
00719 
00720 /* Thread slot in the thread table */
00721 struct srv_slot_struct{
00722   os_thread_id_t  id;   /* OS id of the thread occupying the slot
                                   (set in srv_table_reserve_slot*()) */
00723   os_thread_t handle;   /* OS handle of the thread */
00724   unsigned  type:3;   /* thread type, a srv_thread_type value */
00725   unsigned  in_use:1; /* TRUE while the slot is reserved */
00726   unsigned  suspended:1;  /* TRUE while the thread waits on 'event' */
00728   ib_time_t suspend_time; /* time when the thread was suspended;
                                   used to compute the wait time */
00730   os_event_t  event;    /* event the suspended thread waits on;
                                   set by a releasing thread to wake it */
00732   que_thr_t*  thr;    /* suspended query thread (only used for
                                   slots of srv_mysql_table) */
00734 };
00735 
00736 /* Table for MySQL threads where they will be suspended to wait for locks */
00737 UNIV_INTERN srv_slot_t* srv_mysql_table = NULL;
00738 
00739 UNIV_INTERN os_event_t  srv_timeout_event; /* created in srv_init() */
00740 
00741 UNIV_INTERN os_event_t  srv_monitor_event; /* created in srv_init() */
00742 
00743 UNIV_INTERN os_event_t  srv_error_event; /* created in srv_init() */
00744 
00745 UNIV_INTERN os_event_t  srv_lock_timeout_thread_event; /* signalled in
                        srv_suspend_mysql_thread() to wake the lock
                        timeout thread */
00746 
00747 UNIV_INTERN srv_sys_t*  srv_sys = NULL; /* server state; owns the
                        thread table srv_sys->threads */
00748 
00749 /* padding to prevent other memory update hotspots from residing on
00750 the same memory cache line */
00751 UNIV_INTERN byte  srv_pad1[64];
00752 /* mutex protecting the server, trx structs, query threads, and lock table */
00753 UNIV_INTERN mutex_t*  kernel_mutex_temp;
00754 /* mutex protecting the sys header for writing the commit id */
00755 UNIV_INTERN mutex_t*    commit_id_mutex_temp;
00756 
00757 /* padding to prevent other memory update hotspots from residing on
00758 the same memory cache line */
00759 UNIV_INTERN byte  srv_pad2[64];
00760 
00761 #if 0
00762 /* The following three values measure the urgency of the jobs of
00763 buffer, version, and insert threads. They may vary from 0 - 1000.
00764 The server mutex protects all these variables. The low-water values
00765 tell that the server can acquiesce the utility when the value
00766 drops below this low-water mark. */
00767 
00768 static ulint  srv_meter[SRV_MASTER + 1];
00769 static ulint  srv_meter_low_water[SRV_MASTER + 1];
00770 static ulint  srv_meter_high_water[SRV_MASTER + 1];
00771 static ulint  srv_meter_high_water2[SRV_MASTER + 1];
00772 static ulint  srv_meter_foreground[SRV_MASTER + 1];
00773 #endif
00774 
00775 /***********************************************************************
00776 Prints counters for work done by srv_master_thread. */
00777 static
00778 void
00779 srv_print_master_thread_info(
00780 /*=========================*/
00781   FILE  *file)    /* in: output stream */
00782 {
00783   fprintf(file, "srv_master_thread loops: %lu 1_second, %lu sleeps, "
00784     "%lu 10_second, %lu background, %lu flush\n",
00785     srv_main_1_second_loops, srv_main_sleeps,
00786     srv_main_10_second_loops, srv_main_background_loops,
00787     srv_main_flush_loops);
00788   fprintf(file, "srv_master_thread log flush and writes: %lu\n",
00789           srv_log_writes_and_flush);
00790 }
00791 
00792 /* The following values give info about the activity going on in
00793 the database. They are protected by the server mutex. The arrays
00794 are indexed by the type of the thread. */
00795 
00796 UNIV_INTERN ulint srv_n_threads_active[SRV_MASTER + 1]; /* # of threads
                        of each type not currently suspended (decremented
                        in srv_suspend_thread(), incremented in
                        srv_release_threads()) */
00797 UNIV_INTERN ulint srv_n_threads[SRV_MASTER + 1]; /* # of threads of
                        each type; summed by srv_get_n_threads() */
00798 
00799 /*********************************************************************/
00801 UNIV_INTERN
00802 void
00803 srv_set_io_thread_op_info(
00804 /*======================*/
00805   ulint   i,  
00806   const char* str)  
00808 {
00809   ut_a(i < SRV_MAX_N_IO_THREADS);
00810 
00811   srv_io_thread_op_info[i] = str;
00812 }
00813 
00814 /*********************************************************************/
00818 static
00819 srv_slot_t*
00820 srv_table_get_nth_slot(
00821 /*===================*/
00822   ulint index)    
00823 {
00824   ut_a(index < OS_THREAD_MAX_N);
00825 
00826   return(srv_sys->threads + index);
00827 }
00828 
00829 /*********************************************************************/
00832 UNIV_INTERN
00833 ulint
00834 srv_get_n_threads(void)
00835 /*===================*/
00836 {
00837   ulint i;
00838   ulint n_threads = 0;
00839 
00840   mutex_enter(&kernel_mutex);
00841 
00842   for (i = SRV_COM; i < SRV_MASTER + 1; i++) {
00843 
00844     n_threads += srv_n_threads[i];
00845   }
00846 
00847   mutex_exit(&kernel_mutex);
00848 
00849   return(n_threads);
00850 }
00851 
00852 /*********************************************************************/
00857 static
00858 ulint
00859 srv_table_reserve_slot(
00860 /*===================*/
00861   enum srv_thread_type  type) 
00862 {
00863   srv_slot_t* slot;
00864   ulint   i;
00865 
00866   ut_a(type > 0);
00867   ut_a(type <= SRV_MASTER);
00868 
00869   i = 0;
00870   slot = srv_table_get_nth_slot(i);
00871 
00872   while (slot->in_use) {
00873     i++;
00874     slot = srv_table_get_nth_slot(i);
00875   }
00876 
00877   ut_a(slot->in_use == FALSE);
00878 
00879   slot->in_use = TRUE;
00880   slot->suspended = FALSE;
00881   slot->type = type;
00882   slot->id = os_thread_get_curr_id();
00883   slot->handle = os_thread_get_curr();
00884 
00885   thr_local_create();
00886 
00887   thr_local_set_slot_no(os_thread_get_curr_id(), i);
00888 
00889   return(i);
00890 }
00891 
00892 /*********************************************************************/
00896 static
00897 os_event_t
00898 srv_suspend_thread(void)
00899 /*====================*/
00900 {
00901   srv_slot_t*   slot;
00902   os_event_t    event;
00903   ulint     slot_no;
00904   enum srv_thread_type  type;
00905 
00906   ut_ad(mutex_own(&kernel_mutex));
00907 
00908   slot_no = thr_local_get_slot_no(os_thread_get_curr_id());
00909 
00910   if (srv_print_thread_releases) {
00911     fprintf(stderr,
00912       "Suspending thread %lu to slot %lu\n",
00913       (ulong) os_thread_get_curr_id(), (ulong) slot_no);
00914   }
00915 
00916   slot = srv_table_get_nth_slot(slot_no);
00917 
00918         type = static_cast<srv_thread_type>(slot->type);
00919 
00920   ut_ad(type >= SRV_WORKER);
00921   ut_ad(type <= SRV_MASTER);
00922 
00923   event = slot->event;
00924 
00925   slot->suspended = TRUE;
00926 
00927   ut_ad(srv_n_threads_active[type] > 0);
00928 
00929   srv_n_threads_active[type]--;
00930 
00931   os_event_reset(event);
00932 
00933   return(event);
00934 }
00935 
00936 /*********************************************************************/
00941 UNIV_INTERN
00942 ulint
00943 srv_release_threads(
00944 /*================*/
00945   enum srv_thread_type  type, 
00946   ulint     n)  
00947 {
00948   srv_slot_t* slot;
00949   ulint   i;
00950   ulint   count = 0;
00951 
00952   ut_ad(type >= SRV_WORKER);
00953   ut_ad(type <= SRV_MASTER);
00954   ut_ad(n > 0);
00955   ut_ad(mutex_own(&kernel_mutex));
00956 
00957   for (i = 0; i < OS_THREAD_MAX_N; i++) {
00958 
00959     slot = srv_table_get_nth_slot(i);
00960 
00961                 if (slot->in_use &&
00962                     (static_cast<srv_thread_type>(slot->type) == type) &&
00963                     slot->suspended) {
00964 
00965       slot->suspended = FALSE;
00966 
00967       srv_n_threads_active[type]++;
00968 
00969       os_event_set(slot->event);
00970 
00971       if (srv_print_thread_releases) {
00972         fprintf(stderr,
00973           "Releasing thread %lu type %lu"
00974           " from slot %lu\n",
00975           (ulong) slot->id, (ulong) type,
00976           (ulong) i);
00977       }
00978 
00979       count++;
00980 
00981       if (count == n) {
00982         break;
00983       }
00984     }
00985   }
00986 
00987   return(count);
00988 }
00989 
00990 /*********************************************************************/
00993 UNIV_INTERN
00994 enum srv_thread_type
00995 srv_get_thread_type(void)
00996 /*=====================*/
00997 {
00998   ulint     slot_no;
00999   srv_slot_t*   slot;
01000   enum srv_thread_type  type;
01001 
01002   mutex_enter(&kernel_mutex);
01003 
01004   slot_no = thr_local_get_slot_no(os_thread_get_curr_id());
01005 
01006   slot = srv_table_get_nth_slot(slot_no);
01007 
01008         type = static_cast<srv_thread_type>(slot->type);
01009 
01010   ut_ad(type >= SRV_WORKER);
01011   ut_ad(type <= SRV_MASTER);
01012 
01013   mutex_exit(&kernel_mutex);
01014 
01015   return(type);
01016 }
01017 
01018 /*********************************************************************/
01020 UNIV_INTERN
01021 void
01022 srv_init(void)
01023 /*==========*/
01024 {
01025   srv_conc_slot_t*  conc_slot;
01026   srv_slot_t*   slot;
01027   ulint     i;
01028 
01029         srv_sys = static_cast<srv_sys_t *>(mem_alloc(sizeof(srv_sys_t)));
01030 
01031         kernel_mutex_temp = static_cast<ib_mutex_t *>(mem_alloc(sizeof(mutex_t)));
01032   mutex_create(kernel_mutex_key, &kernel_mutex, SYNC_KERNEL);
01033 
01034   commit_id_mutex_temp = static_cast<ib_mutex_t *>(mem_alloc(sizeof(mutex_t)));
01035   mutex_create(commit_id_mutex_key, &commit_id_mutex, SYNC_COMMIT_ID_LOCK);
01036 
01037   mutex_create(srv_innodb_monitor_mutex_key,
01038          &srv_innodb_monitor_mutex, SYNC_NO_ORDER_CHECK);
01039 
01040         srv_sys->threads = static_cast<srv_table_t *>(mem_alloc(OS_THREAD_MAX_N * sizeof(srv_slot_t)));
01041 
01042   for (i = 0; i < OS_THREAD_MAX_N; i++) {
01043     slot = srv_table_get_nth_slot(i);
01044     slot->in_use = FALSE;
01045     slot->type=0; /* Avoid purify errors */
01046     slot->event = os_event_create(NULL);
01047     ut_a(slot->event);
01048   }
01049 
01050         srv_mysql_table = static_cast<srv_slot_t *>(mem_alloc(OS_THREAD_MAX_N * sizeof(srv_slot_t)));
01051 
01052   for (i = 0; i < OS_THREAD_MAX_N; i++) {
01053     slot = srv_mysql_table + i;
01054     slot->in_use = FALSE;
01055     slot->type = 0;
01056     slot->event = os_event_create(NULL);
01057     ut_a(slot->event);
01058   }
01059 
01060   srv_error_event = os_event_create(NULL);
01061 
01062   srv_timeout_event = os_event_create(NULL);
01063 
01064   srv_monitor_event = os_event_create(NULL);
01065 
01066   srv_lock_timeout_thread_event = os_event_create(NULL);
01067 
01068   for (i = 0; i < SRV_MASTER + 1; i++) {
01069     srv_n_threads_active[i] = 0;
01070     srv_n_threads[i] = 0;
01071 #if 0
01072     srv_meter[i] = 30;
01073     srv_meter_low_water[i] = 50;
01074     srv_meter_high_water[i] = 100;
01075     srv_meter_high_water2[i] = 200;
01076     srv_meter_foreground[i] = 250;
01077 #endif
01078   }
01079 
01080   UT_LIST_INIT(srv_sys->tasks);
01081 
01082   /* Create dummy indexes for infimum and supremum records */
01083 
01084   dict_ind_init();
01085 
01086   /* Init the server concurrency restriction data structures */
01087 
01088   os_fast_mutex_init(&srv_conc_mutex);
01089 
01090   UT_LIST_INIT(srv_conc_queue);
01091 
01092         srv_conc_slots = static_cast<srv_conc_slot_t *>(mem_alloc(OS_THREAD_MAX_N * sizeof(srv_conc_slot_t)));
01093 
01094   for (i = 0; i < OS_THREAD_MAX_N; i++) {
01095     conc_slot = srv_conc_slots + i;
01096     conc_slot->reserved = FALSE;
01097     conc_slot->event = os_event_create(NULL);
01098     ut_a(conc_slot->event);
01099   }
01100 
01101   /* Initialize some INFORMATION SCHEMA internal structures */
01102   trx_i_s_cache_init(trx_i_s_cache);
01103 }
01104 
01105 /*********************************************************************/
01107 UNIV_INTERN
01108 void
01109 srv_free(void)
01110 /*==========*/
01111 {
01112   os_fast_mutex_free(&srv_conc_mutex);
01113   mem_free(srv_conc_slots);
01114   srv_conc_slots = NULL;
01115 
01116   mem_free(srv_sys->threads);
01117   mem_free(srv_sys);
01118   srv_sys = NULL;
01119 
01120   mem_free(kernel_mutex_temp);
01121   kernel_mutex_temp = NULL;
01122   mem_free(srv_mysql_table);
01123   srv_mysql_table = NULL;
01124 
01125   mem_free(commit_id_mutex_temp);
01126   commit_id_mutex_temp = NULL;
01127 
01128   trx_i_s_cache_free(trx_i_s_cache);
01129 }
01130 
01131 /*********************************************************************/
/* Initializes the low-level subsystems in a fixed order: memory
utilities, recovery system variables, OS synchronization, the sync
system, the memory pool (sized by srv_mem_pool_size), and thread-local
storage. */
01134 UNIV_INTERN
01135 void
01136 srv_general_init(void)
01137 /*==================*/
01138 {
01139   ut_mem_init();
01140   /* Reset the system variables in the recovery module. */
01141   recv_sys_var_init();
01142   os_sync_init();
01143   sync_init();
01144   mem_init(srv_mem_pool_size);
01145   thr_local_init();
01146 }
01147 
01148 /*======================= InnoDB Server FIFO queue =======================*/
01149 
01150 /* Maximum allowable purge history length.  <=0 means 'infinite'. */
01151 UNIV_INTERN ulong srv_max_purge_lag   = 0; /* default 0, i.e. no limit */
01152 
01153 /*********************************************************************/
01156 UNIV_INTERN
01157 void
01158 srv_conc_enter_innodb(
01159 /*==================*/
01160   trx_t*  trx)  
01162 {
01163   ibool     has_slept = FALSE;
01164   srv_conc_slot_t*  slot    = NULL;
01165   ulint     i;
01166 
01167   if (trx->mysql_thd != NULL
01168       && thd_is_replication_slave_thread(trx->mysql_thd)) {
01169 
01170     UT_WAIT_FOR(srv_conc_n_threads
01171           < (lint)srv_thread_concurrency,
01172           srv_replication_delay * 1000);
01173 
01174     return;
01175   }
01176 
01177   /* If trx has 'free tickets' to enter the engine left, then use one
01178   such ticket */
01179 
01180   if (trx->n_tickets_to_enter_innodb > 0) {
01181     trx->n_tickets_to_enter_innodb--;
01182 
01183     return;
01184   }
01185 
01186   os_fast_mutex_lock(&srv_conc_mutex);
01187 retry:
01188   if (trx->declared_to_be_inside_innodb) {
01189     ut_print_timestamp(stderr);
01190     fputs("  InnoDB: Error: trying to declare trx"
01191           " to enter InnoDB, but\n"
01192           "InnoDB: it already is declared.\n", stderr);
01193     trx_print(stderr, trx, 0);
01194     putc('\n', stderr);
01195     os_fast_mutex_unlock(&srv_conc_mutex);
01196 
01197     return;
01198   }
01199 
01200   ut_ad(srv_conc_n_threads >= 0);
01201 
01202   if (srv_conc_n_threads < (lint)srv_thread_concurrency) {
01203 
01204     srv_conc_n_threads++;
01205     trx->declared_to_be_inside_innodb = TRUE;
01206     trx->n_tickets_to_enter_innodb = SRV_FREE_TICKETS_TO_ENTER;
01207 
01208     os_fast_mutex_unlock(&srv_conc_mutex);
01209 
01210     return;
01211   }
01212 
01213   /* If the transaction is not holding resources, let it sleep
01214   for SRV_THREAD_SLEEP_DELAY microseconds, and try again then */
01215 
01216   if (!has_slept && !trx->has_search_latch
01217       && NULL == UT_LIST_GET_FIRST(trx->trx_locks)) {
01218 
01219     has_slept = TRUE; /* We let it sleep only once to avoid
01220           starvation */
01221 
01222     srv_conc_n_waiting_threads++;
01223 
01224     os_fast_mutex_unlock(&srv_conc_mutex);
01225 
01226     trx->op_info = "sleeping before joining InnoDB queue";
01227 
01228     /* Peter Zaitsev suggested that we take the sleep away
01229     altogether. But the sleep may be good in pathological
01230     situations of lots of thread switches. Simply put some
01231     threads aside for a while to reduce the number of thread
01232     switches. */
01233     if (SRV_THREAD_SLEEP_DELAY > 0) {
01234       os_thread_sleep(SRV_THREAD_SLEEP_DELAY);
01235     }
01236 
01237     trx->op_info = "";
01238 
01239     os_fast_mutex_lock(&srv_conc_mutex);
01240 
01241     srv_conc_n_waiting_threads--;
01242 
01243     goto retry;
01244   }
01245 
01246   /* Too many threads inside: put the current thread to a queue */
01247 
01248   for (i = 0; i < OS_THREAD_MAX_N; i++) {
01249     slot = srv_conc_slots + i;
01250 
01251     if (!slot->reserved) {
01252 
01253       break;
01254     }
01255   }
01256 
01257   if (i == OS_THREAD_MAX_N) {
01258     /* Could not find a free wait slot, we must let the
01259     thread enter */
01260 
01261     srv_conc_n_threads++;
01262     trx->declared_to_be_inside_innodb = TRUE;
01263     trx->n_tickets_to_enter_innodb = 0;
01264 
01265     os_fast_mutex_unlock(&srv_conc_mutex);
01266 
01267     return;
01268   }
01269 
01270   /* Release possible search system latch this thread has */
01271   if (trx->has_search_latch) {
01272     trx_search_latch_release_if_reserved(trx);
01273   }
01274 
01275   /* Add to the queue */
01276   slot->reserved = TRUE;
01277   slot->wait_ended = FALSE;
01278 
01279   UT_LIST_ADD_LAST(srv_conc_queue, srv_conc_queue, slot);
01280 
01281   os_event_reset(slot->event);
01282 
01283   srv_conc_n_waiting_threads++;
01284 
01285   os_fast_mutex_unlock(&srv_conc_mutex);
01286 
01287   /* Go to wait for the event; when a thread leaves InnoDB it will
01288   release this thread */
01289 
01290   trx->op_info = "waiting in InnoDB queue";
01291 
01292   os_event_wait(slot->event);
01293 
01294   trx->op_info = "";
01295 
01296   os_fast_mutex_lock(&srv_conc_mutex);
01297 
01298   srv_conc_n_waiting_threads--;
01299 
01300   /* NOTE that the thread which released this thread already
01301   incremented the thread counter on behalf of this thread */
01302 
01303   slot->reserved = FALSE;
01304 
01305   UT_LIST_REMOVE(srv_conc_queue, srv_conc_queue, slot);
01306 
01307   trx->declared_to_be_inside_innodb = TRUE;
01308   trx->n_tickets_to_enter_innodb = SRV_FREE_TICKETS_TO_ENTER;
01309 
01310   os_fast_mutex_unlock(&srv_conc_mutex);
01311 }
01312 
01313 /*********************************************************************/
01316 UNIV_INTERN
01317 void
01318 srv_conc_force_enter_innodb(
01319 /*========================*/
01320   trx_t*  trx)  
01322 {
01323   if (UNIV_LIKELY(!srv_thread_concurrency)) {
01324 
01325     return;
01326   }
01327 
01328   ut_ad(srv_conc_n_threads >= 0);
01329 
01330   os_fast_mutex_lock(&srv_conc_mutex);
01331 
01332   srv_conc_n_threads++;
01333   trx->declared_to_be_inside_innodb = TRUE;
01334   trx->n_tickets_to_enter_innodb = 1;
01335 
01336   os_fast_mutex_unlock(&srv_conc_mutex);
01337 }
01338 
01339 /*********************************************************************/
01342 UNIV_INTERN
01343 void
01344 srv_conc_force_exit_innodb(
01345 /*=======================*/
01346   trx_t*  trx)  
01348 {
01349   srv_conc_slot_t*  slot  = NULL;
01350 
01351   if (trx->mysql_thd != NULL
01352       && thd_is_replication_slave_thread(trx->mysql_thd)) {
01353 
01354     return;
01355   }
01356 
01357   if (trx->declared_to_be_inside_innodb == FALSE) {
01358 
01359     return;
01360   }
01361 
01362   os_fast_mutex_lock(&srv_conc_mutex);
01363 
01364   ut_ad(srv_conc_n_threads > 0);
01365   srv_conc_n_threads--;
01366   trx->declared_to_be_inside_innodb = FALSE;
01367   trx->n_tickets_to_enter_innodb = 0;
01368 
01369   if (srv_conc_n_threads < (lint)srv_thread_concurrency) {
01370     /* Look for a slot where a thread is waiting and no other
01371     thread has yet released the thread */
01372 
01373     slot = UT_LIST_GET_FIRST(srv_conc_queue);
01374 
01375     while (slot && slot->wait_ended == TRUE) {
01376       slot = UT_LIST_GET_NEXT(srv_conc_queue, slot);
01377     }
01378 
01379     if (slot != NULL) {
01380       slot->wait_ended = TRUE;
01381 
01382       /* We increment the count on behalf of the released
01383       thread */
01384 
01385       srv_conc_n_threads++;
01386     }
01387   }
01388 
01389   os_fast_mutex_unlock(&srv_conc_mutex);
01390 
01391   if (slot != NULL) {
01392     os_event_set(slot->event);
01393   }
01394 }
01395 
01396 /*********************************************************************/
01398 UNIV_INTERN
01399 void
01400 srv_conc_exit_innodb(
01401 /*=================*/
01402   trx_t*  trx)  
01404 {
01405   if (trx->n_tickets_to_enter_innodb > 0) {
01406     /* We will pretend the thread is still inside InnoDB though it
01407     now leaves the InnoDB engine. In this way we save
01408     a lot of semaphore operations. srv_conc_force_exit_innodb is
01409     used to declare the thread definitely outside InnoDB. It
01410     should be called when there is a lock wait or an SQL statement
01411     ends. */
01412 
01413     return;
01414   }
01415 
01416   srv_conc_force_exit_innodb(trx);
01417 }
01418 
01419 /*========================================================================*/
01420 
01421 /*********************************************************************/
01424 static
01425 ulint
01426 srv_normalize_init_values(void)
01427 /*===========================*/
01428 {
01429   ulint n;
01430   ulint i;
01431 
01432   n = srv_n_data_files;
01433 
01434   for (i = 0; i < n; i++) {
01435     srv_data_file_sizes[i] = srv_data_file_sizes[i]
01436       * ((1024 * 1024) / UNIV_PAGE_SIZE);
01437   }
01438 
01439   srv_last_file_size_max = srv_last_file_size_max
01440     * ((1024 * 1024) / UNIV_PAGE_SIZE);
01441 
01442   srv_log_file_size = srv_log_file_size / UNIV_PAGE_SIZE;
01443 
01444   srv_log_buffer_size = srv_log_buffer_size / UNIV_PAGE_SIZE;
01445 
01446   srv_lock_table_size = 5 * (srv_buf_pool_size / UNIV_PAGE_SIZE);
01447 
01448   return(DB_SUCCESS);
01449 }
01450 
01451 /*********************************************************************/
01454 UNIV_INTERN
01455 ulint
01456 srv_boot(void)
01457 /*==========*/
01458 {
01459   ulint err;
01460 
01461   /* Transform the init parameter values given by MySQL to
01462   use units we use inside InnoDB: */
01463 
01464   err = srv_normalize_init_values();
01465 
01466   if (err != DB_SUCCESS) {
01467     return(err);
01468   }
01469 
01470   /* Initialize synchronization primitives, memory management, and thread
01471   local storage */
01472 
01473   srv_general_init();
01474 
01475   /* Initialize this module */
01476 
01477   srv_init();
01478 
01479   return(DB_SUCCESS);
01480 }
01481 
01482 /*********************************************************************/
01486 static
01487 srv_slot_t*
01488 srv_table_reserve_slot_for_mysql(void)
01489 /*==================================*/
01490 {
01491   srv_slot_t* slot;
01492   ulint   i;
01493 
01494   ut_ad(mutex_own(&kernel_mutex));
01495 
01496   i = 0;
01497   slot = srv_mysql_table + i;
01498 
01499   while (slot->in_use) {
01500     i++;
01501 
01502     if (i >= OS_THREAD_MAX_N) {
01503 
01504       ut_print_timestamp(stderr);
01505 
01506       fprintf(stderr,
01507         "  InnoDB: There appear to be %lu MySQL"
01508         " threads currently waiting\n"
01509         "InnoDB: inside InnoDB, which is the"
01510         " upper limit. Cannot continue operation.\n"
01511         "InnoDB: We intentionally generate"
01512         " a seg fault to print a stack trace\n"
01513         "InnoDB: on Linux. But first we print"
01514         " a list of waiting threads.\n", (ulong) i);
01515 
01516       for (i = 0; i < OS_THREAD_MAX_N; i++) {
01517 
01518         slot = srv_mysql_table + i;
01519 
01520         fprintf(stderr,
01521           "Slot %lu: thread id %lu, type %lu,"
01522           " in use %lu, susp %lu, time %lu\n",
01523           (ulong) i,
01524           (ulong) os_thread_pf(slot->id),
01525           (ulong) slot->type,
01526           (ulong) slot->in_use,
01527           (ulong) slot->suspended,
01528           (ulong) difftime(ut_time(),
01529                slot->suspend_time));
01530       }
01531 
01532       ut_error;
01533     }
01534 
01535     slot = srv_mysql_table + i;
01536   }
01537 
01538   ut_a(slot->in_use == FALSE);
01539 
01540   slot->in_use = TRUE;
01541   slot->id = os_thread_get_curr_id();
01542   slot->handle = os_thread_get_curr();
01543 
01544   return(slot);
01545 }
01546 
01547 /***************************************************************/
01553 UNIV_INTERN
01554 void
01555 srv_suspend_mysql_thread(
01556 /*=====================*/
01557   que_thr_t*  thr)  
01559 {
01560   srv_slot_t* slot;
01561   os_event_t  event;
01562   double    wait_time;
01563   trx_t*    trx;
01564   ulint   had_dict_lock;
01565   ibool   was_declared_inside_innodb  = FALSE;
01566   ib_int64_t  start_time      = 0;
01567   ib_int64_t  finish_time;
01568   ulint   diff_time;
01569   ulint   sec;
01570   ulint   ms;
01571   ulong   lock_wait_timeout;
01572 
01573   ut_ad(!mutex_own(&kernel_mutex));
01574 
01575   trx = thr_get_trx(thr);
01576 
01577   os_event_set(srv_lock_timeout_thread_event);
01578 
01579   mutex_enter(&kernel_mutex);
01580 
01581   trx->error_state = DB_SUCCESS;
01582 
01583   if (thr->state == QUE_THR_RUNNING) {
01584 
01585     ut_ad(thr->is_active == TRUE);
01586 
01587     /* The lock has already been released or this transaction
01588     was chosen as a deadlock victim: no need to suspend */
01589 
01590     if (trx->was_chosen_as_deadlock_victim) {
01591 
01592       trx->error_state = DB_DEADLOCK;
01593       trx->was_chosen_as_deadlock_victim = FALSE;
01594     }
01595 
01596     mutex_exit(&kernel_mutex);
01597 
01598     return;
01599   }
01600 
01601   ut_ad(thr->is_active == FALSE);
01602 
01603   slot = srv_table_reserve_slot_for_mysql();
01604 
01605   event = slot->event;
01606 
01607   slot->thr = thr;
01608 
01609   os_event_reset(event);
01610 
01611   slot->suspend_time = ut_time();
01612 
01613   if (thr->lock_state == QUE_THR_LOCK_ROW) {
01614     srv_n_lock_wait_count++;
01615     srv_n_lock_wait_current_count++;
01616 
01617     if (ut_usectime(&sec, &ms) == -1) {
01618       start_time = -1;
01619     } else {
01620       start_time = (ib_int64_t) sec * 1000000 + ms;
01621     }
01622   }
01623   /* Wake the lock timeout monitor thread, if it is suspended */
01624 
01625   os_event_set(srv_lock_timeout_thread_event);
01626 
01627   mutex_exit(&kernel_mutex);
01628 
01629   if (trx->declared_to_be_inside_innodb) {
01630 
01631     was_declared_inside_innodb = TRUE;
01632 
01633     /* We must declare this OS thread to exit InnoDB, since a
01634     possible other thread holding a lock which this thread waits
01635     for must be allowed to enter, sooner or later */
01636 
01637     srv_conc_force_exit_innodb(trx);
01638   }
01639 
01640   had_dict_lock = trx->dict_operation_lock_mode;
01641 
01642   switch (had_dict_lock) {
01643   case RW_S_LATCH:
01644     /* Release foreign key check latch */
01645     row_mysql_unfreeze_data_dictionary(trx);
01646     break;
01647   case RW_X_LATCH:
01648     /* There should never be a lock wait when the
01649     dictionary latch is reserved in X mode.  Dictionary
01650     transactions should only acquire locks on dictionary
01651     tables, not other tables. All access to dictionary
01652     tables should be covered by dictionary
01653     transactions. */
01654     ut_print_timestamp(stderr);
01655     fputs("  InnoDB: Error: dict X latch held in "
01656           "srv_suspend_mysql_thread\n", stderr);
01657     /* This should never occur. This incorrect handling
01658     was added in the early development of
01659     ha_innobase::add_index() in InnoDB Plugin 1.0. */
01660     /* Release fast index creation latch */
01661     row_mysql_unlock_data_dictionary(trx);
01662     break;
01663   }
01664 
01665   ut_a(trx->dict_operation_lock_mode == 0);
01666 
01667   /* Suspend this thread and wait for the event. */
01668 
01669   os_event_wait(event);
01670 
01671   /* After resuming, reacquire the data dictionary latch if
01672   necessary. */
01673 
01674   switch (had_dict_lock) {
01675   case RW_S_LATCH:
01676     row_mysql_freeze_data_dictionary(trx);
01677     break;
01678   case RW_X_LATCH:
01679     /* This should never occur. This incorrect handling
01680     was added in the early development of
01681     ha_innobase::add_index() in InnoDB Plugin 1.0. */
01682     row_mysql_lock_data_dictionary(trx);
01683     break;
01684   }
01685 
01686   if (was_declared_inside_innodb) {
01687 
01688     /* Return back inside InnoDB */
01689 
01690     srv_conc_force_enter_innodb(trx);
01691   }
01692 
01693   mutex_enter(&kernel_mutex);
01694 
01695   /* Release the slot for others to use */
01696 
01697   slot->in_use = FALSE;
01698 
01699   wait_time = ut_difftime(ut_time(), slot->suspend_time);
01700 
01701   if (thr->lock_state == QUE_THR_LOCK_ROW) {
01702     if (ut_usectime(&sec, &ms) == -1) {
01703       finish_time = -1;
01704     } else {
01705       finish_time = (ib_int64_t) sec * 1000000 + ms;
01706     }
01707 
01708     diff_time = (ulint) (finish_time - start_time);
01709 
01710     srv_n_lock_wait_current_count--;
01711     srv_n_lock_wait_time = srv_n_lock_wait_time + diff_time;
01712     if (diff_time > srv_n_lock_max_wait_time &&
01713         /* only update the variable if we successfully
01714         retrieved the start and finish times. See Bug#36819. */
01715         start_time != -1 && finish_time != -1) {
01716       srv_n_lock_max_wait_time = diff_time;
01717     }
01718 
01719     /* Record the lock wait time for this thread */
01720     thd_set_lock_wait_time(trx->mysql_thd, diff_time);
01721   }
01722 
01723   if (trx->was_chosen_as_deadlock_victim) {
01724 
01725     trx->error_state = DB_DEADLOCK;
01726     trx->was_chosen_as_deadlock_victim = FALSE;
01727   }
01728 
01729   mutex_exit(&kernel_mutex);
01730 
01731   /* InnoDB system transactions (such as the purge, and
01732   incomplete transactions that are being rolled back after crash
01733   recovery) will use the global value of
01734   innodb_lock_wait_timeout, because trx->mysql_thd == NULL. */
01735   lock_wait_timeout = thd_lock_wait_timeout(trx->mysql_thd);
01736 
01737   if (lock_wait_timeout < 100000000
01738       && wait_time > (double) lock_wait_timeout) {
01739 
01740     trx->error_state = DB_LOCK_WAIT_TIMEOUT;
01741   }
01742 
01743   if (trx_is_interrupted(trx)) {
01744 
01745     trx->error_state = DB_INTERRUPTED;
01746   }
01747 }
01748 
01749 /********************************************************************/
01752 UNIV_INTERN
01753 void
01754 srv_release_mysql_thread_if_suspended(
01755 /*==================================*/
01756   que_thr_t*  thr)  
01758 {
01759   srv_slot_t* slot;
01760   ulint   i;
01761 
01762   ut_ad(mutex_own(&kernel_mutex));
01763 
01764   for (i = 0; i < OS_THREAD_MAX_N; i++) {
01765 
01766     slot = srv_mysql_table + i;
01767 
01768     if (slot->in_use && slot->thr == thr) {
01769       /* Found */
01770 
01771       os_event_set(slot->event);
01772 
01773       return;
01774     }
01775   }
01776 
01777   /* not found */
01778 }
01779 
01780 /******************************************************************/
01782 static
01783 void
01784 srv_refresh_innodb_monitor_stats(void)
01785 /*==================================*/
01786 {
01787   mutex_enter(&srv_innodb_monitor_mutex);
01788 
01789   srv_last_monitor_time = time(NULL);
01790 
01791   os_aio_refresh_stats();
01792 
01793   btr_cur_n_sea_old = btr_cur_n_sea;
01794   btr_cur_n_non_sea_old = btr_cur_n_non_sea;
01795 
01796   log_refresh_stats();
01797 
01798   buf_refresh_io_stats_all();
01799 
01800   srv_n_rows_inserted_old = srv_n_rows_inserted;
01801   srv_n_rows_updated_old = srv_n_rows_updated;
01802   srv_n_rows_deleted_old = srv_n_rows_deleted;
01803   srv_n_rows_read_old = srv_n_rows_read;
01804 
01805   mutex_exit(&srv_innodb_monitor_mutex);
01806 }
01807 
/******************************************************************/
/** Outputs to a file the output of the InnoDB Monitor.
@return FALSE if not all information was printed because the caller
asked not to wait (nowait) and a needed mutex could not be acquired
by lock_print_info_summary(); TRUE otherwise */
UNIV_INTERN
ibool
srv_printf_innodb_monitor(
/*======================*/
  FILE* file,   /*!< in: output stream */
  ibool nowait,   /*!< in: whether to print without waiting
        (passed through to lock_print_info_summary()) */
  ulint*  trx_start,  /*!< out: file offset of the start of the
        active-transaction list, or NULL; set to
        ULINT_UNDEFINED if ftell() fails */
  ulint*  trx_end)  /*!< out: file offset of the end of the
        active-transaction list, or NULL */
{
  double  time_elapsed;
  time_t  current_time;
  ulint n_reserved;
  ibool ret;

  mutex_enter(&srv_innodb_monitor_mutex);

  current_time = time(NULL);

  /* We add 0.001 seconds to time_elapsed to prevent division
  by zero if two users happen to call SHOW INNODB STATUS at the same
  time */

  time_elapsed = difftime(current_time, srv_last_monitor_time)
    + 0.001;

  srv_last_monitor_time = time(NULL);

  fputs("\n=====================================\n", file);

  ut_print_timestamp(file);
  fprintf(file,
    " INNODB MONITOR OUTPUT\n"
    "=====================================\n"
    "Per second averages calculated from the last %lu seconds\n",
    (ulong)time_elapsed);

  fputs("-----------------\n"
        "BACKGROUND THREAD\n"
        "-----------------\n", file);
  srv_print_master_thread_info(file);

  fputs("----------\n"
        "SEMAPHORES\n"
        "----------\n", file);
  sync_print(file);

  /* Conceptually, srv_innodb_monitor_mutex has a very high latching
  order level in sync0sync.h, while dict_foreign_err_mutex has a very
  low level 135. Therefore we can reserve the latter mutex here without
  a danger of a deadlock of threads. */

  mutex_enter(&dict_foreign_err_mutex);

  /* Only print the foreign key error section if an error has actually
  been recorded (the error file is non-empty). */
  if (ftell(dict_foreign_err_file) != 0L) {
    fputs("------------------------\n"
          "LATEST FOREIGN KEY ERROR\n"
          "------------------------\n", file);
    ut_copy_file(file, dict_foreign_err_file);
  }

  mutex_exit(&dict_foreign_err_mutex);

  /* Only if lock_print_info_summary proceeds correctly,
  before we call the lock_print_info_all_transactions
  to print all the lock information. */
  ret = lock_print_info_summary(file, nowait);

  if (ret) {
    /* Record the file offsets bracketing the transaction list so
    that the caller can extract just that section. */
    if (trx_start) {
      long  t = ftell(file);
      if (t < 0) {
        *trx_start = ULINT_UNDEFINED;
      } else {
        *trx_start = (ulint) t;
      }
    }
    lock_print_info_all_transactions(file);
    if (trx_end) {
      long  t = ftell(file);
      if (t < 0) {
        *trx_end = ULINT_UNDEFINED;
      } else {
        *trx_end = (ulint) t;
      }
    }
  }

  fputs("--------\n"
        "FILE I/O\n"
        "--------\n", file);
  os_aio_print(file);

  fputs("-------------------------------------\n"
        "INSERT BUFFER AND ADAPTIVE HASH INDEX\n"
        "-------------------------------------\n", file);
  ibuf_print(file);

  ha_print_info(file, btr_search_sys->hash_index);

  fprintf(file,
    "%.2f hash searches/s, %.2f non-hash searches/s\n",
    (btr_cur_n_sea - btr_cur_n_sea_old)
    / time_elapsed,
    (btr_cur_n_non_sea - btr_cur_n_non_sea_old)
    / time_elapsed);
  /* Reset the baseline so the next monitor call reports rates for
  the interval since this call. */
  btr_cur_n_sea_old = btr_cur_n_sea;
  btr_cur_n_non_sea_old = btr_cur_n_non_sea;

  fputs("---\n"
        "LOG\n"
        "---\n", file);
  log_print(file);

  fputs("----------------------\n"
        "BUFFER POOL AND MEMORY\n"
        "----------------------\n", file);
  fprintf(file,
    "Total memory allocated " ULINTPF
    "; in additional pool allocated " ULINTPF "\n",
    ut_total_allocated_memory,
    mem_pool_get_reserved(mem_comm_pool));
  fprintf(file, "Dictionary memory allocated " ULINTPF "\n",
    dict_sys->size);

  buf_print_io(file);

  fputs("--------------\n"
        "ROW OPERATIONS\n"
        "--------------\n", file);
  fprintf(file, "%ld queries inside InnoDB, %lu queries in queue\n",
    (long) srv_conc_n_threads,
    (ulong) srv_conc_n_waiting_threads);

  fprintf(file, "%lu read views open inside InnoDB\n",
    static_cast<ulint>(UT_LIST_GET_LEN(trx_sys->view_list)));

  n_reserved = fil_space_get_n_reserved_extents(0);
  if (n_reserved > 0) {
    fprintf(file,
      "%lu tablespace extents now reserved for"
      " B-tree split operations\n",
      (ulong) n_reserved);
  }

#ifdef UNIV_LINUX
  fprintf(file, "Main thread process no. %lu, id %lu, state: %s\n",
    (ulong) srv_main_thread_process_no,
    (ulong) srv_main_thread_id,
    srv_main_thread_op_info);
#else
  fprintf(file, "Main thread id %lu, state: %s\n",
    (ulong) srv_main_thread_id,
    srv_main_thread_op_info);
#endif
  fprintf(file,
    "Number of rows inserted " ULINTPF
    ", updated " ULINTPF ", deleted " ULINTPF
    ", read " ULINTPF "\n",
    srv_n_rows_inserted,
    srv_n_rows_updated,
    srv_n_rows_deleted,
    srv_n_rows_read);
  fprintf(file,
    "%.2f inserts/s, %.2f updates/s,"
    " %.2f deletes/s, %.2f reads/s\n",
    (srv_n_rows_inserted - srv_n_rows_inserted_old)
    / time_elapsed,
    (srv_n_rows_updated - srv_n_rows_updated_old)
    / time_elapsed,
    (srv_n_rows_deleted - srv_n_rows_deleted_old)
    / time_elapsed,
    (srv_n_rows_read - srv_n_rows_read_old)
    / time_elapsed);

  /* Reset the row-operation baselines as well. */
  srv_n_rows_inserted_old = srv_n_rows_inserted;
  srv_n_rows_updated_old = srv_n_rows_updated;
  srv_n_rows_deleted_old = srv_n_rows_deleted;
  srv_n_rows_read_old = srv_n_rows_read;

  fputs("----------------------------\n"
        "END OF INNODB MONITOR OUTPUT\n"
        "============================\n", file);
  mutex_exit(&srv_innodb_monitor_mutex);
  fflush(file);

  return(ret);
}
02001 
/******************************************************************/
/** Copies the current values of the InnoDB internal counters into the
global export_vars structure, from which the server layer reads the
INNODB_* status variables. All copying is done while holding
srv_innodb_monitor_mutex so that readers see a consistent snapshot. */
UNIV_INTERN
void
srv_export_innodb_status(void)
/*==========================*/
{
  buf_pool_stat_t stat;
  ulint   LRU_len;
  ulint   free_len;
  ulint   flush_list_len;

  /* Gather the aggregated buffer pool statistics before taking the
  monitor mutex. */
  buf_get_total_stat(&stat);
  buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len);

  mutex_enter(&srv_innodb_monitor_mutex);

  export_vars.innodb_data_pending_reads
    = os_n_pending_reads;
  export_vars.innodb_data_pending_writes
    = os_n_pending_writes;
  export_vars.innodb_data_pending_fsyncs
    = fil_n_pending_log_flushes
    + fil_n_pending_tablespace_flushes;
  export_vars.innodb_data_fsyncs = os_n_fsyncs;
  export_vars.innodb_data_read = srv_data_read;
  export_vars.innodb_data_reads = os_n_file_reads;
  export_vars.innodb_data_writes = os_n_file_writes;
  export_vars.innodb_data_written = srv_data_written;
  export_vars.innodb_buffer_pool_read_requests = stat.n_page_gets;
  export_vars.innodb_buffer_pool_write_requests
    = srv_buf_pool_write_requests;
  export_vars.innodb_buffer_pool_wait_free = srv_buf_pool_wait_free;
  export_vars.innodb_buffer_pool_pages_flushed = srv_buf_pool_flushed;
  export_vars.innodb_buffer_pool_reads = srv_buf_pool_reads;
  export_vars.innodb_buffer_pool_read_ahead
    = stat.n_ra_pages_read;
  export_vars.innodb_buffer_pool_read_ahead_evicted
    = stat.n_ra_pages_evicted;
  export_vars.innodb_buffer_pool_pages_data = LRU_len;
  export_vars.innodb_buffer_pool_pages_dirty = flush_list_len;
  export_vars.innodb_buffer_pool_pages_free = free_len;
#ifdef UNIV_DEBUG
  export_vars.innodb_buffer_pool_pages_latched
    = buf_get_latched_pages_number();
#endif /* UNIV_DEBUG */
  export_vars.innodb_buffer_pool_pages_total = buf_pool_get_n_pages();

  /* Pages that are neither on the LRU list nor free are in
  miscellaneous use (e.g. control structures). */
  export_vars.innodb_buffer_pool_pages_misc
          = buf_pool_get_n_pages() - LRU_len - free_len;
#ifdef HAVE_ATOMIC_BUILTINS
  export_vars.innodb_have_atomic_builtins = 1;
#else
  export_vars.innodb_have_atomic_builtins = 0;
#endif
  export_vars.innodb_page_size = UNIV_PAGE_SIZE;
  export_vars.innodb_log_waits = srv_log_waits;
  export_vars.innodb_os_log_written = srv_os_log_written;
  export_vars.innodb_os_log_fsyncs = fil_n_log_flushes;
  export_vars.innodb_os_log_pending_fsyncs = fil_n_pending_log_flushes;
  export_vars.innodb_os_log_pending_writes = srv_os_log_pending_writes;
  export_vars.innodb_log_write_requests = srv_log_write_requests;
  export_vars.innodb_log_writes = srv_log_writes;
  export_vars.innodb_dblwr_pages_written = srv_dblwr_pages_written;
  export_vars.innodb_dblwr_writes = srv_dblwr_writes;
  export_vars.innodb_pages_created = stat.n_pages_created;
  export_vars.innodb_pages_read = stat.n_pages_read;
  export_vars.innodb_pages_written = stat.n_pages_written;
  export_vars.innodb_row_lock_waits = srv_n_lock_wait_count;
  export_vars.innodb_row_lock_current_waits
    = srv_n_lock_wait_current_count;
  /* Lock wait times are kept in milliseconds internally; export
  them in seconds. */
  export_vars.innodb_row_lock_time = srv_n_lock_wait_time / 1000;
  if (srv_n_lock_wait_count > 0) {
    export_vars.innodb_row_lock_time_avg = (ulint)
      (srv_n_lock_wait_time / 1000 / srv_n_lock_wait_count);
  } else {
    /* Avoid division by zero when no lock waits have occurred. */
    export_vars.innodb_row_lock_time_avg = 0;
  }
  export_vars.innodb_row_lock_time_max
    = srv_n_lock_max_wait_time / 1000;
  export_vars.innodb_rows_read = srv_n_rows_read;
  export_vars.innodb_rows_inserted = srv_n_rows_inserted;
  export_vars.innodb_rows_updated = srv_n_rows_updated;
  export_vars.innodb_rows_deleted = srv_n_rows_deleted;
  export_vars.innodb_truncated_status_writes = srv_truncated_status_writes;

  mutex_exit(&srv_innodb_monitor_mutex);
}
02090 
02091 /*********************************************************************/
02094 UNIV_INTERN
02095 os_thread_ret_t
02096 srv_monitor_thread(
02097 /*===============*/
02098   void* /*arg __attribute__((unused))*/)
02101 {
02102   ib_int64_t  sig_count;
02103   double    time_elapsed;
02104   time_t    current_time;
02105   time_t    last_table_monitor_time;
02106   time_t    last_tablespace_monitor_time;
02107   time_t    last_monitor_time;
02108   ulint   mutex_skipped;
02109   ibool   last_srv_print_monitor;
02110 
02111 #ifdef UNIV_DEBUG_THREAD_CREATION
02112   fprintf(stderr, "Lock timeout thread starts, id %lu\n",
02113     os_thread_pf(os_thread_get_curr_id()));
02114 #endif
02115 
02116 #ifdef UNIV_PFS_THREAD
02117   pfs_register_thread(srv_monitor_thread_key);
02118 #endif
02119 
02120   srv_last_monitor_time = ut_time();
02121   last_table_monitor_time = ut_time();
02122   last_tablespace_monitor_time = ut_time();
02123   last_monitor_time = ut_time();
02124   mutex_skipped = 0;
02125   last_srv_print_monitor = srv_print_innodb_monitor;
02126 loop:
02127   srv_monitor_active = TRUE;
02128 
02129   /* Wake up every 5 seconds to see if we need to print
02130   monitor information or if signalled at shutdown. */
02131 
02132   sig_count = os_event_reset(srv_monitor_event);
02133 
02134   os_event_wait_time_low(srv_monitor_event, 5000000, sig_count);
02135 
02136   current_time = ut_time();
02137 
02138   time_elapsed = difftime(current_time, last_monitor_time);
02139 
02140   if (time_elapsed > 15) {
02141     last_monitor_time = ut_time();
02142 
02143     if (srv_print_innodb_monitor) {
02144       /* Reset mutex_skipped counter everytime
02145       srv_print_innodb_monitor changes. This is to
02146       ensure we will not be blocked by kernel_mutex
02147       for short duration information printing,
02148       such as requested by sync_array_print_long_waits() */
02149       if (!last_srv_print_monitor) {
02150         mutex_skipped = 0;
02151         last_srv_print_monitor = TRUE;
02152       }
02153 
02154       if (!srv_printf_innodb_monitor(stderr,
02155             MUTEX_NOWAIT(mutex_skipped),
02156             NULL, NULL)) {
02157         mutex_skipped++;
02158       } else {
02159         /* Reset the counter */
02160         mutex_skipped = 0;
02161       }
02162     } else {
02163       last_srv_print_monitor = FALSE;
02164     }
02165 
02166 
02167     if (srv_innodb_status) {
02168       mutex_enter(&srv_monitor_file_mutex);
02169       rewind(srv_monitor_file);
02170       if (!srv_printf_innodb_monitor(srv_monitor_file,
02171             MUTEX_NOWAIT(mutex_skipped),
02172             NULL, NULL)) {
02173         mutex_skipped++;
02174       } else {
02175         mutex_skipped = 0;
02176       }
02177 
02178       os_file_set_eof(srv_monitor_file);
02179       mutex_exit(&srv_monitor_file_mutex);
02180     }
02181 
02182     if (srv_print_innodb_tablespace_monitor
02183         && difftime(current_time,
02184         last_tablespace_monitor_time) > 60) {
02185       last_tablespace_monitor_time = ut_time();
02186 
02187       fputs("========================"
02188             "========================\n",
02189             stderr);
02190 
02191       ut_print_timestamp(stderr);
02192 
02193       fputs(" INNODB TABLESPACE MONITOR OUTPUT\n"
02194             "========================"
02195             "========================\n",
02196             stderr);
02197 
02198       fsp_print(0);
02199       fputs("Validating tablespace\n", stderr);
02200       fsp_validate(0);
02201       fputs("Validation ok\n"
02202             "---------------------------------------\n"
02203             "END OF INNODB TABLESPACE MONITOR OUTPUT\n"
02204             "=======================================\n",
02205             stderr);
02206     }
02207 
02208     if (srv_print_innodb_table_monitor
02209         && difftime(current_time, last_table_monitor_time) > 60) {
02210 
02211       last_table_monitor_time = ut_time();
02212 
02213       fputs("===========================================\n",
02214             stderr);
02215 
02216       ut_print_timestamp(stderr);
02217 
02218       fputs(" INNODB TABLE MONITOR OUTPUT\n"
02219             "===========================================\n",
02220             stderr);
02221       dict_print();
02222 
02223       fputs("-----------------------------------\n"
02224             "END OF INNODB TABLE MONITOR OUTPUT\n"
02225             "==================================\n",
02226             stderr);
02227     }
02228   }
02229 
02230   if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
02231     goto exit_func;
02232   }
02233 
02234   if (srv_print_innodb_monitor
02235       || srv_print_innodb_lock_monitor
02236       || srv_print_innodb_tablespace_monitor
02237       || srv_print_innodb_table_monitor) {
02238     goto loop;
02239   }
02240 
02241   srv_monitor_active = FALSE;
02242 
02243   goto loop;
02244 
02245 exit_func:
02246   srv_monitor_active = FALSE;
02247 
02248   /* We count the number of threads in os_thread_exit(). A created
02249   thread should always use that to exit and not use return() to exit. */
02250 
02251   os_thread_exit(NULL);
02252 
02253   OS_THREAD_DUMMY_RETURN;
02254 }
02255 
/*********************************************************************/
/** A thread which wakes up approximately once per second, scans the
table of suspended MySQL threads, and cancels lock waits that have
exceeded the per-session lock wait timeout or whose transaction has
been interrupted.
@return a dummy value; the thread exits via os_thread_exit() */
UNIV_INTERN
os_thread_ret_t
srv_lock_timeout_thread(
/*====================*/
  void* /*arg __attribute__((unused))*/)
      /* in: a dummy parameter required by
      os_thread_create */
{
  srv_slot_t* slot;
  ibool   some_waits;
  double    wait_time;
  ulint   i;
  ib_int64_t  sig_count;

#ifdef UNIV_PFS_THREAD
  pfs_register_thread(srv_lock_timeout_thread_key);
#endif

loop:

  /* When someone is waiting for a lock, we wake up every second
  and check if a timeout has passed for a lock wait */

  sig_count = os_event_reset(srv_timeout_event);

  os_event_wait_time_low(srv_timeout_event, 1000000, sig_count);

  srv_lock_timeout_active = TRUE;

  mutex_enter(&kernel_mutex);

  some_waits = FALSE;

  /* Check of all slots if a thread is waiting there, and if it
  has exceeded the time limit */

  for (i = 0; i < OS_THREAD_MAX_N; i++) {

    slot = srv_mysql_table + i;

    if (slot->in_use) {
      trx_t*  trx;
      ulong lock_wait_timeout;

      some_waits = TRUE;

      wait_time = ut_difftime(ut_time(), slot->suspend_time);

      trx = thr_get_trx(slot->thr);
      /* The timeout is a per-session setting, so fetch it for
      each waiting transaction separately. */
      lock_wait_timeout = thd_lock_wait_timeout(
        trx->mysql_thd);

      /* NOTE(review): values >= 100000000 appear to mean
      "no timeout" — confirm against the server layer. */
      if (trx_is_interrupted(trx)
          || (lock_wait_timeout < 100000000
        && (wait_time > (double) lock_wait_timeout
            || wait_time < 0))) {

        /* Timeout exceeded or a wrap-around in system
        time counter: cancel the lock request queued
        by the transaction and release possible
        other transactions waiting behind; it is
        possible that the lock has already been
        granted: in that case do nothing */

        if (trx->wait_lock) {
          lock_cancel_waiting_and_release(
            trx->wait_lock);
        }
      }
    }
  }

  os_event_reset(srv_lock_timeout_thread_event);

  mutex_exit(&kernel_mutex);

  if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
    goto exit_func;
  }

  /* Keep polling every second while any thread is still waiting. */
  if (some_waits) {
    goto loop;
  }

  srv_lock_timeout_active = FALSE;

#if 0
  /* The following synchronisation is disabled, since
  the InnoDB monitor output is to be updated every 15 seconds. */
  os_event_wait(srv_lock_timeout_thread_event);
#endif
  goto loop;

exit_func:
  srv_lock_timeout_active = FALSE;

  /* We count the number of threads in os_thread_exit(). A created
  thread should always use that to exit and not use return() to exit. */

  os_thread_exit(NULL);

  OS_THREAD_DUMMY_RETURN;
}
02362 
02363 /*********************************************************************/
02367 UNIV_INTERN
02368 os_thread_ret_t
02369 srv_error_monitor_thread(
02370 /*=====================*/
02371   void* /*arg __attribute__((unused))*/)
02374 {
02375   /* number of successive fatal timeouts observed */
02376   ulint   fatal_cnt = 0;
02377   ib_uint64_t old_lsn;
02378   ib_uint64_t new_lsn;
02379   ib_int64_t  sig_count;
02380 
02381   old_lsn = srv_start_lsn;
02382 
02383 #ifdef UNIV_DEBUG_THREAD_CREATION
02384   fprintf(stderr, "Error monitor thread starts, id %lu\n",
02385     os_thread_pf(os_thread_get_curr_id()));
02386 #endif
02387 
02388 #ifdef UNIV_PFS_THREAD
02389   pfs_register_thread(srv_error_monitor_thread_key);
02390 #endif
02391 
02392 loop:
02393   srv_error_monitor_active = TRUE;
02394 
02395   /* Try to track a strange bug reported by Harald Fuchs and others,
02396   where the lsn seems to decrease at times */
02397 
02398   new_lsn = log_get_lsn();
02399 
02400   if (new_lsn < old_lsn) {
02401           drizzled::errmsg_printf(drizzled::error::INFO,
02402                                   "InnoDB: Error: old log sequence number %"PRIu64" was greater than the new log sequence number %"PRIu64"!"
02403                                   "InnoDB: Please submit a bug report to http://bugs.launchpad.net/drizzle",
02404                                   old_lsn, new_lsn);
02405   }
02406 
02407   old_lsn = new_lsn;
02408 
02409   if (difftime(time(NULL), srv_last_monitor_time) > 60) {
02410     /* We referesh InnoDB Monitor values so that averages are
02411     printed from at most 60 last seconds */
02412 
02413     srv_refresh_innodb_monitor_stats();
02414   }
02415 
02416   /* Update the statistics collected for deciding LRU
02417   eviction policy. */
02418   buf_LRU_stat_update();
02419 
02420   /* Update the statistics collected for flush rate policy. */
02421   buf_flush_stat_update();
02422 
02423   /* In case mutex_exit is not a memory barrier, it is
02424   theoretically possible some threads are left waiting though
02425   the semaphore is already released. Wake up those threads: */
02426 
02427   sync_arr_wake_threads_if_sema_free();
02428 
02429   if (sync_array_print_long_waits()) {
02430     fatal_cnt++;
02431     if (fatal_cnt > 10) {
02432 
02433       fprintf(stderr,
02434         "InnoDB: Error: semaphore wait has lasted"
02435         " > %lu seconds\n"
02436         "InnoDB: We intentionally crash the server,"
02437         " because it appears to be hung.\n",
02438         (ulong) srv_fatal_semaphore_wait_threshold);
02439 
02440       ut_error;
02441     }
02442   } else {
02443     fatal_cnt = 0;
02444   }
02445 
02446   /* Flush stderr so that a database user gets the output
02447   to possible MySQL error file */
02448 
02449   fflush(stderr);
02450 
02451   sig_count = os_event_reset(srv_error_event);
02452 
02453   os_event_wait_time_low(srv_error_event, 1000000, sig_count);
02454 
02455   if (srv_shutdown_state < SRV_SHUTDOWN_CLEANUP) {
02456 
02457     goto loop;
02458   }
02459 
02460   srv_error_monitor_active = FALSE;
02461 
02462   /* We count the number of threads in os_thread_exit(). A created
02463   thread should always use that to exit and not use return() to exit. */
02464 
02465   os_thread_exit(NULL);
02466 
02467   OS_THREAD_DUMMY_RETURN;
02468 }
02469 
02470 /**********************************************************************/
02473 UNIV_INTERN
02474 ibool
02475 srv_is_any_background_thread_active(void)
02476 /*=====================================*/
02477 {
02478   ulint i;
02479   ibool ret = FALSE;
02480 
02481   mutex_enter(&kernel_mutex);
02482 
02483   for (i = SRV_COM; i <= SRV_MASTER; ++i) {
02484     if (srv_n_threads_active[i] != 0) {
02485       ret = TRUE;
02486       break;
02487     }
02488   }
02489 
02490   mutex_exit(&kernel_mutex);
02491 
02492   return(ret);
02493 }
02494 
02495 /*******************************************************************/
02501 UNIV_INTERN
02502 void
02503 srv_active_wake_master_thread(void)
02504 /*===============================*/
02505 {
02506   srv_activity_count++;
02507 
02508   if (srv_n_threads_active[SRV_MASTER] == 0) {
02509 
02510     mutex_enter(&kernel_mutex);
02511 
02512     srv_release_threads(SRV_MASTER, 1);
02513 
02514     mutex_exit(&kernel_mutex);
02515   }
02516 }
02517 
02518 /*******************************************************************/
02524 UNIV_INTERN
02525 void
02526 srv_wake_purge_thread_if_not_active(void)
02527 /*=====================================*/
02528 {
02529   ut_ad(!mutex_own(&kernel_mutex));
02530 
02531   if (srv_n_purge_threads > 0
02532       && srv_n_threads_active[SRV_WORKER] == 0) {
02533 
02534     mutex_enter(&kernel_mutex);
02535 
02536     srv_release_threads(SRV_WORKER, 1);
02537 
02538     mutex_exit(&kernel_mutex);
02539   }
02540 }
02541 
02542 /*******************************************************************/
02544 UNIV_INTERN
02545 void
02546 srv_wake_master_thread(void)
02547 /*========================*/
02548 {
02549   srv_activity_count++;
02550 
02551   mutex_enter(&kernel_mutex);
02552 
02553   srv_release_threads(SRV_MASTER, 1);
02554 
02555   mutex_exit(&kernel_mutex);
02556 }
02557 
02558 /*******************************************************************/
02560 UNIV_INTERN
02561 void
02562 srv_wake_purge_thread(void)
02563 /*=======================*/
02564 {
02565   ut_ad(!mutex_own(&kernel_mutex));
02566 
02567   if (srv_n_purge_threads > 0) {
02568 
02569     mutex_enter(&kernel_mutex);
02570 
02571     srv_release_threads(SRV_WORKER, 1);
02572 
02573     mutex_exit(&kernel_mutex);
02574   }
02575 }
02576 
02577 /**********************************************************************
02578 The master thread is tasked to ensure that flush of log file happens
02579 once every second in the background. This is to ensure that not more
02580 than one second of trxs are lost in case of crash when
02581 innodb_flush_logs_at_trx_commit != 1 */
02582 static
02583 void
02584 srv_sync_log_buffer_in_background(void)
02585 /*===================================*/
02586 {
02587   time_t  current_time = time(NULL);
02588 
02589   srv_main_thread_op_info = "flushing log";
02590   if (difftime(current_time, srv_last_log_flush_time) >= 1) {
02591     log_buffer_sync_in_background(TRUE);
02592     srv_last_log_flush_time = current_time;
02593     srv_log_writes_and_flush++;
02594   }
02595 }
02596 
02597 /********************************************************************/
02600 static
02601 void
02602 srv_master_do_purge(void)
02603 /*=====================*/
02604 {
02605   ulint n_pages_purged;
02606 
02607   ut_ad(!mutex_own(&kernel_mutex));
02608 
02609   ut_a(srv_n_purge_threads == 0);
02610 
02611   do {
02612     /* Check for shutdown and change in purge config. */
02613     if (srv_fast_shutdown && srv_shutdown_state > 0) {
02614       /* Nothing to purge. */
02615       n_pages_purged = 0;
02616     } else {
02617       n_pages_purged = trx_purge(srv_purge_batch_size);
02618     }
02619 
02620     srv_sync_log_buffer_in_background();
02621 
02622   } while (n_pages_purged > 0);
02623 }
02624 
02625 /*********************************************************************/
02628 UNIV_INTERN
02629 os_thread_ret_t
02630 srv_master_thread(
02631 /*==============*/
02632   void* /*arg __attribute__((unused))*/)
02635 {
02636   buf_pool_stat_t buf_stat;
02637   os_event_t  event;
02638   ulint   old_activity_count;
02639   ulint   n_pages_purged  = 0;
02640   ulint   n_bytes_merged;
02641   ulint   n_pages_flushed;
02642   ulint   n_bytes_archived;
02643   ulint   n_tables_to_drop;
02644   ulint   n_ios;
02645   ulint   n_ios_old;
02646   ulint   n_ios_very_old;
02647   ulint   n_pend_ios;
02648   ulint   next_itr_time;
02649   ulint   i;
02650 
02651 #ifdef UNIV_DEBUG_THREAD_CREATION
02652   fprintf(stderr, "Master thread starts, id %lu\n",
02653     os_thread_pf(os_thread_get_curr_id()));
02654 #endif
02655 
02656 #ifdef UNIV_PFS_THREAD
02657   pfs_register_thread(srv_master_thread_key);
02658 #endif
02659 
02660   srv_main_thread_process_no = os_proc_get_number();
02661   srv_main_thread_id = os_thread_pf(os_thread_get_curr_id());
02662 
02663   srv_table_reserve_slot(SRV_MASTER);
02664 
02665   mutex_enter(&kernel_mutex);
02666 
02667   srv_n_threads_active[SRV_MASTER]++;
02668 
02669   mutex_exit(&kernel_mutex);
02670 
02671 loop:
02672   /*****************************************************************/
02673   /* ---- When there is database activity by users, we cycle in this
02674   loop */
02675 
02676   srv_main_thread_op_info = "reserving kernel mutex";
02677 
02678   buf_get_total_stat(&buf_stat);
02679   n_ios_very_old = log_sys->n_log_ios + buf_stat.n_pages_read
02680     + buf_stat.n_pages_written;
02681   mutex_enter(&kernel_mutex);
02682 
02683   /* Store the user activity counter at the start of this loop */
02684   old_activity_count = srv_activity_count;
02685 
02686   mutex_exit(&kernel_mutex);
02687 
02688   if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND) {
02689 
02690     goto suspend_thread;
02691   }
02692 
02693   /* ---- We run the following loop approximately once per second
02694   when there is database activity */
02695 
02696   srv_last_log_flush_time = time(NULL);
02697 
02698   /* Sleep for 1 second on entrying the for loop below the first time. */
02699   next_itr_time = ut_time_ms() + 1000;
02700 
02701   for (i = 0; i < 10; i++) {
02702     ulint cur_time = ut_time_ms();
02703 
02704     /* ALTER TABLE in MySQL requires on Unix that the table handler
02705     can drop tables lazily after there no longer are SELECT
02706     queries to them. */
02707 
02708     srv_main_thread_op_info = "doing background drop tables";
02709 
02710     row_drop_tables_for_mysql_in_background();
02711 
02712     srv_main_thread_op_info = "";
02713 
02714     if (srv_fast_shutdown && srv_shutdown_state > 0) {
02715 
02716       goto background_loop;
02717     }
02718 
02719     buf_get_total_stat(&buf_stat);
02720 
02721     n_ios_old = log_sys->n_log_ios + buf_stat.n_pages_read
02722       + buf_stat.n_pages_written;
02723 
02724     srv_main_thread_op_info = "sleeping";
02725     srv_main_1_second_loops++;
02726 
02727     if (next_itr_time > cur_time
02728         && srv_shutdown_state == SRV_SHUTDOWN_NONE) {
02729 
02730       /* Get sleep interval in micro seconds. We use
02731       ut_min() to avoid long sleep in case of
02732       wrap around. */
02733       os_thread_sleep(ut_min(1000000,
02734           (next_itr_time - cur_time)
02735            * 1000));
02736       srv_main_sleeps++;
02737     }
02738 
02739     /* Each iteration should happen at 1 second interval. */
02740     next_itr_time = ut_time_ms() + 1000;
02741 
02742     /* Flush logs if needed */
02743     srv_sync_log_buffer_in_background();
02744 
02745     srv_main_thread_op_info = "making checkpoint";
02746     log_free_check();
02747 
02748     /* If i/os during one second sleep were less than 5% of
02749     capacity, we assume that there is free disk i/o capacity
02750     available, and it makes sense to do an insert buffer merge. */
02751 
02752     buf_get_total_stat(&buf_stat);
02753     n_pend_ios = buf_get_n_pending_ios()
02754       + log_sys->n_pending_writes;
02755     n_ios = log_sys->n_log_ios + buf_stat.n_pages_read
02756       + buf_stat.n_pages_written;
02757     if (n_pend_ios < SRV_PEND_IO_THRESHOLD
02758         && (n_ios - n_ios_old < SRV_RECENT_IO_ACTIVITY)) {
02759       srv_main_thread_op_info = "doing insert buffer merge";
02760       ibuf_contract_for_n_pages(FALSE, PCT_IO(5));
02761 
02762       /* Flush logs if needed */
02763       srv_sync_log_buffer_in_background();
02764     }
02765 
02766     if (UNIV_UNLIKELY(buf_get_modified_ratio_pct()
02767           > srv_max_buf_pool_modified_pct)) {
02768 
02769       /* Try to keep the number of modified pages in the
02770       buffer pool under the limit wished by the user */
02771 
02772       srv_main_thread_op_info =
02773         "flushing buffer pool pages";
02774       n_pages_flushed = buf_flush_list(
02775         PCT_IO(100), IB_ULONGLONG_MAX);
02776 
02777     } else if (srv_adaptive_flushing) {
02778 
02779       /* Try to keep the rate of flushing of dirty
02780       pages such that redo log generation does not
02781       produce bursts of IO at checkpoint time. */
02782       ulint n_flush = buf_flush_get_desired_flush_rate();
02783 
02784       if (n_flush) {
02785         srv_main_thread_op_info =
02786           "flushing buffer pool pages";
02787         n_flush = ut_min(PCT_IO(100), n_flush);
02788         n_pages_flushed =
02789           buf_flush_list(
02790             n_flush,
02791             IB_ULONGLONG_MAX);
02792       }
02793     }
02794 
02795     if (srv_activity_count == old_activity_count) {
02796 
02797       /* There is no user activity at the moment, go to
02798       the background loop */
02799 
02800       goto background_loop;
02801     }
02802   }
02803 
02804   /* ---- We perform the following code approximately once per
02805   10 seconds when there is database activity */
02806 
02807 #ifdef MEM_PERIODIC_CHECK
02808   /* Check magic numbers of every allocated mem block once in 10
02809   seconds */
02810   mem_validate_all_blocks();
02811 #endif
02812   /* If i/os during the 10 second period were less than 200% of
02813   capacity, we assume that there is free disk i/o capacity
02814   available, and it makes sense to flush srv_io_capacity pages.
02815 
02816   Note that this is done regardless of the fraction of dirty
02817   pages relative to the max requested by the user. The one second
02818   loop above requests writes for that case. The writes done here
02819   are not required, and may be disabled. */
02820 
02821   buf_get_total_stat(&buf_stat);
02822   n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes;
02823   n_ios = log_sys->n_log_ios + buf_stat.n_pages_read
02824     + buf_stat.n_pages_written;
02825 
02826   srv_main_10_second_loops++;
02827   if (n_pend_ios < SRV_PEND_IO_THRESHOLD
02828       && (n_ios - n_ios_very_old < SRV_PAST_IO_ACTIVITY)) {
02829 
02830     srv_main_thread_op_info = "flushing buffer pool pages";
02831     buf_flush_list(PCT_IO(100), IB_ULONGLONG_MAX);
02832 
02833     /* Flush logs if needed */
02834     srv_sync_log_buffer_in_background();
02835   }
02836 
02837   /* We run a batch of insert buffer merge every 10 seconds,
02838   even if the server were active */
02839 
02840   srv_main_thread_op_info = "doing insert buffer merge";
02841   ibuf_contract_for_n_pages(FALSE, PCT_IO(5));
02842 
02843   /* Flush logs if needed */
02844   srv_sync_log_buffer_in_background();
02845 
02846   if (srv_n_purge_threads == 0) {
02847     srv_main_thread_op_info = "master purging";
02848 
02849     srv_master_do_purge();
02850 
02851     if (srv_fast_shutdown && srv_shutdown_state > 0) {
02852 
02853       goto background_loop;
02854     }
02855   }
02856 
02857   srv_main_thread_op_info = "flushing buffer pool pages";
02858 
02859   /* Flush a few oldest pages to make a new checkpoint younger */
02860 
02861   if (buf_get_modified_ratio_pct() > 70) {
02862 
02863     /* If there are lots of modified pages in the buffer pool
02864     (> 70 %), we assume we can afford reserving the disk(s) for
02865     the time it requires to flush 100 pages */
02866 
02867     n_pages_flushed = buf_flush_list(
02868       PCT_IO(100), IB_ULONGLONG_MAX);
02869   } else {
02870     /* Otherwise, we only flush a small number of pages so that
02871     we do not unnecessarily use much disk i/o capacity from
02872     other work */
02873 
02874     n_pages_flushed = buf_flush_list(
02875         PCT_IO(10), IB_ULONGLONG_MAX);
02876   }
02877 
02878   srv_main_thread_op_info = "making checkpoint";
02879 
02880   /* Make a new checkpoint about once in 10 seconds */
02881 
02882   log_checkpoint(TRUE, FALSE);
02883 
02884   srv_main_thread_op_info = "reserving kernel mutex";
02885 
02886   mutex_enter(&kernel_mutex);
02887 
02888   /* ---- When there is database activity, we jump from here back to
02889   the start of loop */
02890 
02891   if (srv_activity_count != old_activity_count) {
02892     mutex_exit(&kernel_mutex);
02893     goto loop;
02894   }
02895 
02896   mutex_exit(&kernel_mutex);
02897 
02898   /* If the database is quiet, we enter the background loop */
02899 
02900   /*****************************************************************/
02901 background_loop:
02902   /* ---- In this loop we run background operations when the server
02903   is quiet from user activity. Also in the case of a shutdown, we
02904   loop here, flushing the buffer pool to the data files. */
02905 
02906   /* The server has been quiet for a while: start running background
02907   operations */
02908   srv_main_background_loops++;
02909   srv_main_thread_op_info = "doing background drop tables";
02910 
02911   n_tables_to_drop = row_drop_tables_for_mysql_in_background();
02912 
02913   if (n_tables_to_drop > 0) {
02914     /* Do not monopolize the CPU even if there are tables waiting
02915     in the background drop queue. (It is essentially a bug if
02916     MySQL tries to drop a table while there are still open handles
02917     to it and we had to put it to the background drop queue.) */
02918 
02919     if (srv_shutdown_state == SRV_SHUTDOWN_NONE) {
02920       os_thread_sleep(100000);
02921     }
02922   }
02923 
02924   if (srv_n_purge_threads == 0) {
02925     srv_main_thread_op_info = "master purging";
02926 
02927     srv_master_do_purge();
02928   }
02929 
02930   srv_main_thread_op_info = "reserving kernel mutex";
02931 
02932   mutex_enter(&kernel_mutex);
02933   if (srv_activity_count != old_activity_count) {
02934     mutex_exit(&kernel_mutex);
02935     goto loop;
02936   }
02937   mutex_exit(&kernel_mutex);
02938 
02939   srv_main_thread_op_info = "doing insert buffer merge";
02940 
02941   if (srv_fast_shutdown && srv_shutdown_state > 0) {
02942     n_bytes_merged = 0;
02943   } else {
02944     /* This should do an amount of IO similar to the number of
02945     dirty pages that will be flushed in the call to
02946     buf_flush_list below. Otherwise, the system favors
02947     clean pages over cleanup throughput. */
02948     n_bytes_merged = ibuf_contract_for_n_pages(FALSE,
02949                  PCT_IO(100));
02950   }
02951 
02952   srv_main_thread_op_info = "reserving kernel mutex";
02953 
02954   mutex_enter(&kernel_mutex);
02955   if (srv_activity_count != old_activity_count) {
02956     mutex_exit(&kernel_mutex);
02957     goto loop;
02958   }
02959   mutex_exit(&kernel_mutex);
02960 
02961 flush_loop:
02962   srv_main_thread_op_info = "flushing buffer pool pages";
02963   srv_main_flush_loops++;
02964   if (srv_fast_shutdown < 2) {
02965     n_pages_flushed = buf_flush_list(
02966         PCT_IO(100), IB_ULONGLONG_MAX);
02967   } else {
02968     /* In the fastest shutdown we do not flush the buffer pool
02969     to data files: we set n_pages_flushed to 0 artificially. */
02970 
02971     n_pages_flushed = 0;
02972   }
02973 
02974   srv_main_thread_op_info = "reserving kernel mutex";
02975 
02976   mutex_enter(&kernel_mutex);
02977   if (srv_activity_count != old_activity_count) {
02978     mutex_exit(&kernel_mutex);
02979     goto loop;
02980   }
02981   mutex_exit(&kernel_mutex);
02982 
02983   srv_main_thread_op_info = "waiting for buffer pool flush to end";
02984   buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
02985 
02986   /* Flush logs if needed */
02987   srv_sync_log_buffer_in_background();
02988 
02989   srv_main_thread_op_info = "making checkpoint";
02990 
02991   log_checkpoint(TRUE, FALSE);
02992 
02993   if (buf_get_modified_ratio_pct() > srv_max_buf_pool_modified_pct) {
02994 
02995     /* Try to keep the number of modified pages in the
02996     buffer pool under the limit wished by the user */
02997 
02998     goto flush_loop;
02999   }
03000 
03001   srv_main_thread_op_info = "reserving kernel mutex";
03002 
03003   mutex_enter(&kernel_mutex);
03004   if (srv_activity_count != old_activity_count) {
03005     mutex_exit(&kernel_mutex);
03006     goto loop;
03007   }
03008   mutex_exit(&kernel_mutex);
03009   /*
03010   srv_main_thread_op_info = "archiving log (if log archive is on)";
03011 
03012   log_archive_do(FALSE, &n_bytes_archived);
03013   */
03014   n_bytes_archived = 0;
03015 
03016   /* Keep looping in the background loop if still work to do */
03017 
03018   if (srv_fast_shutdown && srv_shutdown_state > 0) {
03019     if (n_tables_to_drop + n_pages_flushed
03020         + n_bytes_archived != 0) {
03021 
03022       /* If we are doing a fast shutdown (= the default)
03023       we do not do purge or insert buffer merge. But we
03024       flush the buffer pool completely to disk.
03025       In a 'very fast' shutdown we do not flush the buffer
03026       pool to data files: we have set n_pages_flushed to
03027       0 artificially. */
03028 
03029       goto background_loop;
03030     }
03031   } else if (n_tables_to_drop
03032        + n_pages_purged + n_bytes_merged + n_pages_flushed
03033        + n_bytes_archived != 0) {
03034     /* In a 'slow' shutdown we run purge and the insert buffer
03035     merge to completion */
03036 
03037     goto background_loop;
03038   }
03039 
03040   /* There is no work for background operations either: suspend
03041   master thread to wait for more server activity */
03042 
03043 suspend_thread:
03044   srv_main_thread_op_info = "suspending";
03045 
03046   mutex_enter(&kernel_mutex);
03047 
03048   if (row_get_background_drop_list_len_low() > 0) {
03049     mutex_exit(&kernel_mutex);
03050 
03051     goto loop;
03052   }
03053 
03054   event = srv_suspend_thread();
03055 
03056   mutex_exit(&kernel_mutex);
03057 
03058   /* DO NOT CHANGE THIS STRING. innobase_start_or_create_for_mysql()
03059   waits for database activity to die down when converting < 4.1.x
03060   databases, and relies on this string being exactly as it is. InnoDB
03061   manual also mentions this string in several places. */
03062   srv_main_thread_op_info = "waiting for server activity";
03063 
03064   os_event_wait(event);
03065 
03066   if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
03067     /* This is only extra safety, the thread should exit
03068     already when the event wait ends */
03069 
03070     os_thread_exit(NULL);
03071 
03072   }
03073 
03074   /* When there is user activity, InnoDB will set the event and the
03075   main thread goes back to loop. */
03076 
03077   goto loop;
03078 
03079 
03080 #if !defined(__SUNPRO_C)
03081   OS_THREAD_DUMMY_RETURN; /* Not reached, avoid compiler warning */
03082 #endif
03083 }
03084 
03085 /*********************************************************************/
03088 UNIV_INTERN
03089 os_thread_ret_t
03090 srv_purge_thread(
03091 /*=============*/
03092   void* /*arg __attribute__((unused))*/)  
03094 {
03095   srv_slot_t* slot;
03096   ulint   slot_no = ULINT_UNDEFINED;
03097   ulint   n_total_purged = ULINT_UNDEFINED;
03098 
03099   ut_a(srv_n_purge_threads == 1);
03100 
03101 #ifdef UNIV_DEBUG_THREAD_CREATION
03102   fprintf(stderr, "InnoDB: Purge thread running, id %lu\n",
03103     os_thread_pf(os_thread_get_curr_id()));
03104 #endif /* UNIV_DEBUG_THREAD_CREATION */
03105 
03106   mutex_enter(&kernel_mutex);
03107 
03108   slot_no = srv_table_reserve_slot(SRV_WORKER);
03109 
03110   slot = srv_table_get_nth_slot(slot_no);
03111 
03112   ++srv_n_threads_active[SRV_WORKER];
03113 
03114   mutex_exit(&kernel_mutex);
03115 
03116   while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) {
03117 
03118     ulint n_pages_purged;
03119 
03120     /* If there are very few records to purge or the last
03121     purge didn't purge any records then wait for activity.
03122           We peek at the history len without holding any mutex
03123     because in the worst case we will end up waiting for
03124     the next purge event. */
03125     if (trx_sys->rseg_history_len < srv_purge_batch_size
03126         || n_total_purged == 0) {
03127 
03128       os_event_t  event;
03129 
03130       mutex_enter(&kernel_mutex);
03131 
03132       event = srv_suspend_thread();
03133 
03134       mutex_exit(&kernel_mutex);
03135 
03136       os_event_wait(event);
03137     }
03138 
03139     /* Check for shutdown and whether we should do purge at all. */
03140     if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND
03141         || srv_shutdown_state != 0
03142         || srv_fast_shutdown) {
03143 
03144       break;
03145     }
03146 
03147     n_total_purged = 0;
03148 
03149     /* Purge until there are no more records to purge and there is
03150     no change in configuration or server state. */
03151     do {
03152       n_pages_purged = trx_purge(srv_purge_batch_size);
03153 
03154       n_total_purged += n_pages_purged;
03155 
03156     } while (n_pages_purged > 0 && !srv_fast_shutdown);
03157 
03158     srv_sync_log_buffer_in_background();
03159   }
03160 
03161   mutex_enter(&kernel_mutex);
03162 
03163   ut_ad(srv_table_get_nth_slot(slot_no) == slot);
03164 
03165   /* Decrement the active count. */
03166   srv_suspend_thread();
03167 
03168   slot->in_use = FALSE;
03169 
03170   /* Free the thread local memory. */
03171   thr_local_free(os_thread_get_curr_id());
03172 
03173   mutex_exit(&kernel_mutex);
03174 
03175 #ifdef UNIV_DEBUG_THREAD_CREATION
03176   fprintf(stderr, "InnoDB: Purge thread exiting, id %lu\n",
03177     os_thread_pf(os_thread_get_curr_id()));
03178 #endif /* UNIV_DEBUG_THREAD_CREATION */
03179 
03180   /* We count the number of threads in os_thread_exit(). A created
03181   thread should always use that to exit and not use return() to exit. */
03182   os_thread_exit(NULL);
03183 
03184   OS_THREAD_DUMMY_RETURN; /* Not reached, avoid compiler warning */
03185 }
03186 
03187 /**********************************************************************/
03190 UNIV_INTERN
03191 void
03192 srv_que_task_enqueue_low(
03193 /*=====================*/
03194   que_thr_t*  thr)  
03195 {
03196   ut_ad(thr);
03197 
03198   mutex_enter(&kernel_mutex);
03199 
03200   UT_LIST_ADD_LAST(queue, srv_sys->tasks, thr);
03201 
03202   srv_release_threads(SRV_WORKER, 1);
03203 
03204   mutex_exit(&kernel_mutex);
03205 }