00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00026 #include "buf0flu.h"
00027
00028 #ifdef UNIV_NONINL
00029 #include "buf0flu.ic"
00030 #endif
00031
00032 #include "buf0buf.h"
00033 #include "srv0srv.h"
00034 #include "page0zip.h"
00035 #ifndef UNIV_HOTBACKUP
00036 #include "ut0byte.h"
00037 #include "ut0lst.h"
00038 #include "page0page.h"
00039 #include "fil0fil.h"
00040 #include "buf0lru.h"
00041 #include "buf0rea.h"
00042 #include "ibuf0ibuf.h"
00043 #include "log0log.h"
00044 #include "os0file.h"
00045 #include "trx0sys.h"
00046
00047
00048
00049
00050
00051
00052
00053
00054
00058 #define BUF_FLUSH_STAT_N_INTERVAL 20
00059
00062 static buf_flush_stat_t buf_flush_stat_arr[BUF_FLUSH_STAT_N_INTERVAL];
00063
00065 static ulint buf_flush_stat_arr_ind;
00066
00069 static buf_flush_stat_t buf_flush_stat_cur;
00070
00073 static buf_flush_stat_t buf_flush_stat_sum;
00074
00076 static ulint buf_lru_flush_page_count = 0;
00077
00078
00079
00080 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
00081
00084 static
00085 ibool
00086 buf_flush_validate_low(
00087
00088 buf_pool_t* buf_pool);
00089 #endif
00090
00091
00096 static
00097 buf_page_t*
00098 buf_flush_insert_in_flush_rbt(
00099
00100 buf_page_t* bpage)
00101 {
00102 const ib_rbt_node_t* c_node;
00103 const ib_rbt_node_t* p_node;
00104 buf_page_t* prev = NULL;
00105 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
00106
00107 ut_ad(buf_flush_list_mutex_own(buf_pool));
00108
00109
00110 c_node = rbt_insert(buf_pool->flush_rbt, &bpage, &bpage);
00111 ut_a(c_node != NULL);
00112
00113
00114 p_node = rbt_prev(buf_pool->flush_rbt, c_node);
00115
00116 if (p_node != NULL) {
00117 buf_page_t** value;
00118 value = rbt_value(buf_page_t*, p_node);
00119 prev = *value;
00120 ut_a(prev != NULL);
00121 }
00122
00123 return(prev);
00124 }
00125
00126
00128 static
00129 void
00130 buf_flush_delete_from_flush_rbt(
00131
00132 buf_page_t* bpage)
00133 {
00134 #ifdef UNIV_DEBUG
00135 ibool ret = FALSE;
00136 #endif
00137 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
00138
00139 ut_ad(buf_flush_list_mutex_own(buf_pool));
00140
00141 #ifdef UNIV_DEBUG
00142 ret =
00143 #endif
00144 rbt_delete(buf_pool->flush_rbt, &bpage);
00145 ut_ad(ret);
00146 }
00147
00148
00158 static
00159 int
00160 buf_flush_block_cmp(
00161
00162 const void* p1,
00163 const void* p2)
00164 {
00165 int ret;
00166 const buf_page_t* b1 = *(const buf_page_t**) p1;
00167 const buf_page_t* b2 = *(const buf_page_t**) p2;
00168 #ifdef UNIV_DEBUG
00169 buf_pool_t* buf_pool = buf_pool_from_bpage(b1);
00170 #endif
00171
00172 ut_ad(b1 != NULL);
00173 ut_ad(b2 != NULL);
00174
00175 ut_ad(buf_flush_list_mutex_own(buf_pool));
00176
00177 ut_ad(b1->in_flush_list);
00178 ut_ad(b2->in_flush_list);
00179
00180 if (b2->oldest_modification > b1->oldest_modification) {
00181 return(1);
00182 } else if (b2->oldest_modification < b1->oldest_modification) {
00183 return(-1);
00184 }
00185
00186
00187 ret = (int)(b2->space - b1->space);
00188
00189
00190 return(ret ? ret : (int)(b2->offset - b1->offset));
00191 }
00192
00193
00197 UNIV_INTERN
00198 void
00199 buf_flush_init_flush_rbt(void)
00200
00201 {
00202 ulint i;
00203
00204 for (i = 0; i < srv_buf_pool_instances; i++) {
00205 buf_pool_t* buf_pool;
00206
00207 buf_pool = buf_pool_from_array(i);
00208
00209 buf_flush_list_mutex_enter(buf_pool);
00210
00211
00212 buf_pool->flush_rbt = rbt_create(
00213 sizeof(buf_page_t*), buf_flush_block_cmp);
00214
00215 buf_flush_list_mutex_exit(buf_pool);
00216 }
00217 }
00218
00219
00221 UNIV_INTERN
00222 void
00223 buf_flush_free_flush_rbt(void)
00224
00225 {
00226 ulint i;
00227
00228 for (i = 0; i < srv_buf_pool_instances; i++) {
00229 buf_pool_t* buf_pool;
00230
00231 buf_pool = buf_pool_from_array(i);
00232
00233 buf_flush_list_mutex_enter(buf_pool);
00234
00235 #ifdef UNIV_DEBUG_VALGRIND
00236 {
00237 ulint zip_size = buf_block_get_zip_size(block);
00238
00239 if (UNIV_UNLIKELY(zip_size)) {
00240 UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
00241 } else {
00242 UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE);
00243 }
00244 }
00245 #endif
00246 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
00247 ut_a(buf_flush_validate_low(buf_pool));
00248 #endif
00249
00250 rbt_free(buf_pool->flush_rbt);
00251 buf_pool->flush_rbt = NULL;
00252
00253 buf_flush_list_mutex_exit(buf_pool);
00254 }
00255 }
00256
00257
00259 UNIV_INTERN
00260 void
00261 buf_flush_insert_into_flush_list(
00262
00263 buf_pool_t* buf_pool,
00264 buf_block_t* block,
00265 ib_uint64_t lsn)
00266 {
00267 ut_ad(!buf_pool_mutex_own(buf_pool));
00268 ut_ad(log_flush_order_mutex_own());
00269 ut_ad(mutex_own(&block->mutex));
00270
00271 buf_flush_list_mutex_enter(buf_pool);
00272
00273 ut_ad((UT_LIST_GET_FIRST(buf_pool->flush_list) == NULL)
00274 || (UT_LIST_GET_FIRST(buf_pool->flush_list)->oldest_modification
00275 <= lsn));
00276
00277
00278
00279 if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
00280 buf_flush_list_mutex_exit(buf_pool);
00281 buf_flush_insert_sorted_into_flush_list(buf_pool, block, lsn);
00282 return;
00283 }
00284
00285 ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
00286 ut_ad(!block->page.in_flush_list);
00287
00288 ut_d(block->page.in_flush_list = TRUE);
00289 block->page.oldest_modification = lsn;
00290 UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
00291
00292 #ifdef UNIV_DEBUG_VALGRIND
00293 {
00294 ulint zip_size = buf_block_get_zip_size(block);
00295
00296 if (UNIV_UNLIKELY(zip_size)) {
00297 UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
00298 } else {
00299 UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE);
00300 }
00301 }
00302 #endif
00303 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
00304 ut_a(buf_flush_validate_low(buf_pool));
00305 #endif
00306
00307 buf_flush_list_mutex_exit(buf_pool);
00308 }
00309
00310
00314 UNIV_INTERN
00315 void
00316 buf_flush_insert_sorted_into_flush_list(
00317
00318 buf_pool_t* buf_pool,
00319 buf_block_t* block,
00320 ib_uint64_t lsn)
00321 {
00322 buf_page_t* prev_b;
00323 buf_page_t* b;
00324
00325 ut_ad(!buf_pool_mutex_own(buf_pool));
00326 ut_ad(log_flush_order_mutex_own());
00327 ut_ad(mutex_own(&block->mutex));
00328 ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
00329
00330 buf_flush_list_mutex_enter(buf_pool);
00331
00332
00333
00334
00335
00336
00337
00338
00339
00340
00341
00342 ut_ad(block->page.in_LRU_list);
00343 ut_ad(block->page.in_page_hash);
00344
00345
00346 ut_ad(!block->page.in_zip_hash);
00347
00348 ut_ad(!block->page.in_flush_list);
00349 ut_d(block->page.in_flush_list = TRUE);
00350 block->page.oldest_modification = lsn;
00351
00352 #ifdef UNIV_DEBUG_VALGRIND
00353 {
00354 ulint zip_size = buf_block_get_zip_size(block);
00355
00356 if (UNIV_UNLIKELY(zip_size)) {
00357 UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
00358 } else {
00359 UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE);
00360 }
00361 }
00362 #endif
00363
00364 #ifdef UNIV_DEBUG_VALGRIND
00365 {
00366 ulint zip_size = buf_block_get_zip_size(block);
00367
00368 if (UNIV_UNLIKELY(zip_size)) {
00369 UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
00370 } else {
00371 UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE);
00372 }
00373 }
00374 #endif
00375
00376 prev_b = NULL;
00377
00378
00379
00380
00381
00382
00383
00384 if (buf_pool->flush_rbt) {
00385
00386 prev_b = buf_flush_insert_in_flush_rbt(&block->page);
00387
00388 } else {
00389
00390 b = UT_LIST_GET_FIRST(buf_pool->flush_list);
00391
00392 while (b && b->oldest_modification
00393 > block->page.oldest_modification) {
00394 ut_ad(b->in_flush_list);
00395 prev_b = b;
00396 b = UT_LIST_GET_NEXT(list, b);
00397 }
00398 }
00399
00400 if (prev_b == NULL) {
00401 UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
00402 } else {
00403 UT_LIST_INSERT_AFTER(list, buf_pool->flush_list,
00404 prev_b, &block->page);
00405 }
00406
00407 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
00408 ut_a(buf_flush_validate_low(buf_pool));
00409 #endif
00410
00411 buf_flush_list_mutex_exit(buf_pool);
00412 }
00413
00414
00418 UNIV_INTERN
00419 ibool
00420 buf_flush_ready_for_replace(
00421
00422 buf_page_t* bpage)
00424 {
00425 #ifdef UNIV_DEBUG
00426 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
00427 ut_ad(buf_pool_mutex_own(buf_pool));
00428 #endif
00429 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
00430 ut_ad(bpage->in_LRU_list);
00431
00432 if (UNIV_LIKELY(buf_page_in_file(bpage))) {
00433
00434 return(bpage->oldest_modification == 0
00435 && buf_page_get_io_fix(bpage) == BUF_IO_NONE
00436 && bpage->buf_fix_count == 0);
00437 }
00438
00439 ut_print_timestamp(stderr);
00440 fprintf(stderr,
00441 " InnoDB: Error: buffer block state %lu"
00442 " in the LRU list!\n",
00443 (ulong) buf_page_get_state(bpage));
00444 ut_print_buf(stderr, bpage, sizeof(buf_page_t));
00445 putc('\n', stderr);
00446
00447 return(FALSE);
00448 }
00449
00450
00453 UNIV_INLINE
00454 ibool
00455 buf_flush_ready_for_flush(
00456
00457 buf_page_t* bpage,
00459 enum buf_flush flush_type)
00460 {
00461 #ifdef UNIV_DEBUG
00462 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
00463 ut_ad(buf_pool_mutex_own(buf_pool));
00464 #endif
00465 ut_a(buf_page_in_file(bpage));
00466 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
00467 ut_ad(flush_type == BUF_FLUSH_LRU || BUF_FLUSH_LIST);
00468
00469 if (bpage->oldest_modification != 0
00470 && buf_page_get_io_fix(bpage) == BUF_IO_NONE) {
00471 ut_ad(bpage->in_flush_list);
00472
00473 if (flush_type != BUF_FLUSH_LRU) {
00474
00475 return(TRUE);
00476
00477 } else if (bpage->buf_fix_count == 0) {
00478
00479
00480
00481
00482
00483 return(TRUE);
00484 }
00485 }
00486
00487 return(FALSE);
00488 }
00489
00490
00492 UNIV_INTERN
00493 void
00494 buf_flush_remove(
00495
00496 buf_page_t* bpage)
00497 {
00498 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
00499
00500 ut_ad(buf_pool_mutex_own(buf_pool));
00501 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
00502 ut_ad(bpage->in_flush_list);
00503
00504 buf_flush_list_mutex_enter(buf_pool);
00505
00506 switch (buf_page_get_state(bpage)) {
00507 case BUF_BLOCK_ZIP_PAGE:
00508
00509 case BUF_BLOCK_ZIP_FREE:
00510 case BUF_BLOCK_NOT_USED:
00511 case BUF_BLOCK_READY_FOR_USE:
00512 case BUF_BLOCK_MEMORY:
00513 case BUF_BLOCK_REMOVE_HASH:
00514 ut_error;
00515 return;
00516 case BUF_BLOCK_ZIP_DIRTY:
00517 buf_page_set_state(bpage, BUF_BLOCK_ZIP_PAGE);
00518 UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
00519 buf_LRU_insert_zip_clean(bpage);
00520 break;
00521 case BUF_BLOCK_FILE_PAGE:
00522 UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
00523 break;
00524 }
00525
00526
00527 if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
00528 buf_flush_delete_from_flush_rbt(bpage);
00529 }
00530
00531
00532
00533 ut_d(bpage->in_flush_list = FALSE);
00534
00535 bpage->oldest_modification = 0;
00536
00537 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
00538 ut_a(buf_flush_validate_low(buf_pool));
00539 #endif
00540
00541 buf_flush_list_mutex_exit(buf_pool);
00542 }
00543
00544
00555 UNIV_INTERN
00556 void
00557 buf_flush_relocate_on_flush_list(
00558
00559 buf_page_t* bpage,
00560 buf_page_t* dpage)
00561 {
00562 buf_page_t* prev;
00563 buf_page_t* prev_b = NULL;
00564 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
00565
00566 ut_ad(buf_pool_mutex_own(buf_pool));
00567
00568 ut_ad(buf_pool == buf_pool_from_bpage(dpage));
00569
00570 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
00571
00572 buf_flush_list_mutex_enter(buf_pool);
00573
00574
00575
00576
00577
00578
00579
00580
00581 ut_ad(bpage->in_flush_list);
00582 ut_ad(dpage->in_flush_list);
00583
00584
00585
00586 if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
00587 buf_flush_delete_from_flush_rbt(bpage);
00588 prev_b = buf_flush_insert_in_flush_rbt(dpage);
00589 }
00590
00591
00592
00593 ut_d(bpage->in_flush_list = FALSE);
00594
00595 prev = UT_LIST_GET_PREV(list, bpage);
00596 UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
00597
00598 if (prev) {
00599 ut_ad(prev->in_flush_list);
00600 UT_LIST_INSERT_AFTER(
00601 list,
00602 buf_pool->flush_list,
00603 prev, dpage);
00604 } else {
00605 UT_LIST_ADD_FIRST(
00606 list,
00607 buf_pool->flush_list,
00608 dpage);
00609 }
00610
00611
00612
00613 ut_a(!buf_pool->flush_rbt || prev_b == prev);
00614
00615 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
00616 ut_a(buf_flush_validate_low(buf_pool));
00617 #endif
00618
00619 buf_flush_list_mutex_exit(buf_pool);
00620 }
00621
00622
00624 UNIV_INTERN
00625 void
00626 buf_flush_write_complete(
00627
00628 buf_page_t* bpage)
00629 {
00630 enum buf_flush flush_type;
00631 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
00632
00633 ut_ad(bpage);
00634
00635 buf_flush_remove(bpage);
00636
00637 flush_type = buf_page_get_flush_type(bpage);
00638 buf_pool->n_flush[flush_type]--;
00639
00640 if (flush_type == BUF_FLUSH_LRU) {
00641
00642
00643
00644 buf_LRU_make_block_old(bpage);
00645
00646 buf_pool->LRU_flush_ended++;
00647 }
00648
00649
00650
00651
00652 if (buf_pool->n_flush[flush_type] == 0
00653 && buf_pool->init_flush[flush_type] == FALSE) {
00654
00655
00656
00657 os_event_set(buf_pool->no_flush[flush_type]);
00658 }
00659 }
00660
00661
00664 static
00665 void
00666 buf_flush_sync_datafiles(void)
00667
00668 {
00669
00670
00671 os_aio_simulated_wake_handler_threads();
00672
00673
00674
00675 os_aio_wait_until_no_pending_writes();
00676
00677
00678 fil_flush_file_spaces(FIL_TABLESPACE);
00679
00680 return;
00681 }
00682
00683
00689 static
00690 void
00691 buf_flush_buffered_writes(void)
00692
00693 {
00694 byte* write_buf;
00695 ulint len;
00696 ulint len2;
00697 ulint i;
00698
00699 if (!srv_use_doublewrite_buf || trx_doublewrite == NULL) {
00700
00701 buf_flush_sync_datafiles();
00702 return;
00703 }
00704
00705 mutex_enter(&(trx_doublewrite->mutex));
00706
00707
00708
00709
00710
00711 if (trx_doublewrite->first_free == 0) {
00712
00713 mutex_exit(&(trx_doublewrite->mutex));
00714
00715 return;
00716 }
00717
00718 for (i = 0; i < trx_doublewrite->first_free; i++) {
00719
00720 const buf_block_t* block;
00721
00722 block = (buf_block_t*) trx_doublewrite->buf_block_arr[i];
00723
00724 if (buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE
00725 || block->page.zip.data) {
00726
00727 continue;
00728 }
00729
00730 if (UNIV_UNLIKELY
00731 (memcmp(block->frame + (FIL_PAGE_LSN + 4),
00732 block->frame + (UNIV_PAGE_SIZE
00733 - FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
00734 4))) {
00735 ut_print_timestamp(stderr);
00736 fprintf(stderr,
00737 " InnoDB: ERROR: The page to be written"
00738 " seems corrupt!\n"
00739 "InnoDB: The lsn fields do not match!"
00740 " Noticed in the buffer pool\n"
00741 "InnoDB: before posting to the"
00742 " doublewrite buffer.\n");
00743 }
00744
00745 if (!block->check_index_page_at_flush) {
00746 } else if (page_is_comp(block->frame)) {
00747 if (UNIV_UNLIKELY
00748 (!page_simple_validate_new(block->frame))) {
00749 corrupted_page:
00750 buf_page_print(block->frame, 0);
00751
00752 ut_print_timestamp(stderr);
00753 fprintf(stderr,
00754 " InnoDB: Apparent corruption of an"
00755 " index page n:o %lu in space %lu\n"
00756 "InnoDB: to be written to data file."
00757 " We intentionally crash server\n"
00758 "InnoDB: to prevent corrupt data"
00759 " from ending up in data\n"
00760 "InnoDB: files.\n",
00761 (ulong) buf_block_get_page_no(block),
00762 (ulong) buf_block_get_space(block));
00763
00764 ut_error;
00765 }
00766 } else if (UNIV_UNLIKELY
00767 (!page_simple_validate_old(block->frame))) {
00768
00769 goto corrupted_page;
00770 }
00771 }
00772
00773
00774 srv_dblwr_pages_written+= trx_doublewrite->first_free;
00775 srv_dblwr_writes++;
00776
00777 len = ut_min(TRX_SYS_DOUBLEWRITE_BLOCK_SIZE,
00778 trx_doublewrite->first_free) * UNIV_PAGE_SIZE;
00779
00780 write_buf = trx_doublewrite->write_buf;
00781 i = 0;
00782
00783 fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0,
00784 trx_doublewrite->block1, 0, len,
00785 (void*) write_buf, NULL);
00786
00787 for (len2 = 0; len2 + UNIV_PAGE_SIZE <= len;
00788 len2 += UNIV_PAGE_SIZE, i++) {
00789 const buf_block_t* block = (buf_block_t*)
00790 trx_doublewrite->buf_block_arr[i];
00791
00792 if (UNIV_LIKELY(!block->page.zip.data)
00793 && UNIV_LIKELY(buf_block_get_state(block)
00794 == BUF_BLOCK_FILE_PAGE)
00795 && UNIV_UNLIKELY
00796 (memcmp(write_buf + len2 + (FIL_PAGE_LSN + 4),
00797 write_buf + len2
00798 + (UNIV_PAGE_SIZE
00799 - FIL_PAGE_END_LSN_OLD_CHKSUM + 4), 4))) {
00800 ut_print_timestamp(stderr);
00801 fprintf(stderr,
00802 " InnoDB: ERROR: The page to be written"
00803 " seems corrupt!\n"
00804 "InnoDB: The lsn fields do not match!"
00805 " Noticed in the doublewrite block1.\n");
00806 }
00807 }
00808
00809 if (trx_doublewrite->first_free <= TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
00810 goto flush;
00811 }
00812
00813 len = (trx_doublewrite->first_free - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
00814 * UNIV_PAGE_SIZE;
00815
00816 write_buf = trx_doublewrite->write_buf
00817 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
00818 ut_ad(i == TRX_SYS_DOUBLEWRITE_BLOCK_SIZE);
00819
00820 fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0,
00821 trx_doublewrite->block2, 0, len,
00822 (void*) write_buf, NULL);
00823
00824 for (len2 = 0; len2 + UNIV_PAGE_SIZE <= len;
00825 len2 += UNIV_PAGE_SIZE, i++) {
00826 const buf_block_t* block = (buf_block_t*)
00827 trx_doublewrite->buf_block_arr[i];
00828
00829 if (UNIV_LIKELY(!block->page.zip.data)
00830 && UNIV_LIKELY(buf_block_get_state(block)
00831 == BUF_BLOCK_FILE_PAGE)
00832 && UNIV_UNLIKELY
00833 (memcmp(write_buf + len2 + (FIL_PAGE_LSN + 4),
00834 write_buf + len2
00835 + (UNIV_PAGE_SIZE
00836 - FIL_PAGE_END_LSN_OLD_CHKSUM + 4), 4))) {
00837 ut_print_timestamp(stderr);
00838 fprintf(stderr,
00839 " InnoDB: ERROR: The page to be"
00840 " written seems corrupt!\n"
00841 "InnoDB: The lsn fields do not match!"
00842 " Noticed in"
00843 " the doublewrite block2.\n");
00844 }
00845 }
00846
00847 flush:
00848
00849
00850 fil_flush(TRX_SYS_SPACE);
00851
00852
00853
00854
00855
00856 for (i = 0; i < trx_doublewrite->first_free; i++) {
00857 const buf_block_t* block = (buf_block_t*)
00858 trx_doublewrite->buf_block_arr[i];
00859
00860 ut_a(buf_page_in_file(&block->page));
00861 if (UNIV_LIKELY_NULL(block->page.zip.data)) {
00862 fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
00863 FALSE, buf_page_get_space(&block->page),
00864 buf_page_get_zip_size(&block->page),
00865 buf_page_get_page_no(&block->page), 0,
00866 buf_page_get_zip_size(&block->page),
00867 (void*)block->page.zip.data,
00868 (void*)block);
00869
00870
00871
00872 buf_LRU_stat_inc_io();
00873
00874 continue;
00875 }
00876
00877 ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
00878
00879 if (UNIV_UNLIKELY(memcmp(block->frame + (FIL_PAGE_LSN + 4),
00880 block->frame
00881 + (UNIV_PAGE_SIZE
00882 - FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
00883 4))) {
00884 ut_print_timestamp(stderr);
00885 fprintf(stderr,
00886 " InnoDB: ERROR: The page to be written"
00887 " seems corrupt!\n"
00888 "InnoDB: The lsn fields do not match!"
00889 " Noticed in the buffer pool\n"
00890 "InnoDB: after posting and flushing"
00891 " the doublewrite buffer.\n"
00892 "InnoDB: Page buf fix count %lu,"
00893 " io fix %lu, state %lu\n",
00894 (ulong)block->page.buf_fix_count,
00895 (ulong)buf_block_get_io_fix(block),
00896 (ulong)buf_block_get_state(block));
00897 }
00898
00899 fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
00900 FALSE, buf_block_get_space(block), 0,
00901 buf_block_get_page_no(block), 0, UNIV_PAGE_SIZE,
00902 (void*)block->frame, (void*)block);
00903
00904
00905
00906 buf_LRU_stat_inc_io();
00907 }
00908
00909
00910 buf_flush_sync_datafiles();
00911
00912
00913 trx_doublewrite->first_free = 0;
00914
00915 mutex_exit(&(trx_doublewrite->mutex));
00916 }
00917
00918
00922 static
00923 void
00924 buf_flush_post_to_doublewrite_buf(
00925
00926 buf_page_t* bpage)
00927 {
00928 ulint zip_size;
00929 try_again:
00930 mutex_enter(&(trx_doublewrite->mutex));
00931
00932 ut_a(buf_page_in_file(bpage));
00933
00934 if (trx_doublewrite->first_free
00935 >= 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
00936 mutex_exit(&(trx_doublewrite->mutex));
00937
00938 buf_flush_buffered_writes();
00939
00940 goto try_again;
00941 }
00942
00943 zip_size = buf_page_get_zip_size(bpage);
00944
00945 if (UNIV_UNLIKELY(zip_size)) {
00946 UNIV_MEM_ASSERT_RW(bpage->zip.data, zip_size);
00947
00948 memcpy(trx_doublewrite->write_buf
00949 + UNIV_PAGE_SIZE * trx_doublewrite->first_free,
00950 bpage->zip.data, zip_size);
00951 memset(trx_doublewrite->write_buf
00952 + UNIV_PAGE_SIZE * trx_doublewrite->first_free
00953 + zip_size, 0, UNIV_PAGE_SIZE - zip_size);
00954 } else {
00955 ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
00956 UNIV_MEM_ASSERT_RW(((buf_block_t*) bpage)->frame,
00957 UNIV_PAGE_SIZE);
00958
00959 memcpy(trx_doublewrite->write_buf
00960 + UNIV_PAGE_SIZE * trx_doublewrite->first_free,
00961 ((buf_block_t*) bpage)->frame, UNIV_PAGE_SIZE);
00962 }
00963
00964 trx_doublewrite->buf_block_arr[trx_doublewrite->first_free] = bpage;
00965
00966 trx_doublewrite->first_free++;
00967
00968 if (trx_doublewrite->first_free
00969 >= 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
00970 mutex_exit(&(trx_doublewrite->mutex));
00971
00972 buf_flush_buffered_writes();
00973
00974 return;
00975 }
00976
00977 mutex_exit(&(trx_doublewrite->mutex));
00978 }
00979 #endif
00980
00981
00983 UNIV_INTERN
00984 void
00985 buf_flush_init_for_writing(
00986
00987 byte* page,
00988 void* page_zip_,
00989 ib_uint64_t newest_lsn)
00991 {
00992 ut_ad(page);
00993
00994 if (page_zip_) {
00995 page_zip_des_t* page_zip = static_cast<page_zip_des_t *>(page_zip_);
00996 ulint zip_size = page_zip_get_size(page_zip);
00997 ut_ad(zip_size);
00998 ut_ad(ut_is_2pow(zip_size));
00999 ut_ad(zip_size <= UNIV_PAGE_SIZE);
01000
01001 switch (UNIV_EXPECT(fil_page_get_type(page), FIL_PAGE_INDEX)) {
01002 case FIL_PAGE_TYPE_ALLOCATED:
01003 case FIL_PAGE_INODE:
01004 case FIL_PAGE_IBUF_BITMAP:
01005 case FIL_PAGE_TYPE_FSP_HDR:
01006 case FIL_PAGE_TYPE_XDES:
01007
01008 memcpy(page_zip->data, page, zip_size);
01009
01010 case FIL_PAGE_TYPE_ZBLOB:
01011 case FIL_PAGE_TYPE_ZBLOB2:
01012 case FIL_PAGE_INDEX:
01013 mach_write_to_8(page_zip->data
01014 + FIL_PAGE_LSN, newest_lsn);
01015 memset(page_zip->data + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
01016 mach_write_to_4(page_zip->data
01017 + FIL_PAGE_SPACE_OR_CHKSUM,
01018 srv_use_checksums
01019 ? page_zip_calc_checksum(
01020 page_zip->data, zip_size)
01021 : BUF_NO_CHECKSUM_MAGIC);
01022 return;
01023 }
01024
01025 ut_print_timestamp(stderr);
01026 fputs(" InnoDB: ERROR: The compressed page to be written"
01027 " seems corrupt:", stderr);
01028 ut_print_buf(stderr, page, zip_size);
01029 fputs("\nInnoDB: Possibly older version of the page:", stderr);
01030 ut_print_buf(stderr, page_zip->data, zip_size);
01031 putc('\n', stderr);
01032 ut_error;
01033 }
01034
01035
01036 mach_write_to_8(page + FIL_PAGE_LSN, newest_lsn);
01037
01038 mach_write_to_8(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
01039 newest_lsn);
01040
01041
01042
01043 mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
01044 srv_use_checksums
01045 ? buf_calc_page_new_checksum(page)
01046 : BUF_NO_CHECKSUM_MAGIC);
01047
01048
01049
01050
01051
01052
01053 mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
01054 srv_use_checksums
01055 ? buf_calc_page_old_checksum(page)
01056 : BUF_NO_CHECKSUM_MAGIC);
01057 }
01058
01059 #ifndef UNIV_HOTBACKUP
01060
01064 static
01065 void
01066 buf_flush_write_block_low(
01067
01068 buf_page_t* bpage)
01069 {
01070 ulint zip_size = buf_page_get_zip_size(bpage);
01071 page_t* frame = NULL;
01072
01073 #ifdef UNIV_DEBUG
01074 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
01075 ut_ad(!buf_pool_mutex_own(buf_pool));
01076 #endif
01077
01078 #ifdef UNIV_LOG_DEBUG
01079 static ibool univ_log_debug_warned;
01080 #endif
01081
01082 ut_ad(buf_page_in_file(bpage));
01083
01084
01085
01086
01087
01088
01089 ut_ad(!buf_pool_mutex_own(buf_pool));
01090 ut_ad(!buf_flush_list_mutex_own(buf_pool));
01091 ut_ad(!mutex_own(buf_page_get_mutex(bpage)));
01092 ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE);
01093 ut_ad(bpage->oldest_modification != 0);
01094
01095 #ifdef UNIV_IBUF_COUNT_DEBUG
01096 ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0);
01097 #endif
01098 ut_ad(bpage->newest_modification != 0);
01099
01100 #ifdef UNIV_LOG_DEBUG
01101 if (!univ_log_debug_warned) {
01102 univ_log_debug_warned = TRUE;
01103 fputs("Warning: cannot force log to disk if"
01104 " UNIV_LOG_DEBUG is defined!\n"
01105 "Crash recovery will not work!\n",
01106 stderr);
01107 }
01108 #else
01109
01110 log_write_up_to(bpage->newest_modification, LOG_WAIT_ALL_GROUPS, TRUE);
01111 #endif
01112 switch (buf_page_get_state(bpage)) {
01113 case BUF_BLOCK_ZIP_FREE:
01114 case BUF_BLOCK_ZIP_PAGE:
01115 case BUF_BLOCK_NOT_USED:
01116 case BUF_BLOCK_READY_FOR_USE:
01117 case BUF_BLOCK_MEMORY:
01118 case BUF_BLOCK_REMOVE_HASH:
01119 ut_error;
01120 break;
01121 case BUF_BLOCK_ZIP_DIRTY:
01122 frame = bpage->zip.data;
01123 if (UNIV_LIKELY(srv_use_checksums)) {
01124 ut_a(mach_read_from_4(frame + FIL_PAGE_SPACE_OR_CHKSUM)
01125 == page_zip_calc_checksum(frame, zip_size));
01126 }
01127 mach_write_to_8(frame + FIL_PAGE_LSN,
01128 bpage->newest_modification);
01129 memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
01130 break;
01131 case BUF_BLOCK_FILE_PAGE:
01132 frame = bpage->zip.data;
01133 if (!frame) {
01134 frame = ((buf_block_t*) bpage)->frame;
01135 }
01136
01137 buf_flush_init_for_writing(((buf_block_t*) bpage)->frame,
01138 bpage->zip.data
01139 ? &bpage->zip : NULL,
01140 bpage->newest_modification);
01141 break;
01142 }
01143
01144 if (!srv_use_doublewrite_buf || !trx_doublewrite) {
01145 fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
01146 FALSE, buf_page_get_space(bpage), zip_size,
01147 buf_page_get_page_no(bpage), 0,
01148 zip_size ? zip_size : UNIV_PAGE_SIZE,
01149 frame, bpage);
01150 } else {
01151 buf_flush_post_to_doublewrite_buf(bpage);
01152 }
01153 }
01154
01155 # if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
01156
01162 UNIV_INTERN
01163 ibool
01164 buf_flush_page_try(
01165
01166 buf_pool_t* buf_pool,
01167 buf_block_t* block)
01168 {
01169 ut_ad(buf_pool_mutex_own(buf_pool));
01170 ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
01171 ut_ad(mutex_own(&block->mutex));
01172
01173 if (!buf_flush_ready_for_flush(&block->page, BUF_FLUSH_LRU)) {
01174 return(FALSE);
01175 }
01176
01177 if (buf_pool->n_flush[BUF_FLUSH_LRU] > 0
01178 || buf_pool->init_flush[BUF_FLUSH_LRU]) {
01179
01180 return(FALSE);
01181 }
01182
01183 buf_pool->init_flush[BUF_FLUSH_LRU] = TRUE;
01184
01185 buf_page_set_io_fix(&block->page, BUF_IO_WRITE);
01186
01187 buf_page_set_flush_type(&block->page, BUF_FLUSH_LRU);
01188
01189 if (buf_pool->n_flush[BUF_FLUSH_LRU]++ == 0) {
01190
01191 os_event_reset(buf_pool->no_flush[BUF_FLUSH_LRU]);
01192 }
01193
01194
01195
01196
01197
01198
01199
01200
01201 rw_lock_s_lock_gen(&block->lock, BUF_IO_WRITE);
01202
01203
01204
01205
01206
01207 mutex_exit(&block->mutex);
01208 buf_pool_mutex_exit(buf_pool);
01209
01210
01211
01212
01213
01214
01215 buf_flush_write_block_low(&block->page);
01216
01217 buf_pool_mutex_enter(buf_pool);
01218 buf_pool->init_flush[BUF_FLUSH_LRU] = FALSE;
01219
01220 if (buf_pool->n_flush[BUF_FLUSH_LRU] == 0) {
01221
01222 os_event_set(buf_pool->no_flush[BUF_FLUSH_LRU]);
01223 }
01224
01225 buf_pool_mutex_exit(buf_pool);
01226 buf_flush_buffered_writes();
01227
01228 return(TRUE);
01229 }
01230 # endif
01231
01232
01239 static
01240 void
01241 buf_flush_page(
01242
01243 buf_pool_t* buf_pool,
01244 buf_page_t* bpage,
01245 enum buf_flush flush_type)
01247 {
01248 mutex_t* block_mutex;
01249 ibool is_uncompressed;
01250
01251 ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
01252 ut_ad(buf_pool_mutex_own(buf_pool));
01253 ut_ad(buf_page_in_file(bpage));
01254
01255 block_mutex = buf_page_get_mutex(bpage);
01256 ut_ad(mutex_own(block_mutex));
01257
01258 ut_ad(buf_flush_ready_for_flush(bpage, flush_type));
01259
01260 buf_page_set_io_fix(bpage, BUF_IO_WRITE);
01261
01262 buf_page_set_flush_type(bpage, flush_type);
01263
01264 if (buf_pool->n_flush[flush_type] == 0) {
01265
01266 os_event_reset(buf_pool->no_flush[flush_type]);
01267 }
01268
01269 buf_pool->n_flush[flush_type]++;
01270
01271 is_uncompressed = (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
01272 ut_ad(is_uncompressed == (block_mutex != &buf_pool->zip_mutex));
01273
01274 switch (flush_type) {
01275 ibool is_s_latched;
01276 case BUF_FLUSH_LIST:
01277
01278
01279
01280
01281 is_s_latched = (bpage->buf_fix_count == 0);
01282 if (is_s_latched && is_uncompressed) {
01283 rw_lock_s_lock_gen(&((buf_block_t*) bpage)->lock,
01284 BUF_IO_WRITE);
01285 }
01286
01287 mutex_exit(block_mutex);
01288 buf_pool_mutex_exit(buf_pool);
01289
01290
01291
01292
01293
01294
01295
01296 if (!is_s_latched) {
01297 buf_flush_buffered_writes();
01298
01299 if (is_uncompressed) {
01300 rw_lock_s_lock_gen(&((buf_block_t*) bpage)
01301 ->lock, BUF_IO_WRITE);
01302 }
01303 }
01304
01305 break;
01306
01307 case BUF_FLUSH_LRU:
01308
01309
01310
01311
01312
01313
01314
01315 if (is_uncompressed) {
01316 rw_lock_s_lock_gen(&((buf_block_t*) bpage)->lock,
01317 BUF_IO_WRITE);
01318 }
01319
01320
01321
01322
01323
01324 mutex_exit(block_mutex);
01325 buf_pool_mutex_exit(buf_pool);
01326 break;
01327
01328 default:
01329 ut_error;
01330 }
01331
01332
01333
01334
01335
01336
01337 #ifdef UNIV_DEBUG
01338 if (buf_debug_prints) {
01339 fprintf(stderr,
01340 "Flushing %u space %u page %u\n",
01341 flush_type, bpage->space, bpage->offset);
01342 }
01343 #endif
01344 buf_flush_write_block_low(bpage);
01345 }
01346
01347
01350 static
01351 ulint
01352 buf_flush_try_neighbors(
01353
01354 ulint space,
01355 ulint offset,
01356 enum buf_flush flush_type,
01358 ulint n_flushed,
01360 ulint n_to_flush)
01362 {
01363 ulint i;
01364 ulint low;
01365 ulint high;
01366 ulint count = 0;
01367 buf_pool_t* buf_pool = buf_pool_get(space, offset);
01368
01369 ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
01370
01371 if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
01372
01373
01374
01375 low = offset;
01376 high = offset + 1;
01377 } else {
01378
01379
01380
01381
01382 ulint buf_flush_area;
01383
01384 buf_flush_area = ut_min(
01385 BUF_READ_AHEAD_AREA(buf_pool),
01386 buf_pool->curr_size / 16);
01387
01388 low = (offset / buf_flush_area) * buf_flush_area;
01389 high = (offset / buf_flush_area + 1) * buf_flush_area;
01390 }
01391
01392
01393
01394 if (high > fil_space_get_size(space)) {
01395 high = fil_space_get_size(space);
01396 }
01397
01398 for (i = low; i < high; i++) {
01399
01400 buf_page_t* bpage;
01401
01402 if ((count + n_flushed) >= n_to_flush) {
01403
01404
01405
01406
01407
01408
01409
01410 if (i <= offset) {
01411 i = offset;
01412 } else {
01413 break;
01414 }
01415 }
01416
01417 buf_pool = buf_pool_get(space, i);
01418
01419 buf_pool_mutex_enter(buf_pool);
01420
01421
01422 bpage = buf_page_hash_get(buf_pool, space, i);
01423
01424 if (!bpage) {
01425
01426 buf_pool_mutex_exit(buf_pool);
01427 continue;
01428 }
01429
01430 ut_a(buf_page_in_file(bpage));
01431
01432
01433
01434
01435 if (flush_type != BUF_FLUSH_LRU
01436 || i == offset
01437 || buf_page_is_old(bpage)) {
01438 mutex_t* block_mutex = buf_page_get_mutex(bpage);
01439
01440 mutex_enter(block_mutex);
01441
01442 if (buf_flush_ready_for_flush(bpage, flush_type)
01443 && (i == offset || !bpage->buf_fix_count)) {
01444
01445
01446
01447
01448
01449
01450
01451
01452
01453 buf_flush_page(buf_pool, bpage, flush_type);
01454 ut_ad(!mutex_own(block_mutex));
01455 ut_ad(!buf_pool_mutex_own(buf_pool));
01456 count++;
01457 continue;
01458 } else {
01459 mutex_exit(block_mutex);
01460 }
01461 }
01462 buf_pool_mutex_exit(buf_pool);
01463 }
01464
01465 return(count);
01466 }
01467
01468
01475 static
01476 ibool
01477 buf_flush_page_and_try_neighbors(
01478
01479 buf_page_t* bpage,
01482 enum buf_flush flush_type,
01484 ulint n_to_flush,
01486 ulint* count)
01488 {
01489 mutex_t* block_mutex;
01490 ibool flushed = FALSE;
01491 #ifdef UNIV_DEBUG
01492 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
01493 #endif
01494
01495 ut_ad(buf_pool_mutex_own(buf_pool));
01496
01497 block_mutex = buf_page_get_mutex(bpage);
01498 mutex_enter(block_mutex);
01499
01500 ut_a(buf_page_in_file(bpage));
01501
01502 if (buf_flush_ready_for_flush(bpage, flush_type)) {
01503 ulint space;
01504 ulint offset;
01505 buf_pool_t* buf_pool;
01506
01507 buf_pool = buf_pool_from_bpage(bpage);
01508
01509 buf_pool_mutex_exit(buf_pool);
01510
01511
01512
01513 space = buf_page_get_space(bpage);
01514 offset = buf_page_get_page_no(bpage);
01515
01516 mutex_exit(block_mutex);
01517
01518
01519 *count += buf_flush_try_neighbors(space,
01520 offset,
01521 flush_type,
01522 *count,
01523 n_to_flush);
01524
01525 buf_pool_mutex_enter(buf_pool);
01526 flushed = TRUE;
01527 } else {
01528 mutex_exit(block_mutex);
01529 }
01530
01531 ut_ad(buf_pool_mutex_own(buf_pool));
01532
01533 return(flushed);
01534 }
01535
01536
01542 static
01543 ulint
01544 buf_flush_LRU_list_batch(
01545
01546 buf_pool_t* buf_pool,
01547 ulint max)
01548 {
01549 buf_page_t* bpage;
01550 ulint count = 0;
01551
01552 ut_ad(buf_pool_mutex_own(buf_pool));
01553
01554 do {
01555
01556
01557 bpage = UT_LIST_GET_LAST(buf_pool->LRU);
01558
01559
01560
01561 while (bpage != NULL
01562 && !buf_flush_page_and_try_neighbors(
01563 bpage, BUF_FLUSH_LRU, max, &count)) {
01564
01565 bpage = UT_LIST_GET_PREV(LRU, bpage);
01566 }
01567 } while (bpage != NULL && count < max);
01568
01569
01570
01571
01572 buf_lru_flush_page_count += count;
01573
01574 ut_ad(buf_pool_mutex_own(buf_pool));
01575
01576 return(count);
01577 }
01578
01579
/* Flushes pages starting from the tail of buf_pool->flush_list for as
long as the tail page has oldest_modification < lsn_limit. The caller must
hold the buffer pool mutex (asserted on entry and on exit); the flush list
mutex is taken only around accesses to the flush list itself.
Returns the page count accumulated by buf_flush_page_and_try_neighbors(). */
01585 static
01586 ulint
01587 buf_flush_flush_list_batch(
01588
01589 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
01590 ulint min_n, /*!< in: no new pass is started once count has reached this value */
01594 ib_uint64_t lsn_limit) /*!< in: stop when the tail page has oldest_modification >= lsn_limit */
01599 {
01600 ulint len;
01601 buf_page_t* bpage;
01602 ulint count = 0;
01603
01604 ut_ad(buf_pool_mutex_own(buf_pool));
01605
01606 /* Each pass of the outer loop restarts from the tail of the list. */
01607 do {
01608
01609 /* The flush list is only read while holding the flush list mutex. */
01610
01611 buf_flush_list_mutex_enter(buf_pool);
01612
01613
01614
01615 /* len snapshots the list length and bounds the number of steps */
01616 /* the inner loop below may take in this pass. */
01617
01618 len = UT_LIST_GET_LEN(buf_pool->flush_list);
01619 bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
01620
01621 if (bpage) {
01622 ut_a(bpage->oldest_modification > 0);
01623 }
01624
01625 if (!bpage || bpage->oldest_modification >= lsn_limit) {
01626
01627 /* Empty list, or the tail page is already at or past lsn_limit: done. */
01628 buf_flush_list_mutex_exit(buf_pool);
01629 break;
01630 }
01631
01632 ut_a(bpage->oldest_modification > 0);
01633
01634 ut_ad(bpage->in_flush_list);
01635
01636 buf_flush_list_mutex_exit(buf_pool);
01637
01638 /* Walk towards the head until one page is handed over for */
01639 /* flushing, the list ends, or len steps have been taken. */
01640
01641 while (bpage != NULL
01642 && len > 0
01643 && !buf_flush_page_and_try_neighbors(
01644 bpage, BUF_FLUSH_LIST, min_n, &count)) {
01645
01646 buf_flush_list_mutex_enter(buf_pool);
01647
01648
01649
01650
01651 /* NOTE(review): oldest_modification == 0 presumably means the page */
01652 /* has left the flush list in the meantime, so its list pointers */
01653 /* cannot be followed; confirm. Breaking here ends only this inner */
01654 /* walk; the outer loop may restart from the tail. */
01655
01656
01657
01658
01659 if (bpage->oldest_modification == 0) {
01660 buf_flush_list_mutex_exit(buf_pool);
01661 break;
01662 }
01663
01664 bpage = UT_LIST_GET_PREV(list, bpage);
01665
01666 ut_ad(!bpage || bpage->in_flush_list);
01667
01668 buf_flush_list_mutex_exit(buf_pool);
01669
01670 --len;
01671 }
01672
01673 } while (count < min_n && bpage != NULL && len > 0);
01674
01675 ut_ad(buf_pool_mutex_own(buf_pool));
01676
01677 return(count);
01678 }
01679
01680
01688 static
01689 ulint
01690 buf_flush_batch(
01691
01692 buf_pool_t* buf_pool,
01693 enum buf_flush flush_type,
01697 ulint min_n,
01700 ib_uint64_t lsn_limit)
01705 {
01706 ulint count = 0;
01707
01708 ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
01709 #ifdef UNIV_SYNC_DEBUG
01710 ut_ad((flush_type != BUF_FLUSH_LIST)
01711 || sync_thread_levels_empty_gen(TRUE));
01712 #endif
01713
01714 buf_pool_mutex_enter(buf_pool);
01715
01716
01717
01718 switch(flush_type) {
01719 case BUF_FLUSH_LRU:
01720 count = buf_flush_LRU_list_batch(buf_pool, min_n);
01721 break;
01722 case BUF_FLUSH_LIST:
01723 count = buf_flush_flush_list_batch(buf_pool, min_n, lsn_limit);
01724 break;
01725 default:
01726 ut_error;
01727 }
01728
01729 buf_pool_mutex_exit(buf_pool);
01730
01731 buf_flush_buffered_writes();
01732
01733 #ifdef UNIV_DEBUG
01734 if (buf_debug_prints && count > 0) {
01735 fprintf(stderr, flush_type == BUF_FLUSH_LRU
01736 ? "Flushed %lu pages in LRU flush\n"
01737 : "Flushed %lu pages in flush list flush\n",
01738 (ulong) count);
01739 }
01740 #endif
01741
01742 srv_buf_pool_flushed += count;
01743
01744 return(count);
01745 }
01746
01747
01749 static
01750 void
01751 buf_flush_common(
01752
01753 enum buf_flush flush_type,
01754 ulint page_count)
01755 {
01756 buf_flush_buffered_writes();
01757
01758 ut_a(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
01759
01760 #ifdef UNIV_DEBUG
01761 if (buf_debug_prints && page_count > 0) {
01762 fprintf(stderr, flush_type == BUF_FLUSH_LRU
01763 ? "Flushed %lu pages in LRU flush\n"
01764 : "Flushed %lu pages in flush list flush\n",
01765 (ulong) page_count);
01766 }
01767 #endif
01768
01769 srv_buf_pool_flushed += page_count;
01770
01771 if (flush_type == BUF_FLUSH_LRU) {
01772
01773
01774
01775 buf_lru_flush_page_count += page_count;
01776 }
01777 }
01778
01779
01781 static
01782 ibool
01783 buf_flush_start(
01784
01785 buf_pool_t* buf_pool,
01786 enum buf_flush flush_type)
01788 {
01789 buf_pool_mutex_enter(buf_pool);
01790
01791 if (buf_pool->n_flush[flush_type] > 0
01792 || buf_pool->init_flush[flush_type] == TRUE) {
01793
01794
01795
01796 buf_pool_mutex_exit(buf_pool);
01797
01798 return(FALSE);
01799 }
01800
01801 buf_pool->init_flush[flush_type] = TRUE;
01802
01803 buf_pool_mutex_exit(buf_pool);
01804
01805 return(TRUE);
01806 }
01807
01808
01810 static
01811 void
01812 buf_flush_end(
01813
01814 buf_pool_t* buf_pool,
01815 enum buf_flush flush_type)
01817 {
01818 buf_pool_mutex_enter(buf_pool);
01819
01820 buf_pool->init_flush[flush_type] = FALSE;
01821
01822 if (buf_pool->n_flush[flush_type] == 0) {
01823
01824
01825
01826 os_event_set(buf_pool->no_flush[flush_type]);
01827 }
01828
01829 buf_pool_mutex_exit(buf_pool);
01830 }
01831
01832
01834 UNIV_INTERN
01835 void
01836 buf_flush_wait_batch_end(
01837
01838 buf_pool_t* buf_pool,
01839 enum buf_flush type)
01841 {
01842 ut_ad(type == BUF_FLUSH_LRU || type == BUF_FLUSH_LIST);
01843
01844 if (buf_pool == NULL) {
01845 ulint i;
01846
01847 for (i = 0; i < srv_buf_pool_instances; ++i) {
01848 buf_pool_t* i_buf_pool = buf_pool_from_array(i);
01849
01850 os_event_wait(i_buf_pool->no_flush[type]);
01851 }
01852 } else {
01853 os_event_wait(buf_pool->no_flush[type]);
01854 }
01855 }
01856
01857
01864 UNIV_INTERN
01865 ulint
01866 buf_flush_LRU(
01867
01868 buf_pool_t* buf_pool,
01869 ulint min_n)
01872 {
01873 ulint page_count;
01874
01875 if (!buf_flush_start(buf_pool, BUF_FLUSH_LRU)) {
01876 return(ULINT_UNDEFINED);
01877 }
01878
01879 page_count = buf_flush_batch(buf_pool, BUF_FLUSH_LRU, min_n, 0);
01880
01881 buf_flush_end(buf_pool, BUF_FLUSH_LRU);
01882
01883 buf_flush_common(BUF_FLUSH_LRU, page_count);
01884
01885 return(page_count);
01886 }
01887
01888
01894 UNIV_INTERN
01895 ulint
01896 buf_flush_list(
01897
01898 ulint min_n,
01901 ib_uint64_t lsn_limit)
01906 {
01907 ulint i;
01908 ulint total_page_count = 0;
01909 ibool skipped = FALSE;
01910
01911 if (min_n != ULINT_MAX) {
01912
01913
01914
01915
01916 min_n = (min_n + srv_buf_pool_instances - 1)
01917 / srv_buf_pool_instances;
01918 }
01919
01920
01921 for (i = 0; i < srv_buf_pool_instances; i++) {
01922 buf_pool_t* buf_pool;
01923 ulint page_count = 0;
01924
01925 buf_pool = buf_pool_from_array(i);
01926
01927 if (!buf_flush_start(buf_pool, BUF_FLUSH_LIST)) {
01928
01929
01930
01931
01932
01933
01934
01935
01936
01937
01938 skipped = TRUE;
01939
01940 continue;
01941 }
01942
01943 page_count = buf_flush_batch(
01944 buf_pool, BUF_FLUSH_LIST, min_n, lsn_limit);
01945
01946 buf_flush_end(buf_pool, BUF_FLUSH_LIST);
01947
01948 buf_flush_common(BUF_FLUSH_LIST, page_count);
01949
01950 total_page_count += page_count;
01951 }
01952
01953 return(lsn_limit != IB_ULONGLONG_MAX && skipped
01954 ? ULINT_UNDEFINED : total_page_count);
01955 }
01956
01957
01963 static
01964 ulint
01965 buf_flush_LRU_recommendation(
01966
01967 buf_pool_t* buf_pool)
01968 {
01969 buf_page_t* bpage;
01970 ulint n_replaceable;
01971 ulint distance = 0;
01972
01973 buf_pool_mutex_enter(buf_pool);
01974
01975 n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
01976
01977 bpage = UT_LIST_GET_LAST(buf_pool->LRU);
01978
01979 while ((bpage != NULL)
01980 && (n_replaceable < BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)
01981 + BUF_FLUSH_EXTRA_MARGIN(buf_pool))
01982 && (distance < BUF_LRU_FREE_SEARCH_LEN(buf_pool))) {
01983
01984 mutex_t* block_mutex = buf_page_get_mutex(bpage);
01985
01986 mutex_enter(block_mutex);
01987
01988 if (buf_flush_ready_for_replace(bpage)) {
01989 n_replaceable++;
01990 }
01991
01992 mutex_exit(block_mutex);
01993
01994 distance++;
01995
01996 bpage = UT_LIST_GET_PREV(LRU, bpage);
01997 }
01998
01999 buf_pool_mutex_exit(buf_pool);
02000
02001 if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)) {
02002
02003 return(0);
02004 }
02005
02006 return(BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)
02007 + BUF_FLUSH_EXTRA_MARGIN(buf_pool)
02008 - n_replaceable);
02009 }
02010
02011
02017 UNIV_INTERN
02018 void
02019 buf_flush_free_margin(
02020
02021 buf_pool_t* buf_pool)
02022 {
02023 ulint n_to_flush;
02024
02025 n_to_flush = buf_flush_LRU_recommendation(buf_pool);
02026
02027 if (n_to_flush > 0) {
02028 ulint n_flushed;
02029
02030 n_flushed = buf_flush_LRU(buf_pool, n_to_flush);
02031
02032 if (n_flushed == ULINT_UNDEFINED) {
02033
02034
02035
02036 buf_flush_wait_batch_end(buf_pool, BUF_FLUSH_LRU);
02037 }
02038 }
02039 }
02040
02041
02043 UNIV_INTERN
02044 void
02045 buf_flush_free_margins(void)
02046
02047 {
02048 ulint i;
02049
02050 for (i = 0; i < srv_buf_pool_instances; i++) {
02051 buf_pool_t* buf_pool;
02052
02053 buf_pool = buf_pool_from_array(i);
02054
02055 buf_flush_free_margin(buf_pool);
02056 }
02057 }
02058
02059
02060
02061
02062
02063
02064 UNIV_INTERN
02065 void
02066 buf_flush_stat_update(void)
02067
02068 {
02069 buf_flush_stat_t* item;
02070 ib_uint64_t lsn_diff;
02071 ib_uint64_t lsn;
02072 ulint n_flushed;
02073
02074 lsn = log_get_lsn();
02075 if (buf_flush_stat_cur.redo == 0) {
02076
02077
02078 buf_flush_stat_cur.redo = lsn;
02079 return;
02080 }
02081
02082 item = &buf_flush_stat_arr[buf_flush_stat_arr_ind];
02083
02084
02085 lsn_diff = lsn - buf_flush_stat_cur.redo;
02086 n_flushed = buf_lru_flush_page_count
02087 - buf_flush_stat_cur.n_flushed;
02088
02089
02090 buf_flush_stat_sum.redo += lsn_diff - item->redo;
02091 buf_flush_stat_sum.n_flushed += n_flushed - item->n_flushed;
02092
02093
02094 item->redo = lsn_diff;
02095 item->n_flushed = n_flushed;
02096
02097
02098 buf_flush_stat_arr_ind++;
02099 buf_flush_stat_arr_ind %= BUF_FLUSH_STAT_N_INTERVAL;
02100
02101
02102 buf_flush_stat_cur.redo = lsn;
02103 buf_flush_stat_cur.n_flushed = buf_lru_flush_page_count;
02104 }
02105
02106
02107
02108
02109
02110
02111
02112
02113
02114 UNIV_INTERN
02115 ulint
02116 buf_flush_get_desired_flush_rate(void)
02117
02118 {
02119 ulint i;
02120 lint rate;
02121 ulint redo_avg;
02122 ulint n_dirty = 0;
02123 ulint n_flush_req;
02124 ulint lru_flush_avg;
02125 ib_uint64_t lsn = log_get_lsn();
02126 ulint log_capacity = log_get_capacity();
02127
02128
02129
02130 ut_ad(log_capacity != 0);
02131
02132
02133
02134
02135 for (i = 0; i < srv_buf_pool_instances; i++) {
02136 buf_pool_t* buf_pool;
02137
02138 buf_pool = buf_pool_from_array(i);
02139 n_dirty += UT_LIST_GET_LEN(buf_pool->flush_list);
02140 }
02141
02142
02143
02144
02145
02146
02147
02148
02149 redo_avg = (ulint) (buf_flush_stat_sum.redo
02150 / BUF_FLUSH_STAT_N_INTERVAL
02151 + (lsn - buf_flush_stat_cur.redo));
02152
02153
02154
02155
02156
02157
02158
02159
02160
02161 lru_flush_avg = buf_flush_stat_sum.n_flushed
02162 / BUF_FLUSH_STAT_N_INTERVAL
02163 + (buf_lru_flush_page_count
02164 - buf_flush_stat_cur.n_flushed);
02165
02166 n_flush_req = (n_dirty * redo_avg) / log_capacity;
02167
02168
02169
02170
02171
02172 rate = n_flush_req - lru_flush_avg;
02173 return(rate > 0 ? (ulint) rate : 0);
02174 }
02175
02176 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
02177
02180 static
02181 ibool
02182 buf_flush_validate_low(
02183
02184 buf_pool_t* buf_pool)
02185 {
02186 buf_page_t* bpage;
02187 const ib_rbt_node_t* rnode = NULL;
02188
02189 ut_ad(buf_flush_list_mutex_own(buf_pool));
02190
02191 UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
02192 ut_ad(ut_list_node_313->in_flush_list));
02193
02194 bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
02195
02196
02197
02198
02199 if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
02200 rnode = rbt_first(buf_pool->flush_rbt);
02201 }
02202
02203 while (bpage != NULL) {
02204 const ib_uint64_t om = bpage->oldest_modification;
02205
02206 ut_ad(buf_pool_from_bpage(bpage) == buf_pool);
02207
02208 ut_ad(bpage->in_flush_list);
02209
02210
02211
02212
02213
02214
02215
02216 ut_a(buf_page_in_file(bpage)
02217 || buf_page_get_state(bpage) == BUF_BLOCK_REMOVE_HASH);
02218 ut_a(om > 0);
02219
02220 if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
02221 buf_page_t** prpage;
02222
02223 ut_a(rnode);
02224 prpage = rbt_value(buf_page_t*, rnode);
02225
02226 ut_a(*prpage);
02227 ut_a(*prpage == bpage);
02228 rnode = rbt_next(buf_pool->flush_rbt, rnode);
02229 }
02230
02231 bpage = UT_LIST_GET_NEXT(list, bpage);
02232
02233 ut_a(!bpage || om >= bpage->oldest_modification);
02234 }
02235
02236
02237
02238 ut_a(rnode == NULL);
02239
02240 return(TRUE);
02241 }
02242
02243
02246 UNIV_INTERN
02247 ibool
02248 buf_flush_validate(
02249
02250 buf_pool_t* buf_pool)
02251 {
02252 ibool ret;
02253
02254 buf_flush_list_mutex_enter(buf_pool);
02255
02256 ret = buf_flush_validate_low(buf_pool);
02257
02258 buf_flush_list_mutex_exit(buf_pool);
02259
02260 return(ret);
02261 }
02262 #endif
02263 #endif