diff options
Diffstat (limited to 'storage/maria/ma_pagecache.c')
-rw-r--r-- | storage/maria/ma_pagecache.c | 414 |
1 files changed, 362 insertions, 52 deletions
diff --git a/storage/maria/ma_pagecache.c b/storage/maria/ma_pagecache.c index d10595fffd9..3ba3f9c7682 100644 --- a/storage/maria/ma_pagecache.c +++ b/storage/maria/ma_pagecache.c @@ -85,6 +85,9 @@ #define PAGECACHE_DEBUG #define PAGECACHE_DEBUG_LOG "my_pagecache_debug.log" */ +#undef PAGECACHE_DEBUG +#define PAGECACHE_DEBUG_LOG "my_pagecache_debug.log" +#define _VARARGS(X) X /* In key cache we have external raw locking here we use @@ -127,7 +130,8 @@ my_bool my_disable_flush_pagecache_blocks= 0; #define COND_FOR_REQUESTED 0 /* queue of thread waiting for read operation */ #define COND_FOR_SAVED 1 /* queue of thread waiting for flush */ #define COND_FOR_WRLOCK 2 /* queue of write lock */ -#define COND_SIZE 3 /* number of COND_* queues */ +#define COND_FOR_BIG_BLOCK 3 /* queue of waiting fo big block read */ +#define COND_SIZE 4 /* number of COND_* queues */ typedef mysql_cond_t KEYCACHE_CONDVAR; @@ -146,7 +150,7 @@ struct st_pagecache_hash_link struct st_pagecache_block_link *block; /* reference to the block for the page: */ PAGECACHE_FILE file; /* from such a file */ - pgcache_page_no_t pageno; /* this page */ + pgcache_page_no_t pageno; /* this page */ uint requests; /* number of requests for the page */ }; @@ -174,6 +178,7 @@ struct st_pagecache_hash_link #define PCBLOCK_CHANGED 32 /* block buffer contains a dirty page */ #define PCBLOCK_DIRECT_W 64 /* possible direct write to the block */ #define PCBLOCK_DEL_WRITE 128 /* should be written on delete */ +#define PCBLOCK_BIG_READ 256 /* the first block of the big read in progress */ /* page status, returned by find_block */ #define PAGE_READ 0 @@ -507,37 +512,45 @@ static void test_key_cache(PAGECACHE *pagecache, #define DEFAULT_PAGECACHE_DEBUG_LOG "pagecache_debug.log" -#if defined(PAGECACHE_DEBUG) && ! 
defined(PAGECACHE_DEBUG_LOG) -#define PAGECACHE_DEBUG_LOG DEFAULT_PAGECACHE_DEBUG_LOG -#endif - -#if defined(PAGECACHE_DEBUG_LOG) +#if defined(PAGECACHE_DEBUG) static FILE *pagecache_debug_log= NULL; static void pagecache_debug_print _VARARGS((const char *fmt, ...)); -#define PAGECACHE_DEBUG_OPEN \ - if (!pagecache_debug_log) \ - { \ - pagecache_debug_log= fopen(PAGECACHE_DEBUG_LOG, "w"); \ - (void) setvbuf(pagecache_debug_log, NULL, _IOLBF, BUFSIZ); \ +#define PAGECACHE_DEBUG_OPEN \ + if (!pagecache_debug_log) \ + { \ + if ((pagecache_debug_log= fopen(PAGECACHE_DEBUG_LOG, "w"))) \ + (void) setvbuf(pagecache_debug_log, NULL, _IOLBF, BUFSIZ); \ } -#define PAGECACHE_DEBUG_CLOSE \ - if (pagecache_debug_log) \ - { \ - fclose(pagecache_debug_log); \ - pagecache_debug_log= 0; \ +#define PAGECACHE_DEBUG_CLOSE \ + if (pagecache_debug_log) \ + { \ + fclose(pagecache_debug_log); \ + pagecache_debug_log= 0; \ } #else #define PAGECACHE_DEBUG_OPEN #define PAGECACHE_DEBUG_CLOSE #endif /* defined(PAGECACHE_DEBUG_LOG) */ -#if defined(PAGECACHE_DEBUG_LOG) && defined(PAGECACHE_DEBUG) +#if defined(PAGECACHE_DEBUG) #define KEYCACHE_PRINT(l, m) KEYCACHE_DBUG_PRINT(l,m) + +#ifdef PAGECACHE_DEBUG_DLOG +#define KEYCACHE_DBUG_PRINT(l, m) \ + { if (pagecache_debug_log) \ + { \ + fprintf(pagecache_debug_log, "%s: ", l); \ + DBUG_PRINT("PCDEBUG", ("%s: ", l)); \ + } \ + pagecache_debug_print m; } +#else #define KEYCACHE_DBUG_PRINT(l, m) \ { if (pagecache_debug_log) \ fprintf(pagecache_debug_log, "%s: ", l); \ pagecache_debug_print m; } +#endif + #define KEYCACHE_DBUG_ASSERT(a) \ { if (! 
(a) && pagecache_debug_log) \ @@ -547,20 +560,21 @@ static void pagecache_debug_print _VARARGS((const char *fmt, ...)); #define KEYCACHE_PRINT(l, m) #define KEYCACHE_DBUG_PRINT(l, m) DBUG_PRINT(l, m) #define KEYCACHE_DBUG_ASSERT(a) DBUG_ASSERT(a) -#endif /* defined(PAGECACHE_DEBUG_LOG) && defined(PAGECACHE_DEBUG) */ +#endif /* defined(PAGECACHE_DEBUG) */ #if defined(PAGECACHE_DEBUG) || !defined(DBUG_OFF) -static long pagecache_thread_id; +static my_thread_id pagecache_thread_id; #define KEYCACHE_THREAD_TRACE(l) \ - KEYCACHE_DBUG_PRINT(l,("|thread %ld",pagecache_thread_id)) + KEYCACHE_DBUG_PRINT(l,("|thread %lld",pagecache_thread_id)) #define KEYCACHE_THREAD_TRACE_BEGIN(l) \ { struct st_my_thread_var *thread_var= my_thread_var; \ pagecache_thread_id= thread_var->id; \ - KEYCACHE_DBUG_PRINT(l,("[thread %ld",pagecache_thread_id)) } + KEYCACHE_DBUG_PRINT(l,("[thread %lld",pagecache_thread_id)); \ + } #define KEYCACHE_THREAD_TRACE_END(l) \ - KEYCACHE_DBUG_PRINT(l,("]thread %ld",pagecache_thread_id)) + KEYCACHE_DBUG_PRINT(l,("]thread %lld",pagecache_thread_id)) #else #define KEYCACHE_PRINT(l,m) #define KEYCACHE_THREAD_TRACE_BEGIN(l) @@ -586,13 +600,13 @@ static int ___pagecache_pthread_mutex_lock(mysql_mutex_t *mutex); static void ___pagecache_pthread_mutex_unlock(mysql_mutex_t *mutex); static int ___pagecache_pthread_cond_signal(mysql_cond_t *cond); #define pagecache_pthread_mutex_lock(M) \ -{ DBUG_PRINT("lock", ("mutex lock 0x%lx %u", (ulong)(M), __LINE__)); \ +{ DBUG_PRINT("lock", ("mutex lock %p %u", (M), __LINE__)); \ ___pagecache_pthread_mutex_lock(M);} #define pagecache_pthread_mutex_unlock(M) \ -{ DBUG_PRINT("lock", ("mutex unlock 0x%lx %u", (ulong)(M), __LINE__)); \ +{ DBUG_PRINT("lock", ("mutex unlock %p %u", (M), __LINE__)); \ ___pagecache_pthread_mutex_unlock(M);} #define pagecache_pthread_cond_signal(M) \ -{ DBUG_PRINT("lock", ("signal 0x%lx %u", (ulong)(M), __LINE__)); \ +{ DBUG_PRINT("lock", ("signal %p %u", (M), __LINE__)); \ 
___pagecache_pthread_cond_signal(M);} #else #define pagecache_pthread_mutex_lock mysql_mutex_lock @@ -748,7 +762,8 @@ static inline uint next_power(uint value) size_t init_pagecache(PAGECACHE *pagecache, size_t use_mem, uint division_limit, uint age_threshold, - uint block_size, uint changed_blocks_hash_size, + uint block_size, + uint changed_blocks_hash_size, myf my_readwrite_flags) { size_t blocks, hash_links, length; @@ -756,6 +771,10 @@ size_t init_pagecache(PAGECACHE *pagecache, size_t use_mem, DBUG_ENTER("init_pagecache"); DBUG_ASSERT(block_size >= 512); + // By default we init usual cache (variables will be assigned to switch to s3) + pagecache->big_block_read= NULL; + pagecache->big_block_free= NULL; + PAGECACHE_DEBUG_OPEN; if (pagecache->inited && pagecache->disk_blocks > 0) { @@ -1350,6 +1369,8 @@ static void link_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block, } } while (thread != last_thread); + DBUG_PRINT("XXX", ("hash_link (link block): %p, hash_link: %p -> %p", + hash_link, hash_link->block, block)); hash_link->block= block; /* Ensure that no other thread tries to use this block */ block->status|= PCBLOCK_REASSIGNED; @@ -1646,6 +1667,9 @@ static void unlink_hash(PAGECACHE *pagecache, PAGECACHE_HASH_LINK *hash_link) if ((*hash_link->prev= hash_link->next)) hash_link->next->prev= hash_link->prev; + + DBUG_PRINT("XXX", ("hash_link (unlink): %p, hash_link: %p -> NULL", + hash_link, hash_link->block)); hash_link->block= NULL; if (pagecache->waiting_for_hash_link.last_thread) { @@ -1893,6 +1917,7 @@ static PAGECACHE_BLOCK_LINK *find_block(PAGECACHE *pagecache, my_bool wrmode, my_bool block_is_copied, my_bool reg_req, + my_bool fast, int *page_st) { PAGECACHE_HASH_LINK *hash_link; @@ -1909,6 +1934,7 @@ static PAGECACHE_BLOCK_LINK *find_block(PAGECACHE *pagecache, DBUG_EXECUTE("check_pagecache", test_key_cache(pagecache, "start of find_block", 0);); #endif + DBUG_ASSERT(!fast || !wrmode); restart: /* Find the hash link for the requested page (file, 
pageno) */ @@ -2018,9 +2044,11 @@ restart: /* This is a request for a new page or for a page not to be removed */ if (! block) { + DBUG_PRINT("XXX", ("request for a new page")); /* No block is assigned for the page yet */ if (pagecache->blocks_unused) { + DBUG_PRINT("XXX", ("there is never used blocks")); if (pagecache->free_block_list) { /* There is a block in the free list. */ @@ -2054,7 +2082,11 @@ restart: block->last_hit_time= 0; block->rec_lsn= LSN_MAX; link_to_file_list(pagecache, block, file, 0); + DBUG_PRINT("XXX", ("block (no block assigned): %p, hash_link: %p -> %p", + block, block->hash_link, hash_link)); block->hash_link= hash_link; + DBUG_PRINT("XXX", ("hash_link (no block assignment): %p, hash_link: %p -> %p", + hash_link, hash_link->block, block)); hash_link->block= block; page_status= PAGE_TO_BE_READ; DBUG_PRINT("info", ("page to be read set for page %p (%u)", @@ -2065,6 +2097,7 @@ restart: } else { + DBUG_PRINT("XXX", ("there is NOT never used blocks")); /* There are no never used blocks, use a block from the LRU chain */ /* @@ -2076,6 +2109,8 @@ restart: if (! pagecache->used_last) { + struct st_my_thread_var *thread; + DBUG_PRINT("XXX", ("there is NOT UNUSED blocks")); /* Wait until a new block is added to the LRU chain; several threads might wait here for the same page, @@ -2084,8 +2119,18 @@ restart: The block is given to us by the next thread executing link_block(). 
*/ + if (fast) + { + DBUG_ASSERT(hash_link->requests == 0); + unlink_hash(pagecache, hash_link); + DBUG_PRINT("info", ("fast and no blocks in LRU")); - struct st_my_thread_var *thread= my_thread_var; + KEYCACHE_DBUG_PRINT("find_block", + ("fast and no blocks in LRU")); + DBUG_RETURN(0); + } + + thread= my_thread_var; thread->keycache_link= (void *) hash_link; wqueue_link_into_queue(&pagecache->waiting_for_block, thread); do @@ -2104,13 +2149,30 @@ restart: } else { + DBUG_PRINT("XXX", ("take a block from LRU")); /* Take the first block from the LRU chain unlinking it from the chain */ block= pagecache->used_last->next_used; + if (fast && + ((block->status & (PCBLOCK_IN_FLUSH | PCBLOCK_CHANGED)) || + (block->hash_link && block->hash_link != hash_link && + block->hash_link->requests))) + { + DBUG_ASSERT(hash_link->requests == 0); + unlink_hash(pagecache, hash_link); + DBUG_PRINT("info", ("fast and LRU block is in switch or has " + "readers")); + KEYCACHE_DBUG_PRINT("find_block", + ("fast and LRU block is in switch or has " + "readers")); + DBUG_RETURN (0); + } if (reg_req) reg_requests(pagecache, block, 1); + DBUG_PRINT("XXX", ("hash_link (LRU): %p, hash_link: %p -> %p", + hash_link, hash_link->block, block)); hash_link->block= block; DBUG_ASSERT(block->requests == 1); } @@ -2181,6 +2243,8 @@ restart: link_to_file_list(pagecache, block, file, (my_bool)(block->hash_link ? 1 : 0)); + DBUG_PRINT("XXX", ("block (LRU): %p, hash_link: %p -> %p", + block, block->hash_link, hash_link)); block->hash_link= hash_link; PCBLOCK_INFO(block); block->hits_left= init_hits_left; @@ -2665,8 +2729,221 @@ retry: DBUG_ASSERT(block->hash_link->requests > 0); block->hash_link->requests--; DBUG_RETURN(1); +} + + +/** + @brief Reading of a big block in the S3 storage engine. + + @param pagecache Page cache + @param block Block to read + + @note + + Page cache is segmented in logical blocks of size 'block_size'. All + read request are for blocks of 'block_size'. 
+ + When using a file with 'big blocks', the file is split into a + header, header size (for index information) and then blocks of + big_block_size. The last block may be smaller than big_block_size. + All 'big blocks' are a multiple of block_size. + The header is never read into the page cache. It's used to store + the table definition and status and is only read by open(). + + When wanting to read a block, we register a read request for that + block and for the first block that is part of the big block read. We + also put a special flag on the first block so that if another thread + would want to do a big block read, it will wait on signal, and then + check if the block it requested is now in the page cache. If it's + not in the cache it will retry. + + After the big block is read, we will put all read blocks that were not in the + page cache. Blocks that were already in page cache will not be touched + and will not be added first in the FIFO. + + The block for which we had a read request is added first in FIFO and + returned. 
+*/ + +#ifdef WITH_S3_STORAGE_ENGINE +static my_bool read_big_block(PAGECACHE *pagecache, + PAGECACHE_BLOCK_LINK *block) +{ + int page_st; + size_t big_block_size_in_pages; + size_t offset; + pgcache_page_no_t page, our_page; + pgcache_page_no_t page_to_read; + PAGECACHE_BLOCK_LINK *block_to_read= NULL; + PAGECACHE_IO_HOOK_ARGS args; + S3_BLOCK data; + DBUG_ENTER("read_big_block"); + DBUG_PRINT("enter", ("read BIG block: %p", block)); + bzero((void*) &data, sizeof(data)); + + DBUG_ASSERT(block->hash_link->file.big_block_size % + pagecache->block_size == 0); + big_block_size_in_pages= + block->hash_link->file.big_block_size / pagecache->block_size; + + our_page= block->hash_link->pageno; + + /* find first page of the big block (page_to_read) */ + page_to_read= ((block->hash_link->pageno - + block->hash_link->file.head_blocks) / + big_block_size_in_pages); + page_to_read= (page_to_read * big_block_size_in_pages + + block->hash_link->file.head_blocks); + if (page_to_read != our_page) + { + block_to_read= find_block(pagecache, &block->hash_link->file, + page_to_read, 1, + FALSE, TRUE /* copy under protection (?)*/, + TRUE /*register*/, FALSE, &page_st); + DBUG_ASSERT(block_to_read == block_to_read->hash_link->block); + + if (block_to_read->status & PCBLOCK_ERROR) + { + /* We get first block with an error so all operation failed */ + block->status|= PCBLOCK_ERROR; + block->error= block_to_read->error; + DBUG_RETURN(FALSE); // no retry + } + // only primary request here, PAGE_WAIT_TO_BE_READ is impossible + DBUG_ASSERT(page_st != PAGE_WAIT_TO_BE_READ); + if (block_to_read->status & PCBLOCK_BIG_READ) + { + struct st_my_thread_var *thread; + DBUG_ASSERT(page_st != PAGE_TO_BE_READ); + /* + Block read failed because somebody else is reading the first block + (and all other blocks part of this one). + Wait until block is available. 
+ */ + unreg_request(pagecache, block, 1); + thread= my_thread_var; + /* Put the request into a queue and wait until it can be processed */ + wqueue_add_to_queue(&block->wqueue[COND_FOR_BIG_BLOCK], thread); + do + { + DBUG_PRINT("wait", + ("suspend thread %s %ld", thread->name, + (ulong) thread->id)); + pagecache_pthread_cond_wait(&thread->suspend, + &pagecache->cache_lock); + } + while (thread->next); + DBUG_RETURN(TRUE); + } + } + else + { + block_to_read= block; + page_st= PAGE_TO_BE_READ; + } + + DBUG_ASSERT(!(block_to_read->status & PCBLOCK_BIG_READ)); + // Mark the first page of a big block + block_to_read->status|= PCBLOCK_BIG_READ; + + // Don't keep cache locked during the possible slow read from s3 + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + + // perform read of big block + args.page= NULL; + args.pageno= page_to_read; + args.data= block->hash_link->file.callback_data; + + if (pagecache->big_block_read(pagecache, &args, &block->hash_link->file, + &data)) + { + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + block_to_read->status|= PCBLOCK_ERROR; + block->status|= PCBLOCK_ERROR; + block_to_read->error= block->error= (int16) my_errno; + pagecache->big_block_free(&data); + if (block_to_read != block) + { + remove_reader(block_to_read); + unreg_request(pagecache, block_to_read, 1); + } + DBUG_RETURN(FALSE); // no retry + } + + /* + We need to keep the mutex locked while filling pages. 
+ As there is no changed blocks to flush, this operation should + be reasonable fast + */ + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + + /* Copy the first page to the cache */ + if (page_st != PAGE_READ) + { + DBUG_ASSERT(page_st != PAGE_WAIT_TO_BE_READ); + memcpy(block_to_read->buffer, data.str, pagecache->block_size); + block_to_read->status|= PCBLOCK_READ; + } + else + DBUG_ASSERT(block_to_read->status & PCBLOCK_READ); + /* Copy the rest of the pages */ + for (offset= pagecache->block_size, page= page_to_read + 1; + offset < data.length; + offset+= pagecache->block_size, page++) + { + DBUG_ASSERT(offset + pagecache->block_size <= data.length); + if (page == our_page) + { + DBUG_ASSERT(!(block->status & PCBLOCK_READ)); + memcpy(block->buffer, data.str + offset, pagecache->block_size); + block->status|= PCBLOCK_READ; + } + else + { + PAGECACHE_BLOCK_LINK *bl; + bl= find_block(pagecache, &block->hash_link->file, page, 1, + FALSE, TRUE /* copy under protection (?)*/, + TRUE /*register*/, TRUE /*fast*/, &page_st); + if (!bl) + { + // we run out of easy avaliable pages in the cache + break; + } + DBUG_ASSERT(bl == bl->hash_link->block); + if ((bl->status & PCBLOCK_ERROR) == 0 && + page_st == PAGE_TO_BE_READ) + { + memcpy(bl->buffer, data.str + offset, pagecache->block_size); + bl->status|= PCBLOCK_READ; + } + remove_reader(bl); + unreg_request(pagecache, bl, 1); + } + } + if (page < our_page) + { + /* we break earlier, but still have to fill page what was requested */ + DBUG_ASSERT(!(block->status & PCBLOCK_READ)); + memcpy(block->buffer, + data.str + ((our_page - page_to_read) * pagecache->block_size), + pagecache->block_size); + block->status|= PCBLOCK_READ; + } + pagecache->big_block_free(&data); + + block_to_read->status&= ~PCBLOCK_BIG_READ; + if (block_to_read != block) + { + remove_reader(block_to_read); + unreg_request(pagecache, block_to_read, 1); + } + if (block->wqueue[COND_FOR_BIG_BLOCK].last_thread) + 
wqueue_release_queue(&block->wqueue[COND_FOR_BIG_BLOCK]); + + DBUG_RETURN(FALSE); } +#endif /* WITH_S3_STORAGE_ENGINE */ /* @@ -2861,7 +3138,7 @@ void pagecache_unlock(PAGECACHE *pagecache, inc_counter_for_resize_op(pagecache); /* See NOTE for pagecache_unlock about registering requests */ block= find_block(pagecache, file, pageno, 0, 0, 0, - pin == PAGECACHE_PIN_LEFT_UNPINNED, &page_st); + pin == PAGECACHE_PIN_LEFT_UNPINNED, FALSE, &page_st); PCBLOCK_INFO(block); DBUG_ASSERT(block != 0 && page_st == PAGE_READ); if (first_REDO_LSN_for_page) @@ -2948,7 +3225,7 @@ void pagecache_unpin(PAGECACHE *pagecache, inc_counter_for_resize_op(pagecache); /* See NOTE for pagecache_unlock about registering requests */ - block= find_block(pagecache, file, pageno, 0, 0, 0, 0, &page_st); + block= find_block(pagecache, file, pageno, 0, 0, 0, 0, FALSE, &page_st); DBUG_ASSERT(block != 0); DBUG_ASSERT(page_st == PAGE_READ); /* we can't unpin such page without unlock */ @@ -3349,7 +3626,7 @@ uchar *pagecache_read(PAGECACHE *pagecache, char llbuf[22]; DBUG_ENTER("pagecache_read"); DBUG_PRINT("enter", ("fd: %u page: %s buffer: %p level: %u " - "t:%s (%d)%s->%s %s->%s", + "t:%s (%d)%s->%s %s->%s big block: %d", (uint) file->file, ullstr(pageno, llbuf), buff, level, page_cache_page_type_str[type], @@ -3357,7 +3634,8 @@ uchar *pagecache_read(PAGECACHE *pagecache, page_cache_page_lock_str[lock_to_read[lock].new_lock], page_cache_page_lock_str[lock_to_read[lock].unlock_lock], page_cache_page_pin_str[new_pin], - page_cache_page_pin_str[unlock_pin])); + page_cache_page_pin_str[unlock_pin], + MY_TEST(pagecache->big_block_read))); DBUG_ASSERT(buff != 0 || (buff == 0 && (unlock_pin == PAGECACHE_PIN || unlock_pin == PAGECACHE_PIN_LEFT_PINNED))); DBUG_ASSERT(pageno < ((1ULL) << 40)); @@ -3369,6 +3647,14 @@ uchar *pagecache_read(PAGECACHE *pagecache, restart: + /* + If we use big block than the big block is multiple of blocks and we + have enouch blocks in cache + */ + 
DBUG_ASSERT(!pagecache->big_block_read || + (file->big_block_size != 0 && + file->big_block_size % pagecache->block_size == 0)); + if (pagecache->can_be_used) { /* Key cache is used */ @@ -3387,19 +3673,45 @@ restart: pagecache->global_cache_r_requests++; /* See NOTE for pagecache_unlock about registering requests. */ reg_request= ((new_pin == PAGECACHE_PIN_LEFT_UNPINNED) || - (new_pin == PAGECACHE_PIN)); + (new_pin == PAGECACHE_PIN) || + pagecache->big_block_read); block= find_block(pagecache, file, pageno, level, lock == PAGECACHE_LOCK_WRITE, buff != 0, - reg_request, &page_st); + reg_request, FALSE, &page_st); DBUG_PRINT("info", ("Block type: %s current type %s", page_cache_page_type_str[block->type], page_cache_page_type_str[type])); if (((block->status & PCBLOCK_ERROR) == 0) && (page_st != PAGE_READ)) { - /* The requested page is to be read into the block buffer */ - read_block(pagecache, block, - (my_bool)(page_st == PAGE_TO_BE_READ)); - DBUG_PRINT("info", ("read is done")); +#ifdef WITH_S3_STORAGE_ENGINE + if (!pagecache->big_block_read) +#endif /* WITH_S3_STORAGE_ENGINE */ + { + /* The requested page is to be read into the block buffer */ + read_block(pagecache, block, page_st == PAGE_TO_BE_READ); + DBUG_PRINT("info", ("read is done")); + } +#ifdef WITH_S3_STORAGE_ENGINE + else + { + /* It is big read and this thread should read */ + DBUG_ASSERT(page_st == PAGE_TO_BE_READ); + + if (read_big_block(pagecache, block)) + { + /* block is unregistered in read_big_block */ + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + DBUG_PRINT("restart", ("big block fail, restarting...")); + goto restart; + } + if (!((new_pin == PAGECACHE_PIN_LEFT_UNPINNED) || + (new_pin == PAGECACHE_PIN))) + { + /* we registered request only for big_block_read */ + unreg_request(pagecache, block, 1); + } + } +#endif /* WITH_S3_STORAGE_ENGINE */ } /* Assert after block is read. 
Imagine two concurrent SELECTs on same @@ -3990,6 +4302,7 @@ my_bool pagecache_write_part(PAGECACHE *pagecache, DBUG_ASSERT(lock != PAGECACHE_LOCK_READ_UNLOCK); DBUG_ASSERT(offset + size <= pagecache->block_size); DBUG_ASSERT(pageno < ((1ULL) << 40)); + DBUG_ASSERT(pagecache->big_block_read == 0); #endif if (!page_link) @@ -4026,7 +4339,7 @@ restart: (pin == PAGECACHE_PIN)); block= find_block(pagecache, file, pageno, level, TRUE, FALSE, - reg_request, &page_st); + reg_request, FALSE, &page_st); if (!block) { DBUG_ASSERT(write_mode != PAGECACHE_WRITE_DONE); @@ -4278,6 +4591,8 @@ static my_bool free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block, block->type= PAGECACHE_EMPTY_PAGE; #endif block->rec_lsn= LSN_MAX; + DBUG_PRINT("XXX", ("block (Free): %p, hash_link: %p -> NULL", + block, block->hash_link)); block->hash_link= NULL; if (block->temperature == PCBLOCK_WARM) pagecache->warm_blocks--; @@ -5230,6 +5545,7 @@ static int pagecache_pthread_cond_wait(mysql_cond_t *cond, #endif #endif /* defined(PAGECACHE_TIMEOUT) && !defined(__WIN__) */ + #if defined(PAGECACHE_DEBUG) static int ___pagecache_pthread_mutex_lock(mysql_mutex_t *mutex) { @@ -5256,32 +5572,26 @@ static int ___pagecache_pthread_cond_signal(mysql_cond_t *cond) } -#if defined(PAGECACHE_DEBUG_LOG) - - static void pagecache_debug_print(const char * fmt, ...) 
{ va_list args; va_start(args,fmt); if (pagecache_debug_log) { - VOID(vfprintf(pagecache_debug_log, fmt, args)); - VOID(fputc('\n',pagecache_debug_log)); + vfprintf(pagecache_debug_log, fmt, args); + fputc('\n',pagecache_debug_log); +#ifdef PAGECACHE_DEBUG_DLOG + _db_doprnt_(fmt, args); +#endif } va_end(args); } -#endif /* defined(PAGECACHE_DEBUG_LOG) */ - -#if defined(PAGECACHE_DEBUG_LOG) - void pagecache_debug_log_close(void) { if (pagecache_debug_log) fclose(pagecache_debug_log); } -#endif /* defined(PAGECACHE_DEBUG_LOG) */ - #endif /* defined(PAGECACHE_DEBUG) */ /** @@ -5307,8 +5617,7 @@ static void null_post_write_hook(int res __attribute__((unused)), return; } -void -pagecache_file_set_null_hooks(PAGECACHE_FILE *file) +void pagecache_file_set_null_hooks(PAGECACHE_FILE *file) { file->pre_read_hook= null_pre_hook; file->post_read_hook= null_post_read_hook; @@ -5316,4 +5625,5 @@ pagecache_file_set_null_hooks(PAGECACHE_FILE *file) file->post_write_hook= null_post_write_hook; file->flush_log_callback= null_pre_hook; file->callback_data= NULL; + file->head_blocks= file->big_block_size= 0; } |