summaryrefslogtreecommitdiff
path: root/storage/maria/ma_pagecache.c
diff options
context:
space:
mode:
Diffstat (limited to 'storage/maria/ma_pagecache.c')
-rw-r--r--storage/maria/ma_pagecache.c414
1 files changed, 362 insertions, 52 deletions
diff --git a/storage/maria/ma_pagecache.c b/storage/maria/ma_pagecache.c
index d10595fffd9..3ba3f9c7682 100644
--- a/storage/maria/ma_pagecache.c
+++ b/storage/maria/ma_pagecache.c
@@ -85,6 +85,9 @@
#define PAGECACHE_DEBUG
#define PAGECACHE_DEBUG_LOG "my_pagecache_debug.log"
*/
+#undef PAGECACHE_DEBUG
+#define PAGECACHE_DEBUG_LOG "my_pagecache_debug.log"
+#define _VARARGS(X) X
/*
In key cache we have external raw locking here we use
@@ -127,7 +130,8 @@ my_bool my_disable_flush_pagecache_blocks= 0;
#define COND_FOR_REQUESTED 0 /* queue of thread waiting for read operation */
#define COND_FOR_SAVED 1 /* queue of thread waiting for flush */
#define COND_FOR_WRLOCK 2 /* queue of write lock */
-#define COND_SIZE 3 /* number of COND_* queues */
+#define COND_FOR_BIG_BLOCK 3 /* queue of threads waiting for a big block read */
+#define COND_SIZE 4 /* number of COND_* queues */
typedef mysql_cond_t KEYCACHE_CONDVAR;
@@ -146,7 +150,7 @@ struct st_pagecache_hash_link
struct st_pagecache_block_link
*block; /* reference to the block for the page: */
PAGECACHE_FILE file; /* from such a file */
- pgcache_page_no_t pageno; /* this page */
+ pgcache_page_no_t pageno; /* this page */
uint requests; /* number of requests for the page */
};
@@ -174,6 +178,7 @@ struct st_pagecache_hash_link
#define PCBLOCK_CHANGED 32 /* block buffer contains a dirty page */
#define PCBLOCK_DIRECT_W 64 /* possible direct write to the block */
#define PCBLOCK_DEL_WRITE 128 /* should be written on delete */
+#define PCBLOCK_BIG_READ 256 /* the first block of the big read in progress */
/* page status, returned by find_block */
#define PAGE_READ 0
@@ -507,37 +512,45 @@ static void test_key_cache(PAGECACHE *pagecache,
#define DEFAULT_PAGECACHE_DEBUG_LOG "pagecache_debug.log"
-#if defined(PAGECACHE_DEBUG) && ! defined(PAGECACHE_DEBUG_LOG)
-#define PAGECACHE_DEBUG_LOG DEFAULT_PAGECACHE_DEBUG_LOG
-#endif
-
-#if defined(PAGECACHE_DEBUG_LOG)
+#if defined(PAGECACHE_DEBUG)
static FILE *pagecache_debug_log= NULL;
static void pagecache_debug_print _VARARGS((const char *fmt, ...));
-#define PAGECACHE_DEBUG_OPEN \
- if (!pagecache_debug_log) \
- { \
- pagecache_debug_log= fopen(PAGECACHE_DEBUG_LOG, "w"); \
- (void) setvbuf(pagecache_debug_log, NULL, _IOLBF, BUFSIZ); \
+#define PAGECACHE_DEBUG_OPEN \
+ if (!pagecache_debug_log) \
+ { \
+ if ((pagecache_debug_log= fopen(PAGECACHE_DEBUG_LOG, "w"))) \
+ (void) setvbuf(pagecache_debug_log, NULL, _IOLBF, BUFSIZ); \
}
-#define PAGECACHE_DEBUG_CLOSE \
- if (pagecache_debug_log) \
- { \
- fclose(pagecache_debug_log); \
- pagecache_debug_log= 0; \
+#define PAGECACHE_DEBUG_CLOSE \
+ if (pagecache_debug_log) \
+ { \
+ fclose(pagecache_debug_log); \
+ pagecache_debug_log= 0; \
}
#else
#define PAGECACHE_DEBUG_OPEN
#define PAGECACHE_DEBUG_CLOSE
#endif /* defined(PAGECACHE_DEBUG_LOG) */
-#if defined(PAGECACHE_DEBUG_LOG) && defined(PAGECACHE_DEBUG)
+#if defined(PAGECACHE_DEBUG)
#define KEYCACHE_PRINT(l, m) KEYCACHE_DBUG_PRINT(l,m)
+
+#ifdef PAGECACHE_DEBUG_DLOG
+#define KEYCACHE_DBUG_PRINT(l, m) \
+ { if (pagecache_debug_log) \
+ { \
+ fprintf(pagecache_debug_log, "%s: ", l); \
+ DBUG_PRINT("PCDEBUG", ("%s: ", l)); \
+ } \
+ pagecache_debug_print m; }
+#else
#define KEYCACHE_DBUG_PRINT(l, m) \
{ if (pagecache_debug_log) \
fprintf(pagecache_debug_log, "%s: ", l); \
pagecache_debug_print m; }
+#endif
+
#define KEYCACHE_DBUG_ASSERT(a) \
{ if (! (a) && pagecache_debug_log) \
@@ -547,20 +560,21 @@ static void pagecache_debug_print _VARARGS((const char *fmt, ...));
#define KEYCACHE_PRINT(l, m)
#define KEYCACHE_DBUG_PRINT(l, m) DBUG_PRINT(l, m)
#define KEYCACHE_DBUG_ASSERT(a) DBUG_ASSERT(a)
-#endif /* defined(PAGECACHE_DEBUG_LOG) && defined(PAGECACHE_DEBUG) */
+#endif /* defined(PAGECACHE_DEBUG) */
#if defined(PAGECACHE_DEBUG) || !defined(DBUG_OFF)
-static long pagecache_thread_id;
+static my_thread_id pagecache_thread_id;
#define KEYCACHE_THREAD_TRACE(l) \
- KEYCACHE_DBUG_PRINT(l,("|thread %ld",pagecache_thread_id))
+ KEYCACHE_DBUG_PRINT(l,("|thread %lld",pagecache_thread_id))
#define KEYCACHE_THREAD_TRACE_BEGIN(l) \
{ struct st_my_thread_var *thread_var= my_thread_var; \
pagecache_thread_id= thread_var->id; \
- KEYCACHE_DBUG_PRINT(l,("[thread %ld",pagecache_thread_id)) }
+ KEYCACHE_DBUG_PRINT(l,("[thread %lld",pagecache_thread_id)); \
+ }
#define KEYCACHE_THREAD_TRACE_END(l) \
- KEYCACHE_DBUG_PRINT(l,("]thread %ld",pagecache_thread_id))
+ KEYCACHE_DBUG_PRINT(l,("]thread %lld",pagecache_thread_id))
#else
#define KEYCACHE_PRINT(l,m)
#define KEYCACHE_THREAD_TRACE_BEGIN(l)
@@ -586,13 +600,13 @@ static int ___pagecache_pthread_mutex_lock(mysql_mutex_t *mutex);
static void ___pagecache_pthread_mutex_unlock(mysql_mutex_t *mutex);
static int ___pagecache_pthread_cond_signal(mysql_cond_t *cond);
#define pagecache_pthread_mutex_lock(M) \
-{ DBUG_PRINT("lock", ("mutex lock 0x%lx %u", (ulong)(M), __LINE__)); \
+{ DBUG_PRINT("lock", ("mutex lock %p %u", (M), __LINE__)); \
___pagecache_pthread_mutex_lock(M);}
#define pagecache_pthread_mutex_unlock(M) \
-{ DBUG_PRINT("lock", ("mutex unlock 0x%lx %u", (ulong)(M), __LINE__)); \
+{ DBUG_PRINT("lock", ("mutex unlock %p %u", (M), __LINE__)); \
___pagecache_pthread_mutex_unlock(M);}
#define pagecache_pthread_cond_signal(M) \
-{ DBUG_PRINT("lock", ("signal 0x%lx %u", (ulong)(M), __LINE__)); \
+{ DBUG_PRINT("lock", ("signal %p %u", (M), __LINE__)); \
___pagecache_pthread_cond_signal(M);}
#else
#define pagecache_pthread_mutex_lock mysql_mutex_lock
@@ -748,7 +762,8 @@ static inline uint next_power(uint value)
size_t init_pagecache(PAGECACHE *pagecache, size_t use_mem,
uint division_limit, uint age_threshold,
- uint block_size, uint changed_blocks_hash_size,
+ uint block_size,
+ uint changed_blocks_hash_size,
myf my_readwrite_flags)
{
size_t blocks, hash_links, length;
@@ -756,6 +771,10 @@ size_t init_pagecache(PAGECACHE *pagecache, size_t use_mem,
DBUG_ENTER("init_pagecache");
DBUG_ASSERT(block_size >= 512);
+ // By default we init usual cache (variables will be assigned to switch to s3)
+ pagecache->big_block_read= NULL;
+ pagecache->big_block_free= NULL;
+
PAGECACHE_DEBUG_OPEN;
if (pagecache->inited && pagecache->disk_blocks > 0)
{
@@ -1350,6 +1369,8 @@ static void link_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block,
}
}
while (thread != last_thread);
+ DBUG_PRINT("XXX", ("hash_link (link block): %p, hash_link: %p -> %p",
+ hash_link, hash_link->block, block));
hash_link->block= block;
/* Ensure that no other thread tries to use this block */
block->status|= PCBLOCK_REASSIGNED;
@@ -1646,6 +1667,9 @@ static void unlink_hash(PAGECACHE *pagecache, PAGECACHE_HASH_LINK *hash_link)
if ((*hash_link->prev= hash_link->next))
hash_link->next->prev= hash_link->prev;
+
+ DBUG_PRINT("XXX", ("hash_link (unlink): %p, hash_link: %p -> NULL",
+ hash_link, hash_link->block));
hash_link->block= NULL;
if (pagecache->waiting_for_hash_link.last_thread)
{
@@ -1893,6 +1917,7 @@ static PAGECACHE_BLOCK_LINK *find_block(PAGECACHE *pagecache,
my_bool wrmode,
my_bool block_is_copied,
my_bool reg_req,
+ my_bool fast,
int *page_st)
{
PAGECACHE_HASH_LINK *hash_link;
@@ -1909,6 +1934,7 @@ static PAGECACHE_BLOCK_LINK *find_block(PAGECACHE *pagecache,
DBUG_EXECUTE("check_pagecache",
test_key_cache(pagecache, "start of find_block", 0););
#endif
+ DBUG_ASSERT(!fast || !wrmode);
restart:
/* Find the hash link for the requested page (file, pageno) */
@@ -2018,9 +2044,11 @@ restart:
/* This is a request for a new page or for a page not to be removed */
if (! block)
{
+ DBUG_PRINT("XXX", ("request for a new page"));
/* No block is assigned for the page yet */
if (pagecache->blocks_unused)
{
+ DBUG_PRINT("XXX", ("there is never used blocks"));
if (pagecache->free_block_list)
{
/* There is a block in the free list. */
@@ -2054,7 +2082,11 @@ restart:
block->last_hit_time= 0;
block->rec_lsn= LSN_MAX;
link_to_file_list(pagecache, block, file, 0);
+ DBUG_PRINT("XXX", ("block (no block assigned): %p, hash_link: %p -> %p",
+ block, block->hash_link, hash_link));
block->hash_link= hash_link;
+ DBUG_PRINT("XXX", ("hash_link (no block assignment): %p, hash_link: %p -> %p",
+ hash_link, hash_link->block, block));
hash_link->block= block;
page_status= PAGE_TO_BE_READ;
DBUG_PRINT("info", ("page to be read set for page %p (%u)",
@@ -2065,6 +2097,7 @@ restart:
}
else
{
+ DBUG_PRINT("XXX", ("there is NOT never used blocks"));
/* There are no never used blocks, use a block from the LRU chain */
/*
@@ -2076,6 +2109,8 @@ restart:
if (! pagecache->used_last)
{
+ struct st_my_thread_var *thread;
+ DBUG_PRINT("XXX", ("there is NOT UNUSED blocks"));
/*
Wait until a new block is added to the LRU chain;
several threads might wait here for the same page,
@@ -2084,8 +2119,18 @@ restart:
The block is given to us by the next thread executing
link_block().
*/
+ if (fast)
+ {
+ DBUG_ASSERT(hash_link->requests == 0);
+ unlink_hash(pagecache, hash_link);
+ DBUG_PRINT("info", ("fast and no blocks in LRU"));
- struct st_my_thread_var *thread= my_thread_var;
+ KEYCACHE_DBUG_PRINT("find_block",
+ ("fast and no blocks in LRU"));
+ DBUG_RETURN(0);
+ }
+
+ thread= my_thread_var;
thread->keycache_link= (void *) hash_link;
wqueue_link_into_queue(&pagecache->waiting_for_block, thread);
do
@@ -2104,13 +2149,30 @@ restart:
}
else
{
+ DBUG_PRINT("XXX", ("take a block from LRU"));
/*
Take the first block from the LRU chain
unlinking it from the chain
*/
block= pagecache->used_last->next_used;
+ if (fast &&
+ ((block->status & (PCBLOCK_IN_FLUSH | PCBLOCK_CHANGED)) ||
+ (block->hash_link && block->hash_link != hash_link &&
+ block->hash_link->requests)))
+ {
+ DBUG_ASSERT(hash_link->requests == 0);
+ unlink_hash(pagecache, hash_link);
+ DBUG_PRINT("info", ("fast and LRU block is in switch or has "
+ "readers"));
+ KEYCACHE_DBUG_PRINT("find_block",
+ ("fast and LRU block is in switch or has "
+ "readers"));
+ DBUG_RETURN (0);
+ }
if (reg_req)
reg_requests(pagecache, block, 1);
+ DBUG_PRINT("XXX", ("hash_link (LRU): %p, hash_link: %p -> %p",
+ hash_link, hash_link->block, block));
hash_link->block= block;
DBUG_ASSERT(block->requests == 1);
}
@@ -2181,6 +2243,8 @@ restart:
link_to_file_list(pagecache, block, file,
(my_bool)(block->hash_link ? 1 : 0));
+ DBUG_PRINT("XXX", ("block (LRU): %p, hash_link: %p -> %p",
+ block, block->hash_link, hash_link));
block->hash_link= hash_link;
PCBLOCK_INFO(block);
block->hits_left= init_hits_left;
@@ -2665,8 +2729,221 @@ retry:
DBUG_ASSERT(block->hash_link->requests > 0);
block->hash_link->requests--;
DBUG_RETURN(1);
+}
+
+
+/**
+  @brief Reading of a big block in the S3 storage engine.
+
+  @param pagecache         Page cache
+  @param block             Block to read
+
+  @return
+    @retval FALSE  Done, no retry needed. On failure PCBLOCK_ERROR is set in
+                   block->status and block->error holds the error code.
+    @retval TRUE   Another thread was already reading the same big block.
+                   We waited for it and dropped our request on 'block';
+                   the caller has to retry the lookup.
+
+  @note
+
+  Page cache is segmented in logical blocks of size 'block_size'. All
+  read requests are for blocks of 'block_size'.
+
+  When using a file with 'big blocks', the file is split into a
+  header, header size (for index information) and then blocks of
+  big_block_size. The last block may be smaller than big_block_size.
+  All 'big blocks' are a multiple of block_size.
+  The header is never read into the page cache. It's used to store
+  the table definition and status and is only read by open().
+
+  When wanting to read a block, we register a read request for that
+  block and for the first block that is part of the big block read. We
+  also put a special flag on the first block so that if another thread
+  would want to do a big block read, it will wait on signal, and then
+  check if the block it requested is now in the page cache. If it's
+  not in the cache it will retry.
+
+  After the big block is read, we will put all read blocks that were not in
+  the page cache. Blocks that were already in the page cache will not be
+  touched and will not be added first in the FIFO.
+
+  The block for which we had a read request is added first in FIFO and
+  returned.
+
+  The function is entered with pagecache->cache_lock held. The lock is
+  released while the big block is fetched and is held again on return.
+*/
+
+#ifdef WITH_S3_STORAGE_ENGINE
+static my_bool read_big_block(PAGECACHE *pagecache,
+                              PAGECACHE_BLOCK_LINK *block)
+{
+  int page_st;
+  size_t big_block_size_in_pages;
+  size_t offset;
+  pgcache_page_no_t page, our_page;
+  pgcache_page_no_t page_to_read;
+  PAGECACHE_BLOCK_LINK *block_to_read= NULL;
+  PAGECACHE_IO_HOOK_ARGS args;
+  S3_BLOCK data;
+  DBUG_ENTER("read_big_block");
+  DBUG_PRINT("enter", ("read BIG block: %p", block));
+  bzero((void*) &data, sizeof(data));
+
+  DBUG_ASSERT(block->hash_link->file.big_block_size %
+              pagecache->block_size == 0);
+  big_block_size_in_pages=
+    block->hash_link->file.big_block_size / pagecache->block_size;
+
+  our_page= block->hash_link->pageno;
+
+  /* find first page of the big block (page_to_read) */
+  page_to_read= ((block->hash_link->pageno -
+                  block->hash_link->file.head_blocks) /
+                 big_block_size_in_pages);
+  page_to_read= (page_to_read * big_block_size_in_pages +
+                 block->hash_link->file.head_blocks);
+  if (page_to_read != our_page)
+  {
+    block_to_read= find_block(pagecache, &block->hash_link->file,
+                              page_to_read, 1,
+                              FALSE, TRUE /* copy under protection (?)*/,
+                              TRUE /*register*/, FALSE, &page_st);
+    DBUG_ASSERT(block_to_read == block_to_read->hash_link->block);
+
+    if (block_to_read->status & PCBLOCK_ERROR)
+    {
+      /* We get first block with an error so all operation failed */
+      block->status|= PCBLOCK_ERROR;
+      block->error= block_to_read->error;
+      /* Drop the request find_block() registered on the first block */
+      remove_reader(block_to_read);
+      unreg_request(pagecache, block_to_read, 1);
+      DBUG_RETURN(FALSE);                       // no retry
+    }
+    // only primary request here, PAGE_WAIT_TO_BE_READ is impossible
+    DBUG_ASSERT(page_st != PAGE_WAIT_TO_BE_READ);
+    if (block_to_read->status & PCBLOCK_BIG_READ)
+    {
+      struct st_my_thread_var *thread;
+      DBUG_ASSERT(page_st != PAGE_TO_BE_READ);
+      /*
+        Block read failed because somebody else is reading the first block
+        (and all other blocks part of this one).
+        Wait until block is available.
+      */
+      unreg_request(pagecache, block, 1);
+      thread= my_thread_var;
+      /*
+        Put the request into the queue of the first block of the big block.
+        That is the queue the reading thread releases when it is done, so
+        all waiters of one big block meet on the same queue regardless of
+        which page each of them asked for.
+      */
+      wqueue_add_to_queue(&block_to_read->wqueue[COND_FOR_BIG_BLOCK], thread);
+      do
+      {
+        DBUG_PRINT("wait",
+                   ("suspend thread %s %ld", thread->name,
+                    (ulong) thread->id));
+        pagecache_pthread_cond_wait(&thread->suspend,
+                                    &pagecache->cache_lock);
+      }
+      while (thread->next);
+      /*
+        We kept our request on the first block while waiting; release it
+        now that the other thread has finished.
+      */
+      remove_reader(block_to_read);
+      unreg_request(pagecache, block_to_read, 1);
+      DBUG_RETURN(TRUE);
+    }
+  }
+  else
+  {
+    block_to_read= block;
+    page_st= PAGE_TO_BE_READ;
+  }
+
+  DBUG_ASSERT(!(block_to_read->status & PCBLOCK_BIG_READ));
+  // Mark the first page of a big block
+  block_to_read->status|= PCBLOCK_BIG_READ;
+
+  // Don't keep cache locked during the possible slow read from s3
+  pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+
+  // perform read of big block
+  args.page= NULL;
+  args.pageno= page_to_read;
+  args.data= block->hash_link->file.callback_data;
+
+  if (pagecache->big_block_read(pagecache, &args, &block->hash_link->file,
+                                &data))
+  {
+    pagecache_pthread_mutex_lock(&pagecache->cache_lock);
+    block_to_read->status|= PCBLOCK_ERROR;
+    block->status|= PCBLOCK_ERROR;
+    block_to_read->error= block->error= (int16) my_errno;
+    pagecache->big_block_free(&data);
+    /*
+      Share the cleanup with the success path: the big read mark has to be
+      removed and waiting threads woken up even when the read failed.
+    */
+    goto end;                                   // no retry
+  }
+
+  /*
+    We need to keep the mutex locked while filling pages.
+    As there is no changed blocks to flush, this operation should
+    be reasonable fast
+  */
+  pagecache_pthread_mutex_lock(&pagecache->cache_lock);
+
+  /* Copy the first page to the cache */
+  if (page_st != PAGE_READ)
+  {
+    DBUG_ASSERT(page_st != PAGE_WAIT_TO_BE_READ);
+    memcpy(block_to_read->buffer, data.str, pagecache->block_size);
+    block_to_read->status|= PCBLOCK_READ;
+  }
+  else
+    DBUG_ASSERT(block_to_read->status & PCBLOCK_READ);
+  /* Copy the rest of the pages */
+  for (offset= pagecache->block_size, page= page_to_read + 1;
+       offset < data.length;
+       offset+= pagecache->block_size, page++)
+  {
+    DBUG_ASSERT(offset + pagecache->block_size <= data.length);
+    if (page == our_page)
+    {
+      DBUG_ASSERT(!(block->status & PCBLOCK_READ));
+      memcpy(block->buffer, data.str + offset, pagecache->block_size);
+      block->status|= PCBLOCK_READ;
+    }
+    else
+    {
+      PAGECACHE_BLOCK_LINK *bl;
+      bl= find_block(pagecache, &block->hash_link->file, page, 1,
+                     FALSE, TRUE /* copy under protection (?)*/,
+                     TRUE /*register*/, TRUE /*fast*/, &page_st);
+      if (!bl)
+      {
+        // we ran out of easily available pages in the cache
+        break;
+      }
+      DBUG_ASSERT(bl == bl->hash_link->block);
+      if ((bl->status & PCBLOCK_ERROR) == 0 &&
+          page_st == PAGE_TO_BE_READ)
+      {
+        memcpy(bl->buffer, data.str + offset, pagecache->block_size);
+        bl->status|= PCBLOCK_READ;
+      }
+      remove_reader(bl);
+      unreg_request(pagecache, bl, 1);
+    }
+  }
+  if (page < our_page)
+  {
+    /* we broke out early, but still have to fill the requested page */
+    DBUG_ASSERT(!(block->status & PCBLOCK_READ));
+    memcpy(block->buffer,
+           data.str + ((our_page - page_to_read) * pagecache->block_size),
+           pagecache->block_size);
+    block->status|= PCBLOCK_READ;
+  }
+  pagecache->big_block_free(&data);
+
+end:
+  /* The big read is over (successfully or not): remove the mark */
+  block_to_read->status&= ~PCBLOCK_BIG_READ;
+  if (block_to_read != block)
+  {
+    remove_reader(block_to_read);
+    unreg_request(pagecache, block_to_read, 1);
+  }
+  /* Wake up the threads that waited for this big block read */
+  if (block_to_read->wqueue[COND_FOR_BIG_BLOCK].last_thread)
+    wqueue_release_queue(&block_to_read->wqueue[COND_FOR_BIG_BLOCK]);
+
+  DBUG_RETURN(FALSE);
 }
+#endif /* WITH_S3_STORAGE_ENGINE */
/*
@@ -2861,7 +3138,7 @@ void pagecache_unlock(PAGECACHE *pagecache,
inc_counter_for_resize_op(pagecache);
/* See NOTE for pagecache_unlock about registering requests */
block= find_block(pagecache, file, pageno, 0, 0, 0,
- pin == PAGECACHE_PIN_LEFT_UNPINNED, &page_st);
+ pin == PAGECACHE_PIN_LEFT_UNPINNED, FALSE, &page_st);
PCBLOCK_INFO(block);
DBUG_ASSERT(block != 0 && page_st == PAGE_READ);
if (first_REDO_LSN_for_page)
@@ -2948,7 +3225,7 @@ void pagecache_unpin(PAGECACHE *pagecache,
inc_counter_for_resize_op(pagecache);
/* See NOTE for pagecache_unlock about registering requests */
- block= find_block(pagecache, file, pageno, 0, 0, 0, 0, &page_st);
+ block= find_block(pagecache, file, pageno, 0, 0, 0, 0, FALSE, &page_st);
DBUG_ASSERT(block != 0);
DBUG_ASSERT(page_st == PAGE_READ);
/* we can't unpin such page without unlock */
@@ -3349,7 +3626,7 @@ uchar *pagecache_read(PAGECACHE *pagecache,
char llbuf[22];
DBUG_ENTER("pagecache_read");
DBUG_PRINT("enter", ("fd: %u page: %s buffer: %p level: %u "
- "t:%s (%d)%s->%s %s->%s",
+ "t:%s (%d)%s->%s %s->%s big block: %d",
(uint) file->file, ullstr(pageno, llbuf),
buff, level,
page_cache_page_type_str[type],
@@ -3357,7 +3634,8 @@ uchar *pagecache_read(PAGECACHE *pagecache,
page_cache_page_lock_str[lock_to_read[lock].new_lock],
page_cache_page_lock_str[lock_to_read[lock].unlock_lock],
page_cache_page_pin_str[new_pin],
- page_cache_page_pin_str[unlock_pin]));
+ page_cache_page_pin_str[unlock_pin],
+ MY_TEST(pagecache->big_block_read)));
DBUG_ASSERT(buff != 0 || (buff == 0 && (unlock_pin == PAGECACHE_PIN ||
unlock_pin == PAGECACHE_PIN_LEFT_PINNED)));
DBUG_ASSERT(pageno < ((1ULL) << 40));
@@ -3369,6 +3647,14 @@ uchar *pagecache_read(PAGECACHE *pagecache,
restart:
+ /*
+ If we use big blocks, then the big block size is a multiple of the block
+ size and we have enough blocks in the cache
+ */
+ DBUG_ASSERT(!pagecache->big_block_read ||
+ (file->big_block_size != 0 &&
+ file->big_block_size % pagecache->block_size == 0));
+
if (pagecache->can_be_used)
{
/* Key cache is used */
@@ -3387,19 +3673,45 @@ restart:
pagecache->global_cache_r_requests++;
/* See NOTE for pagecache_unlock about registering requests. */
reg_request= ((new_pin == PAGECACHE_PIN_LEFT_UNPINNED) ||
- (new_pin == PAGECACHE_PIN));
+ (new_pin == PAGECACHE_PIN) ||
+ pagecache->big_block_read);
block= find_block(pagecache, file, pageno, level,
lock == PAGECACHE_LOCK_WRITE, buff != 0,
- reg_request, &page_st);
+ reg_request, FALSE, &page_st);
DBUG_PRINT("info", ("Block type: %s current type %s",
page_cache_page_type_str[block->type],
page_cache_page_type_str[type]));
if (((block->status & PCBLOCK_ERROR) == 0) && (page_st != PAGE_READ))
{
- /* The requested page is to be read into the block buffer */
- read_block(pagecache, block,
- (my_bool)(page_st == PAGE_TO_BE_READ));
- DBUG_PRINT("info", ("read is done"));
+#ifdef WITH_S3_STORAGE_ENGINE
+ if (!pagecache->big_block_read)
+#endif /* WITH_S3_STORAGE_ENGINE */
+ {
+ /* The requested page is to be read into the block buffer */
+ read_block(pagecache, block, page_st == PAGE_TO_BE_READ);
+ DBUG_PRINT("info", ("read is done"));
+ }
+#ifdef WITH_S3_STORAGE_ENGINE
+ else
+ {
+ /* It is big read and this thread should read */
+ DBUG_ASSERT(page_st == PAGE_TO_BE_READ);
+
+ if (read_big_block(pagecache, block))
+ {
+ /* block is unregistered in read_big_block */
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+ DBUG_PRINT("restart", ("big block fail, restarting..."));
+ goto restart;
+ }
+ if (!((new_pin == PAGECACHE_PIN_LEFT_UNPINNED) ||
+ (new_pin == PAGECACHE_PIN)))
+ {
+ /* we registered request only for big_block_read */
+ unreg_request(pagecache, block, 1);
+ }
+ }
+#endif /* WITH_S3_STORAGE_ENGINE */
}
/*
Assert after block is read. Imagine two concurrent SELECTs on same
@@ -3990,6 +4302,7 @@ my_bool pagecache_write_part(PAGECACHE *pagecache,
DBUG_ASSERT(lock != PAGECACHE_LOCK_READ_UNLOCK);
DBUG_ASSERT(offset + size <= pagecache->block_size);
DBUG_ASSERT(pageno < ((1ULL) << 40));
+ DBUG_ASSERT(pagecache->big_block_read == 0);
#endif
if (!page_link)
@@ -4026,7 +4339,7 @@ restart:
(pin == PAGECACHE_PIN));
block= find_block(pagecache, file, pageno, level,
TRUE, FALSE,
- reg_request, &page_st);
+ reg_request, FALSE, &page_st);
if (!block)
{
DBUG_ASSERT(write_mode != PAGECACHE_WRITE_DONE);
@@ -4278,6 +4591,8 @@ static my_bool free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block,
block->type= PAGECACHE_EMPTY_PAGE;
#endif
block->rec_lsn= LSN_MAX;
+ DBUG_PRINT("XXX", ("block (Free): %p, hash_link: %p -> NULL",
+ block, block->hash_link));
block->hash_link= NULL;
if (block->temperature == PCBLOCK_WARM)
pagecache->warm_blocks--;
@@ -5230,6 +5545,7 @@ static int pagecache_pthread_cond_wait(mysql_cond_t *cond,
#endif
#endif /* defined(PAGECACHE_TIMEOUT) && !defined(__WIN__) */
+
#if defined(PAGECACHE_DEBUG)
static int ___pagecache_pthread_mutex_lock(mysql_mutex_t *mutex)
{
@@ -5256,32 +5572,26 @@ static int ___pagecache_pthread_cond_signal(mysql_cond_t *cond)
}
-#if defined(PAGECACHE_DEBUG_LOG)
-
-
static void pagecache_debug_print(const char * fmt, ...)
{
va_list args;
va_start(args,fmt);
if (pagecache_debug_log)
{
- VOID(vfprintf(pagecache_debug_log, fmt, args));
- VOID(fputc('\n',pagecache_debug_log));
+ vfprintf(pagecache_debug_log, fmt, args);
+ fputc('\n',pagecache_debug_log);
+#ifdef PAGECACHE_DEBUG_DLOG
+ _db_doprnt_(fmt, args);
+#endif
}
va_end(args);
}
-#endif /* defined(PAGECACHE_DEBUG_LOG) */
-
-#if defined(PAGECACHE_DEBUG_LOG)
-
void pagecache_debug_log_close(void)
{
if (pagecache_debug_log)
fclose(pagecache_debug_log);
}
-#endif /* defined(PAGECACHE_DEBUG_LOG) */
-
#endif /* defined(PAGECACHE_DEBUG) */
/**
@@ -5307,8 +5617,7 @@ static void null_post_write_hook(int res __attribute__((unused)),
return;
}
-void
-pagecache_file_set_null_hooks(PAGECACHE_FILE *file)
+void pagecache_file_set_null_hooks(PAGECACHE_FILE *file)
{
file->pre_read_hook= null_pre_hook;
file->post_read_hook= null_post_read_hook;
@@ -5316,4 +5625,5 @@ pagecache_file_set_null_hooks(PAGECACHE_FILE *file)
file->post_write_hook= null_post_write_hook;
file->flush_log_callback= null_pre_hook;
file->callback_data= NULL;
+ file->head_blocks= file->big_block_size= 0;
}