summary | refs | log | tree | commit | diff
path: root/storage/ndb/src/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'storage/ndb/src/kernel')
-rw-r--r-- storage/ndb/src/kernel/blocks/ERROR_codes.txt | 3
-rw-r--r-- storage/ndb/src/kernel/blocks/backup/Backup.cpp | 84
-rw-r--r-- storage/ndb/src/kernel/blocks/backup/Backup.hpp | 3
-rw-r--r-- storage/ndb/src/kernel/blocks/backup/BackupFormat.hpp | 10
-rw-r--r-- storage/ndb/src/kernel/blocks/backup/BackupInit.cpp | 7
-rw-r--r-- storage/ndb/src/kernel/blocks/backup/FsBuffer.hpp | 24
-rw-r--r-- storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp | 8
-rw-r--r-- storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp | 8
-rw-r--r-- storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp | 23
-rw-r--r-- storage/ndb/src/kernel/blocks/dblqh/DblqhInit.cpp | 21
-rw-r--r-- storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp | 298
-rw-r--r-- storage/ndb/src/kernel/blocks/dblqh/Makefile.am | 2
-rw-r--r-- storage/ndb/src/kernel/blocks/dblqh/redoLogReader/reader.cpp (renamed from storage/ndb/src/kernel/blocks/dblqh/redoLogReader/redoLogFileReader.cpp) | 0
-rw-r--r-- storage/ndb/src/kernel/blocks/dbtup/Dbtup.hpp | 4
-rw-r--r-- storage/ndb/src/kernel/blocks/dbtup/DbtupDebug.cpp | 47
-rw-r--r-- storage/ndb/src/kernel/blocks/dbtup/DbtupGen.cpp | 18
-rw-r--r-- storage/ndb/src/kernel/blocks/dbtup/DbtupPagMan.cpp | 61
-rw-r--r-- storage/ndb/src/kernel/blocks/dbtup/DbtupPageMap.cpp | 5
-rw-r--r-- storage/ndb/src/kernel/blocks/dbtup/DbtupScan.cpp | 64
-rw-r--r-- storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp | 45
-rw-r--r-- storage/ndb/src/kernel/blocks/ndbfs/AsyncFile.cpp | 151
-rw-r--r-- storage/ndb/src/kernel/blocks/ndbfs/AsyncFile.hpp | 4
-rw-r--r-- storage/ndb/src/kernel/blocks/ndbfs/Ndbfs.cpp | 2
-rw-r--r-- storage/ndb/src/kernel/blocks/pgman.cpp | 104
-rw-r--r-- storage/ndb/src/kernel/blocks/pgman.hpp | 6
-rw-r--r-- storage/ndb/src/kernel/blocks/restore.cpp | 3
-rw-r--r-- storage/ndb/src/kernel/vm/Configuration.cpp | 12
-rw-r--r-- storage/ndb/src/kernel/vm/Configuration.hpp | 1
-rw-r--r-- storage/ndb/src/kernel/vm/SimulatedBlock.cpp | 48
-rw-r--r-- storage/ndb/src/kernel/vm/SimulatedBlock.hpp | 8
-rw-r--r-- storage/ndb/src/kernel/vm/WatchDog.cpp | 152
31 files changed, 915 insertions, 311 deletions
diff --git a/storage/ndb/src/kernel/blocks/ERROR_codes.txt b/storage/ndb/src/kernel/blocks/ERROR_codes.txt
index b3405679978..67eb89f850f 100644
--- a/storage/ndb/src/kernel/blocks/ERROR_codes.txt
+++ b/storage/ndb/src/kernel/blocks/ERROR_codes.txt
@@ -1,5 +1,5 @@
Next QMGR 1
-Next NDBCNTR 1001
+Next NDBCNTR 1002
Next NDBFS 2000
Next DBACC 3002
Next DBTUP 4029
@@ -523,3 +523,4 @@ Dbtup:
NDBCNTR:
1000: Crash insertion on SystemError::CopyFragRef
+1001: Delay sending NODE_FAILREP (to own node), until error is cleared
diff --git a/storage/ndb/src/kernel/blocks/backup/Backup.cpp b/storage/ndb/src/kernel/blocks/backup/Backup.cpp
index d86c22024cd..64e2c41aa69 100644
--- a/storage/ndb/src/kernel/blocks/backup/Backup.cpp
+++ b/storage/ndb/src/kernel/blocks/backup/Backup.cpp
@@ -448,6 +448,41 @@ Backup::execDUMP_STATE_ORD(Signal* signal)
filePtr.p->m_flags);
}
}
+
+ ndbout_c("m_curr_disk_write_speed: %u m_words_written_this_period: %u m_overflow_disk_write: %u",
+ m_curr_disk_write_speed, m_words_written_this_period, m_overflow_disk_write);
+ ndbout_c("m_reset_delay_used: %u m_reset_disk_speed_time: %llu",
+ m_reset_delay_used, (Uint64)m_reset_disk_speed_time);
+ for(c_backups.first(ptr); ptr.i != RNIL; c_backups.next(ptr))
+ {
+ ndbout_c("BackupRecord %u: BackupId: %u MasterRef: %x ClientRef: %x",
+ ptr.i, ptr.p->backupId, ptr.p->masterRef, ptr.p->clientRef);
+ ndbout_c(" State: %u", ptr.p->slaveState.getState());
+ ndbout_c(" noOfByte: %llu noOfRecords: %llu",
+ ptr.p->noOfBytes, ptr.p->noOfRecords);
+ ndbout_c(" noOfLogBytes: %llu noOfLogRecords: %llu",
+ ptr.p->noOfLogBytes, ptr.p->noOfLogRecords);
+ ndbout_c(" errorCode: %u", ptr.p->errorCode);
+ BackupFilePtr filePtr;
+ for(ptr.p->files.first(filePtr); filePtr.i != RNIL;
+ ptr.p->files.next(filePtr))
+ {
+ ndbout_c(" file %u: type: %u flags: H'%x tableId: %u fragmentId: %u",
+ filePtr.i, filePtr.p->fileType, filePtr.p->m_flags,
+ filePtr.p->tableId, filePtr.p->fragmentNo);
+ }
+ if (ptr.p->slaveState.getState() == SCANNING && ptr.p->dataFilePtr != RNIL)
+ {
+ c_backupFilePool.getPtr(filePtr, ptr.p->dataFilePtr);
+ OperationRecord & op = filePtr.p->operation;
+ Uint32 *tmp = NULL;
+ Uint32 sz = 0;
+ bool eof = FALSE;
+ bool ready = op.dataBuffer.getReadPtr(&tmp, &sz, &eof);
+ ndbout_c("ready: %s eof: %s", ready ? "TRUE" : "FALSE", eof ? "TRUE" : "FALSE");
+ }
+ }
+ return;
}
if(signal->theData[0] == 24){
/**
@@ -2771,6 +2806,8 @@ Backup::openFiles(Signal* signal, BackupRecordPtr ptr)
c_backupFilePool.getPtr(filePtr, ptr.p->dataFilePtr);
filePtr.p->m_flags |= BackupFile::BF_OPENING;
+ if (c_defaults.m_o_direct)
+ req->fileFlags |= FsOpenReq::OM_DIRECT;
req->userPointer = filePtr.i;
FsOpenReq::setVersion(req->fileNumber, 2);
FsOpenReq::setSuffix(req->fileNumber, FsOpenReq::S_DATA);
@@ -3745,12 +3782,31 @@ Backup::OperationRecord::newFragment(Uint32 tableId, Uint32 fragNo)
}
bool
-Backup::OperationRecord::fragComplete(Uint32 tableId, Uint32 fragNo)
+Backup::OperationRecord::fragComplete(Uint32 tableId, Uint32 fragNo, bool fill_record)
{
Uint32 * tmp;
const Uint32 footSz = sizeof(BackupFormat::DataFile::FragmentFooter) >> 2;
+ Uint32 sz = footSz + 1;
- if(dataBuffer.getWritePtr(&tmp, footSz + 1)) {
+ if (fill_record)
+ {
+ Uint32 * new_tmp;
+ if (!dataBuffer.getWritePtr(&tmp, sz))
+ return false;
+ new_tmp = tmp + sz;
+
+ if ((UintPtr)new_tmp & (sizeof(Page32)-1))
+ {
+ /* padding is needed to get full write */
+ new_tmp += 2 /* to fit empty header minimum 2 words*/;
+ new_tmp = (Uint32 *)(((UintPtr)new_tmp + sizeof(Page32)-1) &
+ ~(UintPtr)(sizeof(Page32)-1));
+ /* new write sz */
+ sz = new_tmp - tmp;
+ }
+ }
+
+ if(dataBuffer.getWritePtr(&tmp, sz)) {
jam();
* tmp = 0; // Finish record stream
tmp++;
@@ -3762,7 +3818,17 @@ Backup::OperationRecord::fragComplete(Uint32 tableId, Uint32 fragNo)
foot->FragmentNo = htonl(fragNo);
foot->NoOfRecords = htonl(noOfRecords);
foot->Checksum = htonl(0);
- dataBuffer.updateWritePtr(footSz + 1);
+
+ if (sz != footSz + 1)
+ {
+ tmp += footSz;
+ memset(tmp, 0, (sz - footSz - 1) * 4);
+ *tmp = htonl(BackupFormat::EMPTY_ENTRY);
+ tmp++;
+ *tmp = htonl(sz - footSz - 1);
+ }
+
+ dataBuffer.updateWritePtr(sz);
return true;
}//if
return false;
@@ -3864,8 +3930,13 @@ Backup::fragmentCompleted(Signal* signal, BackupFilePtr filePtr)
return;
}//if
+ BackupRecordPtr ptr LINT_SET_PTR;
+ c_backupPool.getPtr(ptr, filePtr.p->backupPtr);
+
OperationRecord & op = filePtr.p->operation;
- if(!op.fragComplete(filePtr.p->tableId, filePtr.p->fragmentNo)) {
+ if(!op.fragComplete(filePtr.p->tableId, filePtr.p->fragmentNo,
+ c_defaults.m_o_direct))
+ {
jam();
signal->theData[0] = BackupContinueB::BUFFER_FULL_FRAG_COMPLETE;
signal->theData[1] = filePtr.i;
@@ -3875,9 +3946,6 @@ Backup::fragmentCompleted(Signal* signal, BackupFilePtr filePtr)
filePtr.p->m_flags &= ~(Uint32)BackupFile::BF_SCAN_THREAD;
- BackupRecordPtr ptr LINT_SET_PTR;
- c_backupPool.getPtr(ptr, filePtr.p->backupPtr);
-
if (ptr.p->is_lcp())
{
ptr.p->slaveState.setState(STOPPING);
@@ -4914,6 +4982,8 @@ Backup::lcp_open_file(Signal* signal, BackupRecordPtr ptr)
FsOpenReq::OM_CREATE |
FsOpenReq::OM_APPEND |
FsOpenReq::OM_AUTOSYNC;
+ if (c_defaults.m_o_direct)
+ req->fileFlags |= FsOpenReq::OM_DIRECT;
FsOpenReq::v2_setCount(req->fileNumber, 0xFFFFFFFF);
req->auto_sync_size = c_defaults.m_disk_synch_size;
diff --git a/storage/ndb/src/kernel/blocks/backup/Backup.hpp b/storage/ndb/src/kernel/blocks/backup/Backup.hpp
index 32f2e14ac92..3fd9b2967fd 100644
--- a/storage/ndb/src/kernel/blocks/backup/Backup.hpp
+++ b/storage/ndb/src/kernel/blocks/backup/Backup.hpp
@@ -240,7 +240,7 @@ public:
* Once per fragment
*/
bool newFragment(Uint32 tableId, Uint32 fragNo);
- bool fragComplete(Uint32 tableId, Uint32 fragNo);
+ bool fragComplete(Uint32 tableId, Uint32 fragNo, bool fill_record);
/**
* Once per scan frag (next) req/conf
@@ -534,6 +534,7 @@ public:
Uint32 m_disk_write_speed;
Uint32 m_disk_synch_size;
Uint32 m_diskless;
+ Uint32 m_o_direct;
};
/**
diff --git a/storage/ndb/src/kernel/blocks/backup/BackupFormat.hpp b/storage/ndb/src/kernel/blocks/backup/BackupFormat.hpp
index ace9dfe5c79..20f8f6650be 100644
--- a/storage/ndb/src/kernel/blocks/backup/BackupFormat.hpp
+++ b/storage/ndb/src/kernel/blocks/backup/BackupFormat.hpp
@@ -32,7 +32,8 @@ struct BackupFormat {
TABLE_LIST = 4,
TABLE_DESCRIPTION = 5,
GCP_ENTRY = 6,
- FRAGMENT_INFO = 7
+ FRAGMENT_INFO = 7,
+ EMPTY_ENTRY = 8
};
struct FileHeader {
@@ -93,6 +94,13 @@ struct BackupFormat {
Uint32 NoOfRecords;
Uint32 Checksum;
};
+
+ /* optional padding for O_DIRECT */
+ struct EmptyEntry { // two-word header of a padding section
+ Uint32 SectionType; // NOTE(review): presumably EMPTY_ENTRY; Backup.cpp fragComplete() stores htonl(BackupFormat::EMPTY_ENTRY) as the first padding word - confirm
+ Uint32 SectionLength; // NOTE(review): presumably the padding size in 32-bit words, header included; fragComplete() stores htonl(sz - footSz - 1) as the second padding word - confirm
+ /* the words following this header carry no data */
+ };
};
/**
diff --git a/storage/ndb/src/kernel/blocks/backup/BackupInit.cpp b/storage/ndb/src/kernel/blocks/backup/BackupInit.cpp
index 4faa02e494f..2cd2a8a2bee 100644
--- a/storage/ndb/src/kernel/blocks/backup/BackupInit.cpp
+++ b/storage/ndb/src/kernel/blocks/backup/BackupInit.cpp
@@ -148,10 +148,13 @@ Backup::execREAD_CONFIG_REQ(Signal* signal)
c_defaults.m_disk_write_speed = 10 * (1024 * 1024);
c_defaults.m_disk_write_speed_sr = 100 * (1024 * 1024);
c_defaults.m_disk_synch_size = 4 * (1024 * 1024);
-
+ c_defaults.m_o_direct = true;
+
Uint32 noBackups = 0, noTables = 0, noAttribs = 0, noFrags = 0;
ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DB_DISCLESS,
&c_defaults.m_diskless));
+ ndb_mgm_get_int_parameter(p, CFG_DB_O_DIRECT,
+ &c_defaults.m_o_direct);
ndb_mgm_get_int_parameter(p, CFG_DB_CHECKPOINT_SPEED_SR,
&c_defaults.m_disk_write_speed_sr);
ndb_mgm_get_int_parameter(p, CFG_DB_CHECKPOINT_SPEED,
@@ -204,7 +207,7 @@ Backup::execREAD_CONFIG_REQ(Signal* signal)
/ sizeof(Page32);
// We need to allocate an additional of 2 pages. 1 page because of a bug in
// ArrayPool and another one for DICTTAINFO.
- c_pagePool.setSize(noPages + NO_OF_PAGES_META_FILE + 2);
+ c_pagePool.setSize(noPages + NO_OF_PAGES_META_FILE + 2, true);
{ // Init all tables
SLList<Table> tables(c_tablePool);
diff --git a/storage/ndb/src/kernel/blocks/backup/FsBuffer.hpp b/storage/ndb/src/kernel/blocks/backup/FsBuffer.hpp
index d26f36ccf40..bb0bbd6d770 100644
--- a/storage/ndb/src/kernel/blocks/backup/FsBuffer.hpp
+++ b/storage/ndb/src/kernel/blocks/backup/FsBuffer.hpp
@@ -270,8 +270,8 @@ FsBuffer::getReadPtr(Uint32 ** ptr, Uint32 * sz, bool * _eof){
* ptr = &Tp[Tr];
- DEBUG(ndbout_c("getReadPtr() Tr: %d Tw: %d Ts: %d Tm: %d sz1: %d -> %d",
- Tr, Tw, Ts, Tm, sz1, * sz));
+ DEBUG(ndbout_c("getReadPtr() Tr: %d Tmw: %d Ts: %d Tm: %d sz1: %d -> %d",
+ Tr, Tmw, Ts, Tm, sz1, * sz));
return true;
}
@@ -279,8 +279,8 @@ FsBuffer::getReadPtr(Uint32 ** ptr, Uint32 * sz, bool * _eof){
if(!m_eof){
* _eof = false;
- DEBUG(ndbout_c("getReadPtr() Tr: %d Tw: %d Ts: %d Tm: %d sz1: %d -> false",
- Tr, Tw, Ts, Tm, sz1));
+ DEBUG(ndbout_c("getReadPtr() Tr: %d Tmw: %d Ts: %d Tm: %d sz1: %d -> false",
+ Tr, Tmw, Ts, Tm, sz1));
return false;
}
@@ -289,8 +289,8 @@ FsBuffer::getReadPtr(Uint32 ** ptr, Uint32 * sz, bool * _eof){
* _eof = true;
* ptr = &Tp[Tr];
- DEBUG(ndbout_c("getReadPtr() Tr: %d Tw: %d Ts: %d Tm: %d sz1: %d -> %d eof",
- Tr, Tw, Ts, Tm, sz1, * sz));
+ DEBUG(ndbout_c("getReadPtr() Tr: %d Tmw: %d Ts: %d Tm: %d sz1: %d -> %d eof",
+ Tr, Tmw, Ts, Tm, sz1, * sz));
return false;
}
@@ -316,13 +316,13 @@ FsBuffer::getWritePtr(Uint32 ** ptr, Uint32 sz){
if(sz1 > sz){ // Note at least 1 word of slack
* ptr = &Tp[Tw];
- DEBUG(ndbout_c("getWritePtr(%d) Tr: %d Tw: %d Ts: %d sz1: %d -> true",
- sz, Tr, Tw, Ts, sz1));
+ DEBUG(ndbout_c("getWritePtr(%d) Tw: %d sz1: %d -> true",
+ sz, Tw, sz1));
return true;
}
- DEBUG(ndbout_c("getWritePtr(%d) Tr: %d Tw: %d Ts: %d sz1: %d -> false",
- sz, Tr, Tw, Ts, sz1));
+ DEBUG(ndbout_c("getWritePtr(%d) Tw: %d sz1: %d -> false",
+ sz, Tw, sz1));
return false;
}
@@ -339,11 +339,15 @@ FsBuffer::updateWritePtr(Uint32 sz){
m_free -= sz;
if(Tnew < Ts){
m_writeIndex = Tnew;
+ DEBUG(ndbout_c("updateWritePtr(%d) m_writeIndex: %d",
+ sz, m_writeIndex));
return;
}
memcpy(Tp, &Tp[Ts], (Tnew - Ts) << 2);
m_writeIndex = Tnew - Ts;
+ DEBUG(ndbout_c("updateWritePtr(%d) m_writeIndex: %d",
+ sz, m_writeIndex));
}
inline
diff --git a/storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp b/storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp
index ac3acdc6778..edc8c0131db 100644
--- a/storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp
+++ b/storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp
@@ -698,6 +698,9 @@ void Dbdict::execFSCLOSECONF(Signal* signal)
case FsConnectRecord::OPEN_READ_SCHEMA2:
openSchemaFile(signal, 1, fsPtr.i, false, false);
break;
+ case FsConnectRecord::OPEN_READ_TAB_FILE2:
+ openTableFile(signal, 1, fsPtr.i, c_readTableRecord.tableId, false);
+ break;
default:
jamLine((fsPtr.p->fsState & 0xFFF));
ndbrequire(false);
@@ -1073,8 +1076,11 @@ void Dbdict::readTableConf(Signal* signal,
void Dbdict::readTableRef(Signal* signal,
FsConnectRecordPtr fsPtr)
{
+ /**
+ * First close corrupt file
+ */
fsPtr.p->fsState = FsConnectRecord::OPEN_READ_TAB_FILE2;
- openTableFile(signal, 1, fsPtr.i, c_readTableRecord.tableId, false);
+ closeFile(signal, fsPtr.p->filePtr, fsPtr.i);
return;
}//Dbdict::readTableRef()
diff --git a/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp b/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
index 1fe932aaae8..bc14eec1f98 100644
--- a/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
+++ b/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
@@ -4741,12 +4741,18 @@ void Dbdih::failedNodeLcpHandling(Signal* signal, NodeRecordPtr failedNodePtr)
jam();
const Uint32 nodeId = failedNodePtr.i;
- if (c_lcpState.m_participatingLQH.get(failedNodePtr.i)){
+ if (isMaster() && c_lcpState.m_participatingLQH.get(failedNodePtr.i))
+ {
/*----------------------------------------------------*/
/* THE NODE WAS INVOLVED IN A LOCAL CHECKPOINT. WE */
/* MUST UPDATE THE ACTIVE STATUS TO INDICATE THAT */
/* THE NODE HAVE MISSED A LOCAL CHECKPOINT. */
/*----------------------------------------------------*/
+
+ /**
+ * Bug#28717, Only master should do this, as this status is copied
+ * to other nodes
+ */
switch (failedNodePtr.p->activeStatus) {
case Sysfile::NS_Active:
jam();
diff --git a/storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp b/storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp
index ba146fce005..64d214d472b 100644
--- a/storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp
+++ b/storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp
@@ -71,7 +71,6 @@ class Dbtup;
/* CONSTANTS OF THE LOG PAGES */
/* ------------------------------------------------------------------------- */
#define ZPAGE_HEADER_SIZE 32
-#define ZNO_MBYTES_IN_FILE 16
#define ZPAGE_SIZE 8192
#define ZPAGES_IN_MBYTE 32
#define ZTWOLOG_NO_PAGES_IN_MBYTE 5
@@ -115,9 +114,6 @@ class Dbtup;
/* ------------------------------------------------------------------------- */
/* VARIOUS CONSTANTS USED AS FLAGS TO THE FILE MANAGER. */
/* ------------------------------------------------------------------------- */
-#define ZOPEN_READ 0
-#define ZOPEN_WRITE 1
-#define ZOPEN_READ_WRITE 2
#define ZVAR_NO_LOG_PAGE_WORD 1
#define ZLIST_OF_PAIRS 0
#define ZLIST_OF_PAIRS_SYNCH 16
@@ -142,7 +138,7 @@ class Dbtup;
/* IN THE MBYTE. */
/* ------------------------------------------------------------------------- */
#define ZFD_HEADER_SIZE 3
-#define ZFD_PART_SIZE 48
+#define ZFD_MBYTE_SIZE 3
#define ZLOG_HEAD_SIZE 8
#define ZNEXT_LOG_SIZE 2
#define ZABORT_LOG_SIZE 3
@@ -169,7 +165,6 @@ class Dbtup;
#define ZPOS_LOG_TYPE 0
#define ZPOS_NO_FD 1
#define ZPOS_FILE_NO 2
-#define ZMAX_LOG_FILES_IN_PAGE_ZERO 40
/* ------------------------------------------------------------------------- */
/* THE POSITIONS WITHIN A PREPARE LOG RECORD AND A NEW PREPARE */
/* LOG RECORD. */
@@ -1437,17 +1432,17 @@ public:
* header of each log file. That information is used during
* system restart to find the tail of the log.
*/
- UintR logLastPrepRef[16];
+ UintR *logLastPrepRef;
/**
* The max global checkpoint completed before the mbyte in the
* log file was started. One variable per mbyte.
*/
- UintR logMaxGciCompleted[16];
+ UintR *logMaxGciCompleted;
/**
* The max global checkpoint started before the mbyte in the log
* file was started. One variable per mbyte.
*/
- UintR logMaxGciStarted[16];
+ UintR *logMaxGciStarted;
/**
* This variable contains the file name as needed by the file
* system when opening the file.
@@ -2163,6 +2158,7 @@ private:
void execSTART_RECREF(Signal* signal);
void execGCP_SAVEREQ(Signal* signal);
+ void execFSOPENREF(Signal* signal);
void execFSOPENCONF(Signal* signal);
void execFSCLOSECONF(Signal* signal);
void execFSWRITECONF(Signal* signal);
@@ -2671,6 +2667,8 @@ private:
LogPartRecord *logPartRecord;
LogPartRecordPtr logPartPtr;
UintR clogPartFileSize;
+ Uint32 clogFileSize; // In MBYTE
+ Uint32 cmaxLogFilesInPageZero; //
// Configurable
LogFileRecord *logFileRecord;
@@ -2678,13 +2676,15 @@ private:
UintR cfirstfreeLogFile;
UintR clogFileFileSize;
-#define ZLFO_FILE_SIZE 256 /* MAX 256 OUTSTANDING FILE OPERATIONS */
+#define ZLFO_MIN_FILE_SIZE 256
+// RedoBuffer/32K minimum ZLFO_MIN_FILE_SIZE
LogFileOperationRecord *logFileOperationRecord;
LogFileOperationRecordPtr lfoPtr;
UintR cfirstfreeLfo;
UintR clfoFileSize;
LogPageRecord *logPageRecord;
+ void *logPageRecordUnaligned;
LogPageRecordPtr logPagePtr;
UintR cfirstfreeLogPage;
UintR clogPageFileSize;
@@ -2695,7 +2695,7 @@ private:
UintR cfirstfreePageRef;
UintR cpageRefFileSize;
-#define ZSCANREC_FILE_SIZE 100
+// Configurable
ArrayPool<ScanRecord> c_scanRecordPool;
ScanRecordPtr scanptr;
UintR cscanNoFreeRec;
@@ -2888,6 +2888,7 @@ private:
UintR ctransidHash[1024];
Uint32 c_diskless;
+ Uint32 c_o_direct;
Uint32 c_error_insert_table_id;
public:
diff --git a/storage/ndb/src/kernel/blocks/dblqh/DblqhInit.cpp b/storage/ndb/src/kernel/blocks/dblqh/DblqhInit.cpp
index c054c227c8e..d6411ee1cb9 100644
--- a/storage/ndb/src/kernel/blocks/dblqh/DblqhInit.cpp
+++ b/storage/ndb/src/kernel/blocks/dblqh/DblqhInit.cpp
@@ -30,11 +30,11 @@ void Dblqh::initData()
cgcprecFileSize = ZGCPREC_FILE_SIZE;
chostFileSize = MAX_NDB_NODES;
clcpFileSize = ZNO_CONCURRENT_LCP;
- clfoFileSize = ZLFO_FILE_SIZE;
+ clfoFileSize = 0;
clogFileFileSize = 0;
clogPartFileSize = ZLOG_PART_FILE_SIZE;
cpageRefFileSize = ZPAGE_REF_FILE_SIZE;
- cscanrecFileSize = ZSCANREC_FILE_SIZE;
+ cscanrecFileSize = 0;
ctabrecFileSize = 0;
ctcConnectrecFileSize = 0;
ctcNodeFailrecFileSize = MAX_NDB_NODES;
@@ -49,6 +49,7 @@ void Dblqh::initData()
logFileRecord = 0;
logFileOperationRecord = 0;
logPageRecord = 0;
+ logPageRecordUnaligned= 0;
pageRefRecord = 0;
tablerec = 0;
tcConnectionrec = 0;
@@ -60,6 +61,8 @@ void Dblqh::initData()
cLqhTimeOutCheckCount = 0;
cbookedAccOps = 0;
m_backup_ptr = RNIL;
+ clogFileSize = 16;
+ cmaxLogFilesInPageZero = 40;
}//Dblqh::initData()
void Dblqh::initRecords()
@@ -105,10 +108,13 @@ void Dblqh::initRecords()
sizeof(LogFileOperationRecord),
clfoFileSize);
- logPageRecord = (LogPageRecord*)allocRecord("LogPageRecord",
- sizeof(LogPageRecord),
- clogPageFileSize,
- false);
+ logPageRecord =
+ (LogPageRecord*)allocRecordAligned("LogPageRecord",
+ sizeof(LogPageRecord),
+ clogPageFileSize,
+ &logPageRecordUnaligned,
+ NDB_O_DIRECT_WRITE_ALIGNMENT,
+ false);
pageRefRecord = (PageRefRecord*)allocRecord("PageRefRecord",
sizeof(PageRefRecord),
@@ -260,6 +266,7 @@ Dblqh::Dblqh(Block_context& ctx):
addRecSignal(GSN_START_FRAGREQ, &Dblqh::execSTART_FRAGREQ);
addRecSignal(GSN_START_RECREF, &Dblqh::execSTART_RECREF);
addRecSignal(GSN_GCP_SAVEREQ, &Dblqh::execGCP_SAVEREQ);
+ addRecSignal(GSN_FSOPENREF, &Dblqh::execFSOPENREF, true);
addRecSignal(GSN_FSOPENCONF, &Dblqh::execFSOPENCONF);
addRecSignal(GSN_FSCLOSECONF, &Dblqh::execFSCLOSECONF);
addRecSignal(GSN_FSWRITECONF, &Dblqh::execFSWRITECONF);
@@ -377,7 +384,7 @@ Dblqh::~Dblqh()
sizeof(LogFileOperationRecord),
clfoFileSize);
- deallocRecord((void**)&logPageRecord,
+ deallocRecord((void**)&logPageRecordUnaligned,
"LogPageRecord",
sizeof(LogPageRecord),
clogPageFileSize);
diff --git a/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp b/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
index 2ffed9749b8..8f42a8039d8 100644
--- a/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
+++ b/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
@@ -1023,6 +1023,11 @@ void Dblqh::execREAD_CONFIG_REQ(Signal* signal)
clogPageFileSize+= (16 - mega_byte_part);
}
+ /* maximum number of log file operations */
+ clfoFileSize = clogPageFileSize;
+ if (clfoFileSize < ZLFO_MIN_FILE_SIZE)
+ clfoFileSize = ZLFO_MIN_FILE_SIZE;
+
ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_LQH_TABLE, &ctabrecFileSize));
ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_LQH_TC_CONNECT,
&ctcConnectrecFileSize));
@@ -1031,14 +1036,44 @@ void Dblqh::execREAD_CONFIG_REQ(Signal* signal)
cmaxAccOps = cscanrecFileSize * MAX_PARALLEL_OP_PER_SCAN;
ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DB_DISCLESS, &c_diskless));
+ c_o_direct = true;
+ ndb_mgm_get_int_parameter(p, CFG_DB_O_DIRECT, &c_o_direct);
Uint32 tmp= 0;
ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_LQH_FRAG, &tmp));
c_fragment_pool.setSize(tmp);
+ if (!ndb_mgm_get_int_parameter(p, CFG_DB_REDOLOG_FILE_SIZE,
+ &clogFileSize))
+ {
+ // convert to mbyte
+ clogFileSize = (clogFileSize + 1024*1024 - 1) / (1024 * 1024);
+ ndbrequire(clogFileSize >= 4 && clogFileSize <= 1024);
+ }
+
+ cmaxLogFilesInPageZero = (ZPAGE_SIZE - ZPAGE_HEADER_SIZE - 128) /
+ (ZFD_MBYTE_SIZE * clogFileSize);
+
+ /**
+ * "Old" cmaxLogFilesInPageZero was 40
+ * Each FD need 3 words per mb, require that they can fit into 1 page
+ * (atleast 1 FD)
+ * Is also checked in ConfigInfo.cpp (max FragmentLogFileSize = 1Gb)
+ * 1Gb = 1024Mb => 3(ZFD_MBYTE_SIZE) * 1024 < 8192 (ZPAGE_SIZE)
+ */
+ if (cmaxLogFilesInPageZero > 40)
+ {
+ jam();
+ cmaxLogFilesInPageZero = 40;
+ }
+ else
+ {
+ ndbrequire(cmaxLogFilesInPageZero);
+ }
+
initRecords();
initialiseRecordsLab(signal, 0, ref, senderData);
-
+
return;
}//Dblqh::execSIZEALT_REP()
@@ -11788,9 +11823,9 @@ void Dblqh::sendStartLcp(Signal* signal)
Uint32 Dblqh::remainingLogSize(const LogFileRecordPtr &sltCurrLogFilePtr,
const LogPartRecordPtr &sltLogPartPtr)
{
- Uint32 hf = sltCurrLogFilePtr.p->fileNo*ZNO_MBYTES_IN_FILE+sltCurrLogFilePtr.p->currentMbyte;
- Uint32 tf = sltLogPartPtr.p->logTailFileNo*ZNO_MBYTES_IN_FILE+sltLogPartPtr.p->logTailMbyte;
- Uint32 sz = sltLogPartPtr.p->noLogFiles*ZNO_MBYTES_IN_FILE;
+ Uint32 hf = sltCurrLogFilePtr.p->fileNo*clogFileSize+sltCurrLogFilePtr.p->currentMbyte;
+ Uint32 tf = sltLogPartPtr.p->logTailFileNo*clogFileSize+sltLogPartPtr.p->logTailMbyte;
+ Uint32 sz = sltLogPartPtr.p->noLogFiles*clogFileSize;
if (tf > hf) hf += sz;
return sz-(hf-tf);
}
@@ -11848,7 +11883,7 @@ void Dblqh::setLogTail(Signal* signal, Uint32 keepGci)
/* ------------------------------------------------------------------------- */
SLT_LOOP:
for (tsltIndex = tsltStartMbyte;
- tsltIndex <= ZNO_MBYTES_IN_FILE - 1;
+ tsltIndex <= clogFileSize - 1;
tsltIndex++) {
if (sltLogFilePtr.p->logMaxGciStarted[tsltIndex] >= keepGci) {
/* ------------------------------------------------------------------------- */
@@ -11864,7 +11899,7 @@ void Dblqh::setLogTail(Signal* signal, Uint32 keepGci)
/* ------------------------------------------------------------------------- */
/*STEPPING BACK INCLUDES ALSO STEPPING BACK TO THE PREVIOUS LOG FILE. */
/* ------------------------------------------------------------------------- */
- tsltMbyte = ZNO_MBYTES_IN_FILE - 1;
+ tsltMbyte = clogFileSize - 1;
sltLogFilePtr.i = sltLogFilePtr.p->prevLogFile;
ptrCheckGuard(sltLogFilePtr, clogFileFileSize, logFileRecord);
}//if
@@ -11902,7 +11937,7 @@ void Dblqh::setLogTail(Signal* signal, Uint32 keepGci)
UintR ToldTailFileNo = sltLogPartPtr.p->logTailFileNo;
UintR ToldTailMByte = sltLogPartPtr.p->logTailMbyte;
- arrGuard(tsltMbyte, 16);
+ arrGuard(tsltMbyte, clogFileSize);
sltLogPartPtr.p->logTailFileNo =
sltLogFilePtr.p->logLastPrepRef[tsltMbyte] >> 16;
/* ------------------------------------------------------------------------- */
@@ -12402,6 +12437,26 @@ void Dblqh::execFSOPENCONF(Signal* signal)
}//switch
}//Dblqh::execFSOPENCONF()
+void
+Dblqh::execFSOPENREF(Signal* signal) // FSOPENREF handler: reports an invalid redo log file size as a configuration error, otherwise defers to SimulatedBlock
+{
+ jamEntry();
+ FsRef* ref = (FsRef*)signal->getDataPtr(); // the signal payload is interpreted as an FsRef
+ Uint32 err = ref->errorCode;
+ if (err == FsRef::fsErrInvalidFileSize) // NOTE(review): presumably produced by the OM_CHECK_SIZE opens of redo log files - confirm in NDBFS
+ {
+ char buf[256];
+ BaseString::snprintf(buf, sizeof(buf),
+ "Invalid file size for redo logfile, "
+ " size only changable with --initial");
+ progError(__LINE__,
+ NDBD_EXIT_INVALID_CONFIG,
+ buf);
+ return;
+ }
+
+ SimulatedBlock::execFSOPENREF(signal); // every other error code goes to the base-class handler
+}
/* ************>> */
/* FSREADCONF > */
@@ -13047,7 +13102,7 @@ void Dblqh::openFileInitLab(Signal* signal)
{
logFilePtr.p->logFileStatus = LogFileRecord::OPEN_INIT;
seizeLogpage(signal);
- writeSinglePage(signal, (ZNO_MBYTES_IN_FILE * ZPAGES_IN_MBYTE) - 1,
+ writeSinglePage(signal, (clogFileSize * ZPAGES_IN_MBYTE) - 1,
ZPAGE_SIZE - 1, __LINE__);
lfoPtr.p->lfoState = LogFileOperationRecord::INIT_WRITE_AT_END;
return;
@@ -13110,7 +13165,7 @@ void Dblqh::writeInitMbyteLab(Signal* signal)
{
releaseLfo(signal);
logFilePtr.p->currentMbyte = logFilePtr.p->currentMbyte + 1;
- if (logFilePtr.p->currentMbyte == ZNO_MBYTES_IN_FILE) {
+ if (logFilePtr.p->currentMbyte == clogFileSize) {
jam();
releaseLogpage(signal);
logFilePtr.p->logFileStatus = LogFileRecord::CLOSING_INIT;
@@ -13230,7 +13285,7 @@ void Dblqh::initLogfile(Signal* signal, Uint32 fileNo)
logFilePtr.p->lastPageWritten = 0;
logFilePtr.p->logPageZero = RNIL;
logFilePtr.p->currentMbyte = 0;
- for (tilIndex = 0; tilIndex <= 15; tilIndex++) {
+ for (tilIndex = 0; tilIndex < clogFileSize; tilIndex++) {
logFilePtr.p->logMaxGciCompleted[tilIndex] = (UintR)-1;
logFilePtr.p->logMaxGciStarted[tilIndex] = (UintR)-1;
logFilePtr.p->logLastPrepRef[tilIndex] = 0;
@@ -13281,8 +13336,14 @@ void Dblqh::openFileRw(Signal* signal, LogFileRecordPtr olfLogFilePtr)
signal->theData[3] = olfLogFilePtr.p->fileName[1];
signal->theData[4] = olfLogFilePtr.p->fileName[2];
signal->theData[5] = olfLogFilePtr.p->fileName[3];
- signal->theData[6] = ZOPEN_READ_WRITE | FsOpenReq::OM_AUTOSYNC;
+ signal->theData[6] = FsOpenReq::OM_READWRITE | FsOpenReq::OM_AUTOSYNC | FsOpenReq::OM_CHECK_SIZE;
+ if (c_o_direct)
+ signal->theData[6] |= FsOpenReq::OM_DIRECT;
req->auto_sync_size = MAX_REDO_PAGES_WITHOUT_SYNCH * sizeof(LogPageRecord);
+ Uint64 sz = clogFileSize;
+ sz *= 1024; sz *= 1024;
+ req->file_size_hi = sz >> 32;
+ req->file_size_lo = sz & 0xFFFFFFFF;
sendSignal(NDBFS_REF, GSN_FSOPENREQ, signal, FsOpenReq::SignalLength, JBA);
}//Dblqh::openFileRw()
@@ -13301,7 +13362,9 @@ void Dblqh::openLogfileInit(Signal* signal)
signal->theData[3] = logFilePtr.p->fileName[1];
signal->theData[4] = logFilePtr.p->fileName[2];
signal->theData[5] = logFilePtr.p->fileName[3];
- signal->theData[6] = 0x302 | FsOpenReq::OM_AUTOSYNC;
+ signal->theData[6] = FsOpenReq::OM_READWRITE | FsOpenReq::OM_TRUNCATE | FsOpenReq::OM_CREATE | FsOpenReq::OM_AUTOSYNC;
+ if (c_o_direct)
+ signal->theData[6] |= FsOpenReq::OM_DIRECT;
req->auto_sync_size = MAX_REDO_PAGES_WITHOUT_SYNCH * sizeof(LogPageRecord);
sendSignal(NDBFS_REF, GSN_FSOPENREQ, signal, FsOpenReq::SignalLength, JBA);
}//Dblqh::openLogfileInit()
@@ -13337,8 +13400,14 @@ void Dblqh::openNextLogfile(Signal* signal)
signal->theData[3] = onlLogFilePtr.p->fileName[1];
signal->theData[4] = onlLogFilePtr.p->fileName[2];
signal->theData[5] = onlLogFilePtr.p->fileName[3];
- signal->theData[6] = 2 | FsOpenReq::OM_AUTOSYNC;
+ signal->theData[6] = FsOpenReq::OM_READWRITE | FsOpenReq::OM_AUTOSYNC | FsOpenReq::OM_CHECK_SIZE;
+ if (c_o_direct)
+ signal->theData[6] |= FsOpenReq::OM_DIRECT;
req->auto_sync_size = MAX_REDO_PAGES_WITHOUT_SYNCH * sizeof(LogPageRecord);
+ Uint64 sz = clogFileSize;
+ sz *= 1024; sz *= 1024;
+ req->file_size_hi = sz >> 32;
+ req->file_size_lo = sz & 0xFFFFFFFF;
sendSignal(NDBFS_REF, GSN_FSOPENREQ, signal, FsOpenReq::SignalLength, JBA);
}//if
}//Dblqh::openNextLogfile()
@@ -13469,7 +13538,7 @@ void Dblqh::writeFileDescriptor(Signal* signal)
/* -------------------------------------------------- */
/* START BY WRITING TO LOG FILE RECORD */
/* -------------------------------------------------- */
- arrGuard(logFilePtr.p->currentMbyte, 16);
+ arrGuard(logFilePtr.p->currentMbyte, clogFileSize);
logFilePtr.p->logMaxGciCompleted[logFilePtr.p->currentMbyte] =
logPartPtr.p->logPartNewestCompletedGCI;
logFilePtr.p->logMaxGciStarted[logFilePtr.p->currentMbyte] = cnewestGci;
@@ -13495,10 +13564,7 @@ void Dblqh::writeFileDescriptor(Signal* signal)
/* ------------------------------------------------------------------------- */
void Dblqh::writeFileHeaderOpen(Signal* signal, Uint32 wmoType)
{
- LogFileRecordPtr wmoLogFilePtr;
UintR twmoNoLogDescriptors;
- UintR twmoLoop;
- UintR twmoIndex;
/* -------------------------------------------------- */
/* WRITE HEADER INFORMATION IN THE NEW FILE. */
@@ -13506,52 +13572,44 @@ void Dblqh::writeFileHeaderOpen(Signal* signal, Uint32 wmoType)
logPagePtr.p->logPageWord[ZPAGE_HEADER_SIZE + ZPOS_LOG_TYPE] = ZFD_TYPE;
logPagePtr.p->logPageWord[ZPAGE_HEADER_SIZE + ZPOS_FILE_NO] =
logFilePtr.p->fileNo;
- if (logPartPtr.p->noLogFiles > ZMAX_LOG_FILES_IN_PAGE_ZERO) {
+ if (logPartPtr.p->noLogFiles > cmaxLogFilesInPageZero) {
jam();
- twmoNoLogDescriptors = ZMAX_LOG_FILES_IN_PAGE_ZERO;
+ twmoNoLogDescriptors = cmaxLogFilesInPageZero;
} else {
jam();
twmoNoLogDescriptors = logPartPtr.p->noLogFiles;
}//if
logPagePtr.p->logPageWord[ZPAGE_HEADER_SIZE + ZPOS_NO_FD] =
twmoNoLogDescriptors;
- wmoLogFilePtr.i = logFilePtr.i;
- twmoLoop = 0;
-WMO_LOOP:
- jam();
- if (twmoLoop < twmoNoLogDescriptors) {
- jam();
- ptrCheckGuard(wmoLogFilePtr, clogFileFileSize, logFileRecord);
- for (twmoIndex = 0; twmoIndex <= ZNO_MBYTES_IN_FILE - 1; twmoIndex++) {
- jam();
- arrGuard(((ZPAGE_HEADER_SIZE + ZFD_HEADER_SIZE) +
- (twmoLoop * ZFD_PART_SIZE)) + twmoIndex, ZPAGE_SIZE);
- logPagePtr.p->logPageWord[((ZPAGE_HEADER_SIZE + ZFD_HEADER_SIZE) +
- (twmoLoop * ZFD_PART_SIZE)) + twmoIndex] =
- wmoLogFilePtr.p->logMaxGciCompleted[twmoIndex];
- arrGuard((((ZPAGE_HEADER_SIZE + ZFD_HEADER_SIZE) +
- (twmoLoop * ZFD_PART_SIZE)) + ZNO_MBYTES_IN_FILE) +
- twmoIndex, ZPAGE_SIZE);
- logPagePtr.p->logPageWord[(((ZPAGE_HEADER_SIZE + ZFD_HEADER_SIZE) +
- (twmoLoop * ZFD_PART_SIZE)) + ZNO_MBYTES_IN_FILE) + twmoIndex] =
- wmoLogFilePtr.p->logMaxGciStarted[twmoIndex];
- arrGuard((((ZPAGE_HEADER_SIZE + ZFD_HEADER_SIZE) +
- (twmoLoop * ZFD_PART_SIZE)) + (2 * ZNO_MBYTES_IN_FILE)) +
- twmoIndex, ZPAGE_SIZE);
- logPagePtr.p->logPageWord[(((ZPAGE_HEADER_SIZE + ZFD_HEADER_SIZE) +
- (twmoLoop * ZFD_PART_SIZE)) + (2 * ZNO_MBYTES_IN_FILE)) + twmoIndex] =
- wmoLogFilePtr.p->logLastPrepRef[twmoIndex];
- }//for
- wmoLogFilePtr.i = wmoLogFilePtr.p->prevLogFile;
- twmoLoop = twmoLoop + 1;
- goto WMO_LOOP;
- }//if
- logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] =
- (ZPAGE_HEADER_SIZE + ZFD_HEADER_SIZE) +
- (ZFD_PART_SIZE * twmoNoLogDescriptors);
- arrGuard(logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX], ZPAGE_SIZE);
- logPagePtr.p->logPageWord[logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX]] =
- ZNEXT_LOG_RECORD_TYPE;
+
+ {
+ Uint32 pos = ZPAGE_HEADER_SIZE + ZFD_HEADER_SIZE;
+ LogFileRecordPtr filePtr = logFilePtr;
+ for (Uint32 fd = 0; fd < twmoNoLogDescriptors; fd++)
+ {
+ jam();
+ ptrCheckGuard(filePtr, clogFileFileSize, logFileRecord);
+ for (Uint32 mb = 0; mb < clogFileSize; mb ++)
+ {
+ jam();
+ Uint32 pos0 = pos + fd * (ZFD_MBYTE_SIZE * clogFileSize) + mb;
+ Uint32 pos1 = pos0 + clogFileSize;
+ Uint32 pos2 = pos1 + clogFileSize;
+ arrGuard(pos0, ZPAGE_SIZE);
+ arrGuard(pos1, ZPAGE_SIZE);
+ arrGuard(pos2, ZPAGE_SIZE);
+ logPagePtr.p->logPageWord[pos0] = filePtr.p->logMaxGciCompleted[mb];
+ logPagePtr.p->logPageWord[pos1] = filePtr.p->logMaxGciStarted[mb];
+ logPagePtr.p->logPageWord[pos2] = filePtr.p->logLastPrepRef[mb];
+ }
+ filePtr.i = filePtr.p->prevLogFile;
+ }
+ pos += (twmoNoLogDescriptors * ZFD_MBYTE_SIZE * clogFileSize);
+ arrGuard(pos, ZPAGE_SIZE);
+ logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] = pos;
+ logPagePtr.p->logPageWord[pos] = ZNEXT_LOG_RECORD_TYPE;
+ }
+
/* ------------------------------------------------------- */
/* THIS IS A SPECIAL WRITE OF THE FIRST PAGE IN THE */
/* LOG FILE. THIS HAS SPECIAL SIGNIFANCE TO FIND */
@@ -13696,9 +13754,9 @@ void Dblqh::openSrLastFileLab(Signal* signal)
void Dblqh::readSrLastFileLab(Signal* signal)
{
logPartPtr.p->logLap = logPagePtr.p->logPageWord[ZPOS_LOG_LAP];
- if (logPartPtr.p->noLogFiles > ZMAX_LOG_FILES_IN_PAGE_ZERO) {
+ if (logPartPtr.p->noLogFiles > cmaxLogFilesInPageZero) {
jam();
- initGciInLogFileRec(signal, ZMAX_LOG_FILES_IN_PAGE_ZERO);
+ initGciInLogFileRec(signal, cmaxLogFilesInPageZero);
} else {
jam();
initGciInLogFileRec(signal, logPartPtr.p->noLogFiles);
@@ -13723,7 +13781,7 @@ void Dblqh::readSrLastMbyteLab(Signal* signal)
logPartPtr.p->lastMbyte = logFilePtr.p->currentMbyte - 1;
}//if
}//if
- arrGuard(logFilePtr.p->currentMbyte, 16);
+ arrGuard(logFilePtr.p->currentMbyte, clogFileSize);
logFilePtr.p->logMaxGciCompleted[logFilePtr.p->currentMbyte] =
logPagePtr.p->logPageWord[ZPOS_MAX_GCI_COMPLETED];
logFilePtr.p->logMaxGciStarted[logFilePtr.p->currentMbyte] =
@@ -13731,7 +13789,7 @@ void Dblqh::readSrLastMbyteLab(Signal* signal)
logFilePtr.p->logLastPrepRef[logFilePtr.p->currentMbyte] =
logPagePtr.p->logPageWord[ZLAST_LOG_PREP_REF];
releaseLogpage(signal);
- if (logFilePtr.p->currentMbyte < (ZNO_MBYTES_IN_FILE - 1)) {
+ if (logFilePtr.p->currentMbyte < (clogFileSize - 1)) {
jam();
logFilePtr.p->currentMbyte++;
readSinglePage(signal, ZPAGES_IN_MBYTE * logFilePtr.p->currentMbyte);
@@ -13745,21 +13803,21 @@ void Dblqh::readSrLastMbyteLab(Signal* signal)
* ---------------------------------------------------------------------- */
if (logPartPtr.p->lastMbyte == ZNIL) {
jam();
- logPartPtr.p->lastMbyte = ZNO_MBYTES_IN_FILE - 1;
+ logPartPtr.p->lastMbyte = clogFileSize - 1;
}//if
}//if
logFilePtr.p->logFileStatus = LogFileRecord::CLOSING_SR;
closeFile(signal, logFilePtr, __LINE__);
- if (logPartPtr.p->noLogFiles > ZMAX_LOG_FILES_IN_PAGE_ZERO) {
+ if (logPartPtr.p->noLogFiles > cmaxLogFilesInPageZero) {
Uint32 fileNo;
- if (logFilePtr.p->fileNo >= ZMAX_LOG_FILES_IN_PAGE_ZERO) {
+ if (logFilePtr.p->fileNo >= cmaxLogFilesInPageZero) {
jam();
- fileNo = logFilePtr.p->fileNo - ZMAX_LOG_FILES_IN_PAGE_ZERO;
+ fileNo = logFilePtr.p->fileNo - cmaxLogFilesInPageZero;
} else {
jam();
fileNo =
(logPartPtr.p->noLogFiles + logFilePtr.p->fileNo) -
- ZMAX_LOG_FILES_IN_PAGE_ZERO;
+ cmaxLogFilesInPageZero;
}//if
if (fileNo == 0) {
jam();
@@ -13769,11 +13827,11 @@ void Dblqh::readSrLastMbyteLab(Signal* signal)
* -------------------------------------------------------------------- */
fileNo = 1;
logPartPtr.p->srRemainingFiles =
- logPartPtr.p->noLogFiles - (ZMAX_LOG_FILES_IN_PAGE_ZERO - 1);
+ logPartPtr.p->noLogFiles - (cmaxLogFilesInPageZero - 1);
} else {
jam();
logPartPtr.p->srRemainingFiles =
- logPartPtr.p->noLogFiles - ZMAX_LOG_FILES_IN_PAGE_ZERO;
+ logPartPtr.p->noLogFiles - cmaxLogFilesInPageZero;
}//if
LogFileRecordPtr locLogFilePtr;
findLogfile(signal, fileNo, logPartPtr, &locLogFilePtr);
@@ -13798,9 +13856,9 @@ void Dblqh::openSrNextFileLab(Signal* signal)
void Dblqh::readSrNextFileLab(Signal* signal)
{
- if (logPartPtr.p->srRemainingFiles > ZMAX_LOG_FILES_IN_PAGE_ZERO) {
+ if (logPartPtr.p->srRemainingFiles > cmaxLogFilesInPageZero) {
jam();
- initGciInLogFileRec(signal, ZMAX_LOG_FILES_IN_PAGE_ZERO);
+ initGciInLogFileRec(signal, cmaxLogFilesInPageZero);
} else {
jam();
initGciInLogFileRec(signal, logPartPtr.p->srRemainingFiles);
@@ -13808,16 +13866,16 @@ void Dblqh::readSrNextFileLab(Signal* signal)
releaseLogpage(signal);
logFilePtr.p->logFileStatus = LogFileRecord::CLOSING_SR;
closeFile(signal, logFilePtr, __LINE__);
- if (logPartPtr.p->srRemainingFiles > ZMAX_LOG_FILES_IN_PAGE_ZERO) {
+ if (logPartPtr.p->srRemainingFiles > cmaxLogFilesInPageZero) {
Uint32 fileNo;
- if (logFilePtr.p->fileNo >= ZMAX_LOG_FILES_IN_PAGE_ZERO) {
+ if (logFilePtr.p->fileNo >= cmaxLogFilesInPageZero) {
jam();
- fileNo = logFilePtr.p->fileNo - ZMAX_LOG_FILES_IN_PAGE_ZERO;
+ fileNo = logFilePtr.p->fileNo - cmaxLogFilesInPageZero;
} else {
jam();
fileNo =
(logPartPtr.p->noLogFiles + logFilePtr.p->fileNo) -
- ZMAX_LOG_FILES_IN_PAGE_ZERO;
+ cmaxLogFilesInPageZero;
}//if
if (fileNo == 0) {
jam();
@@ -13826,11 +13884,11 @@ void Dblqh::readSrNextFileLab(Signal* signal)
* -------------------------------------------------------------------- */
fileNo = 1;
logPartPtr.p->srRemainingFiles =
- logPartPtr.p->srRemainingFiles - (ZMAX_LOG_FILES_IN_PAGE_ZERO - 1);
+ logPartPtr.p->srRemainingFiles - (cmaxLogFilesInPageZero - 1);
} else {
jam();
logPartPtr.p->srRemainingFiles =
- logPartPtr.p->srRemainingFiles - ZMAX_LOG_FILES_IN_PAGE_ZERO;
+ logPartPtr.p->srRemainingFiles - cmaxLogFilesInPageZero;
}//if
LogFileRecordPtr locLogFilePtr;
findLogfile(signal, fileNo, logPartPtr, &locLogFilePtr);
@@ -14701,7 +14759,7 @@ void Dblqh::srLogLimits(Signal* signal)
* EXECUTED.
* ----------------------------------------------------------------------- */
while(true) {
- ndbrequire(tmbyte < 16);
+ ndbrequire(tmbyte < clogFileSize);
if (logPartPtr.p->logExecState == LogPartRecord::LES_SEARCH_STOP) {
if (logFilePtr.p->logMaxGciCompleted[tmbyte] < logPartPtr.p->logLastGci) {
jam();
@@ -14742,7 +14800,7 @@ void Dblqh::srLogLimits(Signal* signal)
if (logPartPtr.p->logExecState != LogPartRecord::LES_EXEC_LOG) {
if (tmbyte == 0) {
jam();
- tmbyte = ZNO_MBYTES_IN_FILE - 1;
+ tmbyte = clogFileSize - 1;
logFilePtr.i = logFilePtr.p->prevLogFile;
ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
} else {
@@ -15136,7 +15194,7 @@ void Dblqh::execSr(Signal* signal)
logPagePtr.p->logPageWord[ZPAGE_HEADER_SIZE + ZPOS_NO_FD];
logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] =
(ZPAGE_HEADER_SIZE + ZFD_HEADER_SIZE) +
- (noFdDescriptors * ZFD_PART_SIZE);
+ (noFdDescriptors * ZFD_MBYTE_SIZE * clogFileSize);
}
break;
/* ========================================================================= */
@@ -15176,11 +15234,11 @@ void Dblqh::execSr(Signal* signal)
/*---------------------------------------------------------------------------*/
/* START EXECUTION OF A NEW MBYTE IN THE LOG. */
/*---------------------------------------------------------------------------*/
- if (logFilePtr.p->currentMbyte < (ZNO_MBYTES_IN_FILE - 1)) {
+ if (logFilePtr.p->currentMbyte < (clogFileSize - 1)) {
jam();
logPartPtr.p->logExecState = LogPartRecord::LES_EXEC_LOG_NEW_MBYTE;
} else {
- ndbrequire(logFilePtr.p->currentMbyte == (ZNO_MBYTES_IN_FILE - 1));
+ ndbrequire(logFilePtr.p->currentMbyte == (clogFileSize - 1));
jam();
/*---------------------------------------------------------------------------*/
/* WE HAVE TO CHANGE FILE. CLOSE THIS ONE AND THEN OPEN THE NEXT. */
@@ -15375,7 +15433,7 @@ void Dblqh::invalidateLogAfterLastGCI(Signal* signal) {
jam();
releaseLfo(signal);
releaseLogpage(signal);
- if (logPartPtr.p->invalidatePageNo < (ZNO_MBYTES_IN_FILE * ZPAGES_IN_MBYTE - 1)) {
+ if (logPartPtr.p->invalidatePageNo < (clogFileSize * ZPAGES_IN_MBYTE - 1)) {
// We continue in this file.
logPartPtr.p->invalidatePageNo++;
} else {
@@ -16716,6 +16774,22 @@ void Dblqh::initialiseLogFile(Signal* signal)
ptrAss(logFilePtr, logFileRecord);
logFilePtr.p->nextLogFile = logFilePtr.i + 1;
logFilePtr.p->logFileStatus = LogFileRecord::LFS_IDLE;
+
+ logFilePtr.p->logLastPrepRef = new Uint32[clogFileSize];
+ logFilePtr.p->logMaxGciCompleted = new Uint32[clogFileSize];
+ logFilePtr.p->logMaxGciStarted = new Uint32[clogFileSize];
+
+ if (logFilePtr.p->logLastPrepRef == 0 ||
+ logFilePtr.p->logMaxGciCompleted == 0 ||
+ logFilePtr.p->logMaxGciStarted == 0)
+ {
+ char buf[256];
+ BaseString::snprintf(buf, sizeof(buf),
+ "Failed to alloc mbyte(%u) arrays for logfile %u",
+ clogFileSize, logFilePtr.i);
+ progError(__LINE__, NDBD_EXIT_MEMALLOC, buf);
+ }
+
}//for
logFilePtr.i = clogFileFileSize - 1;
ptrAss(logFilePtr, logFileRecord);
@@ -17044,41 +17118,31 @@ void Dblqh::initFragrec(Signal* signal,
* ========================================================================= */
void Dblqh::initGciInLogFileRec(Signal* signal, Uint32 noFdDescriptors)
{
- LogFileRecordPtr iglLogFilePtr;
- UintR tiglLoop;
- UintR tiglIndex;
-
- tiglLoop = 0;
- iglLogFilePtr.i = logFilePtr.i;
- iglLogFilePtr.p = logFilePtr.p;
-IGL_LOOP:
- for (tiglIndex = 0; tiglIndex <= ZNO_MBYTES_IN_FILE - 1; tiglIndex++) {
- arrGuard(((ZPAGE_HEADER_SIZE + ZFD_HEADER_SIZE) +
- (tiglLoop * ZFD_PART_SIZE)) + tiglIndex, ZPAGE_SIZE);
- iglLogFilePtr.p->logMaxGciCompleted[tiglIndex] =
- logPagePtr.p->logPageWord[((ZPAGE_HEADER_SIZE + ZFD_HEADER_SIZE) +
- (tiglLoop * ZFD_PART_SIZE)) + tiglIndex];
- arrGuard((((ZPAGE_HEADER_SIZE + ZFD_HEADER_SIZE) + ZNO_MBYTES_IN_FILE) +
- (tiglLoop * ZFD_PART_SIZE)) + tiglIndex, ZPAGE_SIZE);
- iglLogFilePtr.p->logMaxGciStarted[tiglIndex] =
- logPagePtr.p->logPageWord[(((ZPAGE_HEADER_SIZE + ZFD_HEADER_SIZE) +
- ZNO_MBYTES_IN_FILE) +
- (tiglLoop * ZFD_PART_SIZE)) + tiglIndex];
- arrGuard((((ZPAGE_HEADER_SIZE + ZFD_HEADER_SIZE) +
- (2 * ZNO_MBYTES_IN_FILE)) + (tiglLoop * ZFD_PART_SIZE)) +
- tiglIndex, ZPAGE_SIZE);
- iglLogFilePtr.p->logLastPrepRef[tiglIndex] =
- logPagePtr.p->logPageWord[(((ZPAGE_HEADER_SIZE + ZFD_HEADER_SIZE) +
- (2 * ZNO_MBYTES_IN_FILE)) +
- (tiglLoop * ZFD_PART_SIZE)) + tiglIndex];
- }//for
- tiglLoop = tiglLoop + 1;
- if (tiglLoop < noFdDescriptors) {
+ LogFileRecordPtr filePtr = logFilePtr;
+ Uint32 pos = ZPAGE_HEADER_SIZE + ZFD_HEADER_SIZE;
+ for (Uint32 fd = 0; fd < noFdDescriptors; fd++)
+ {
jam();
- iglLogFilePtr.i = iglLogFilePtr.p->prevLogFile;
- ptrCheckGuard(iglLogFilePtr, clogFileFileSize, logFileRecord);
- goto IGL_LOOP;
- }//if
+ for (Uint32 mb = 0; mb < clogFileSize; mb++)
+ {
+ jam();
+ Uint32 pos0 = pos + fd * (ZFD_MBYTE_SIZE * clogFileSize) + mb;
+ Uint32 pos1 = pos0 + clogFileSize;
+ Uint32 pos2 = pos1 + clogFileSize;
+ arrGuard(pos0, ZPAGE_SIZE);
+ arrGuard(pos1, ZPAGE_SIZE);
+ arrGuard(pos2, ZPAGE_SIZE);
+ filePtr.p->logMaxGciCompleted[mb] = logPagePtr.p->logPageWord[pos0];
+ filePtr.p->logMaxGciStarted[mb] = logPagePtr.p->logPageWord[pos1];
+ filePtr.p->logLastPrepRef[mb] = logPagePtr.p->logPageWord[pos2];
+ }
+ if (fd + 1 < noFdDescriptors)
+ {
+ jam();
+ filePtr.i = filePtr.p->prevLogFile;
+ ptrCheckGuard(filePtr, clogFileFileSize, logFileRecord);
+ }
+ }
}//Dblqh::initGciInLogFileRec()
/* ==========================================================================
@@ -18331,7 +18395,7 @@ void Dblqh::writeNextLog(Signal* signal)
ndbrequire(logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] < ZPAGE_SIZE);
logPagePtr.p->logPageWord[logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX]] =
ZNEXT_MBYTE_TYPE;
- if (logFilePtr.p->currentMbyte == (ZNO_MBYTES_IN_FILE - 1)) {
+ if (logFilePtr.p->currentMbyte == (clogFileSize - 1)) {
jam();
/* -------------------------------------------------- */
/* CALCULATE THE NEW REMAINING WORDS WHEN */
@@ -18420,7 +18484,7 @@ void Dblqh::writeNextLog(Signal* signal)
systemError(signal, __LINE__);
}//if
}//if
- if (logFilePtr.p->currentMbyte == (ZNO_MBYTES_IN_FILE - 1)) {
+ if (logFilePtr.p->currentMbyte == (clogFileSize - 1)) {
jam();
twnlNextMbyte = 0;
if (logFilePtr.p->fileChangeState != LogFileRecord::NOT_ONGOING) {
diff --git a/storage/ndb/src/kernel/blocks/dblqh/Makefile.am b/storage/ndb/src/kernel/blocks/dblqh/Makefile.am
index c7c477a512c..b545096dc83 100644
--- a/storage/ndb/src/kernel/blocks/dblqh/Makefile.am
+++ b/storage/ndb/src/kernel/blocks/dblqh/Makefile.am
@@ -16,7 +16,7 @@
EXTRA_PROGRAMS = ndbd_redo_log_reader
ndbd_redo_log_reader_SOURCES = redoLogReader/records.cpp \
- redoLogReader/redoLogFileReader.cpp
+ redoLogReader/reader.cpp
include $(top_srcdir)/storage/ndb/config/common.mk.am
include $(top_srcdir)/storage/ndb/config/type_kernel.mk.am
diff --git a/storage/ndb/src/kernel/blocks/dblqh/redoLogReader/redoLogFileReader.cpp b/storage/ndb/src/kernel/blocks/dblqh/redoLogReader/reader.cpp
index e5df14aea9a..e5df14aea9a 100644
--- a/storage/ndb/src/kernel/blocks/dblqh/redoLogReader/redoLogFileReader.cpp
+++ b/storage/ndb/src/kernel/blocks/dblqh/redoLogReader/reader.cpp
diff --git a/storage/ndb/src/kernel/blocks/dbtup/Dbtup.hpp b/storage/ndb/src/kernel/blocks/dbtup/Dbtup.hpp
index 5c0972148dd..f28687dca0d 100644
--- a/storage/ndb/src/kernel/blocks/dbtup/Dbtup.hpp
+++ b/storage/ndb/src/kernel/blocks/dbtup/Dbtup.hpp
@@ -2700,6 +2700,10 @@ private:
ArrayPool<Page> c_page_pool;
Uint32 cnoOfAllocatedPages;
+ Uint32 m_max_allocate_pages;
+
+ /* read ahead in pages during disk order scan */
+ Uint32 m_max_page_read_ahead;
Tablerec *tablerec;
Uint32 cnoOfTablerec;
diff --git a/storage/ndb/src/kernel/blocks/dbtup/DbtupDebug.cpp b/storage/ndb/src/kernel/blocks/dbtup/DbtupDebug.cpp
index 1182ac4ee7d..8e532ae97b5 100644
--- a/storage/ndb/src/kernel/blocks/dbtup/DbtupDebug.cpp
+++ b/storage/ndb/src/kernel/blocks/dbtup/DbtupDebug.cpp
@@ -74,6 +74,10 @@ Dbtup::reportMemoryUsage(Signal* signal, int incDec){
sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 6, JBB);
}
+#ifdef VM_TRACE
+extern Uint32 fc_left, fc_right, fc_remove;
+#endif
+
void
Dbtup::execDUMP_STATE_ORD(Signal* signal)
{
@@ -155,12 +159,20 @@ Dbtup::execDUMP_STATE_ORD(Signal* signal)
return;
}//if
#endif
-#if defined VM_TRACE && 0
- if (type == 1211){
- ndbout_c("Startar modul test av Page Manager");
+#if defined VM_TRACE
+ if (type == 1211 || type == 1212 || type == 1213){
+ Uint32 seed = time(0);
+ if (signal->getLength() > 1)
+ seed = signal->theData[1];
+ ndbout_c("Startar modul test av Page Manager (seed: 0x%x)", seed);
+ srand(seed);
Vector<Chunk> chunks;
const Uint32 LOOPS = 1000;
+ Uint32 sum_req = 0;
+ Uint32 sum_conf = 0;
+ Uint32 sum_loop = 0;
+ Uint32 max_loop = 0;
for(Uint32 i = 0; i<LOOPS; i++){
// Case
@@ -177,8 +189,15 @@ Dbtup::execDUMP_STATE_ORD(Signal* signal)
if(chunks.size() == 0 && c == 0){
c = 1 + rand() % 2;
}
+
+ if (type == 1211)
+ ndbout_c("loop=%d case=%d free=%d alloc=%d", i, c, free, alloc);
- ndbout_c("loop=%d case=%d free=%d alloc=%d", i, c, free, alloc);
+ if (type == 1213)
+ {
+ c = 1;
+ alloc = 2 + (sum_conf >> 3) + (sum_conf >> 4);
+ }
switch(c){
case 0:{ // Release
const int ch = rand() % chunks.size();
@@ -190,23 +209,33 @@ Dbtup::execDUMP_STATE_ORD(Signal* signal)
case 2: { // Seize(n) - fail
alloc += free;
// Fall through
+ sum_req += free;
+ goto doalloc;
}
case 1: { // Seize(n) (success)
-
+ sum_req += alloc;
+ doalloc:
Chunk chunk;
allocConsPages(alloc, chunk.pageCount, chunk.pageId);
ndbrequire(chunk.pageCount <= alloc);
if(chunk.pageCount != 0){
chunks.push_back(chunk);
if(chunk.pageCount != alloc) {
- ndbout_c(" Tried to allocate %d - only allocated %d - free: %d",
- alloc, chunk.pageCount, free);
+ if (type == 1211)
+ ndbout_c(" Tried to allocate %d - only allocated %d - free: %d",
+ alloc, chunk.pageCount, free);
}
} else {
ndbout_c(" Failed to alloc %d pages with %d pages free",
alloc, free);
}
+ sum_conf += chunk.pageCount;
+ Uint32 tot = fc_left + fc_right + fc_remove;
+ sum_loop += tot;
+ if (tot > max_loop)
+ max_loop = tot;
+
for(Uint32 i = 0; i<chunk.pageCount; i++){
PagePtr pagePtr;
pagePtr.i = chunk.pageId + i;
@@ -225,6 +254,10 @@ Dbtup::execDUMP_STATE_ORD(Signal* signal)
returnCommonArea(chunk.pageId, chunk.pageCount);
chunks.erase(chunks.size() - 1);
}
+
+ ndbout_c("Got %u%% of requested allocs, loops : %u 100*avg: %u max: %u",
+ (100 * sum_conf) / sum_req, sum_loop, 100*sum_loop / LOOPS,
+ max_loop);
}
#endif
}//Dbtup::execDUMP_STATE_ORD()
diff --git a/storage/ndb/src/kernel/blocks/dbtup/DbtupGen.cpp b/storage/ndb/src/kernel/blocks/dbtup/DbtupGen.cpp
index a9f0083a2b6..74c7d38bd64 100644
--- a/storage/ndb/src/kernel/blocks/dbtup/DbtupGen.cpp
+++ b/storage/ndb/src/kernel/blocks/dbtup/DbtupGen.cpp
@@ -305,6 +305,12 @@ void Dbtup::execREAD_CONFIG_REQ(Signal* signal)
Uint32 noOfTriggers= 0;
Uint32 tmp= 0;
+
+ if (ndb_mgm_get_int_parameter(p, CFG_DB_MAX_ALLOCATE, &tmp))
+ tmp = 32 * 1024 * 1024;
+ m_max_allocate_pages = (tmp + GLOBAL_PAGE_SIZE - 1) / GLOBAL_PAGE_SIZE;
+
+ tmp = 0;
ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_TUP_PAGE_RANGE, &tmp));
initPageRangeSize(tmp);
ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_TUP_TABLE, &cnoOfTablerec));
@@ -338,6 +344,18 @@ void Dbtup::execREAD_CONFIG_REQ(Signal* signal)
ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DB_BATCH_SIZE, &nScanBatch));
c_scanLockPool.setSize(nScanOp * nScanBatch);
+
+ /* read ahead for disk scan cannot be more than the disk page buffer */
+ {
+ Uint64 tmp = 64*1024*1024;
+ ndb_mgm_get_int64_parameter(p, CFG_DB_DISK_PAGE_BUFFER_MEMORY, &tmp);
+ m_max_page_read_ahead = (tmp + GLOBAL_PAGE_SIZE - 1) / GLOBAL_PAGE_SIZE; // in pages
+ // never read ahead more than 32 pages
+ if (m_max_page_read_ahead > 32)
+ m_max_page_read_ahead = 32;
+ }
+
+
ScanOpPtr lcp;
ndbrequire(c_scanOpPool.seize(lcp));
new (lcp.p) ScanOp();
diff --git a/storage/ndb/src/kernel/blocks/dbtup/DbtupPagMan.cpp b/storage/ndb/src/kernel/blocks/dbtup/DbtupPagMan.cpp
index d10fabf42da..24806062fcf 100644
--- a/storage/ndb/src/kernel/blocks/dbtup/DbtupPagMan.cpp
+++ b/storage/ndb/src/kernel/blocks/dbtup/DbtupPagMan.cpp
@@ -146,10 +146,17 @@ void Dbtup::initializePage()
cnoOfAllocatedPages = tmp; // Is updated by returnCommonArea
}//Dbtup::initializePage()
+#ifdef VM_TRACE
+Uint32 fc_left, fc_right, fc_remove;
+#endif
+
void Dbtup::allocConsPages(Uint32 noOfPagesToAllocate,
Uint32& noOfPagesAllocated,
Uint32& allocPageRef)
{
+#ifdef VM_TRACE
+ fc_left = fc_right = fc_remove = 0;
+#endif
if (noOfPagesToAllocate == 0){
jam();
noOfPagesAllocated = 0;
@@ -228,7 +235,10 @@ void Dbtup::findFreeLeftNeighbours(Uint32& allocPageRef,
{
PagePtr pageFirstPtr, pageLastPtr;
Uint32 remainAllocate = noOfPagesToAllocate - noPagesAllocated;
- while (allocPageRef > 0) {
+ Uint32 loop = 0;
+ while (allocPageRef > 0 &&
+ ++loop < 16)
+ {
jam();
pageLastPtr.i = allocPageRef - 1;
c_page_pool.getPtr(pageLastPtr);
@@ -256,6 +266,9 @@ void Dbtup::findFreeLeftNeighbours(Uint32& allocPageRef,
remainAllocate -= listSize;
}//if
}//if
+#ifdef VM_TRACE
+ fc_left++;
+#endif
}//while
}//Dbtup::findFreeLeftNeighbours()
@@ -269,7 +282,10 @@ void Dbtup::findFreeRightNeighbours(Uint32& allocPageRef,
jam();
return;
}//if
- while ((allocPageRef + noPagesAllocated) < c_page_pool.getSize()) {
+ Uint32 loop = 0;
+ while ((allocPageRef + noPagesAllocated) < c_page_pool.getSize() &&
+ ++loop < 16)
+ {
jam();
pageFirstPtr.i = allocPageRef + noPagesAllocated;
c_page_pool.getPtr(pageFirstPtr);
@@ -296,24 +312,37 @@ void Dbtup::findFreeRightNeighbours(Uint32& allocPageRef,
remainAllocate -= listSize;
}//if
}//if
+#ifdef VM_TRACE
+ fc_right++;
+#endif
}//while
}//Dbtup::findFreeRightNeighbours()
void Dbtup::insertCommonArea(Uint32 insPageRef, Uint32 insList)
{
cnoOfAllocatedPages -= (1 << insList);
- PagePtr pageLastPtr, pageInsPtr;
+ PagePtr pageLastPtr, pageInsPtr, pageHeadPtr;
+ pageHeadPtr.i = cfreepageList[insList];
c_page_pool.getPtr(pageInsPtr, insPageRef);
ndbrequire(insList < 16);
pageLastPtr.i = (pageInsPtr.i + (1 << insList)) - 1;
- pageInsPtr.p->next_cluster_page = cfreepageList[insList];
+ pageInsPtr.p->page_state = ZFREE_COMMON;
+ pageInsPtr.p->next_cluster_page = pageHeadPtr.i;
pageInsPtr.p->prev_cluster_page = RNIL;
pageInsPtr.p->last_cluster_page = pageLastPtr.i;
cfreepageList[insList] = pageInsPtr.i;
+ if (pageHeadPtr.i != RNIL)
+ {
+ jam();
+ c_page_pool.getPtr(pageHeadPtr);
+ pageHeadPtr.p->prev_cluster_page = pageInsPtr.i;
+ }
+
c_page_pool.getPtr(pageLastPtr);
+ pageLastPtr.p->page_state = ZFREE_COMMON;
pageLastPtr.p->first_cluster_page = pageInsPtr.i;
pageLastPtr.p->next_page = RNIL;
}//Dbtup::insertCommonArea()
@@ -321,12 +350,13 @@ void Dbtup::insertCommonArea(Uint32 insPageRef, Uint32 insList)
void Dbtup::removeCommonArea(Uint32 remPageRef, Uint32 list)
{
cnoOfAllocatedPages += (1 << list);
- PagePtr pagePrevPtr, pageNextPtr, pageLastPtr, pageSearchPtr, remPagePtr;
+ PagePtr pagePrevPtr, pageNextPtr, pageLastPtr, remPagePtr;
c_page_pool.getPtr(remPagePtr, remPageRef);
ndbrequire(list < 16);
if (cfreepageList[list] == remPagePtr.i) {
jam();
+ ndbassert(remPagePtr.p->prev_cluster_page == RNIL);
cfreepageList[list] = remPagePtr.p->next_cluster_page;
pageNextPtr.i = cfreepageList[list];
if (pageNextPtr.i != RNIL) {
@@ -335,30 +365,25 @@ void Dbtup::removeCommonArea(Uint32 remPageRef, Uint32 list)
pageNextPtr.p->prev_cluster_page = RNIL;
}//if
} else {
- pageSearchPtr.i = cfreepageList[list];
- while (true) {
- jam();
- c_page_pool.getPtr(pageSearchPtr);
- pagePrevPtr = pageSearchPtr;
- pageSearchPtr.i = pageSearchPtr.p->next_cluster_page;
- if (pageSearchPtr.i == remPagePtr.i) {
- jam();
- break;
- }//if
- }//while
+ pagePrevPtr.i = remPagePtr.p->prev_cluster_page;
pageNextPtr.i = remPagePtr.p->next_cluster_page;
+ c_page_pool.getPtr(pagePrevPtr);
pagePrevPtr.p->next_cluster_page = pageNextPtr.i;
- if (pageNextPtr.i != RNIL) {
+ if (pageNextPtr.i != RNIL)
+ {
jam();
c_page_pool.getPtr(pageNextPtr);
pageNextPtr.p->prev_cluster_page = pagePrevPtr.i;
- }//if
+ }
}//if
remPagePtr.p->next_cluster_page= RNIL;
remPagePtr.p->last_cluster_page= RNIL;
remPagePtr.p->prev_cluster_page= RNIL;
+ remPagePtr.p->page_state = ~ZFREE_COMMON;
pageLastPtr.i = (remPagePtr.i + (1 << list)) - 1;
c_page_pool.getPtr(pageLastPtr);
pageLastPtr.p->first_cluster_page= RNIL;
+ pageLastPtr.p->page_state = ~ZFREE_COMMON;
+
}//Dbtup::removeCommonArea()
diff --git a/storage/ndb/src/kernel/blocks/dbtup/DbtupPageMap.cpp b/storage/ndb/src/kernel/blocks/dbtup/DbtupPageMap.cpp
index 6ef8d3585e9..cde63091cfb 100644
--- a/storage/ndb/src/kernel/blocks/dbtup/DbtupPageMap.cpp
+++ b/storage/ndb/src/kernel/blocks/dbtup/DbtupPageMap.cpp
@@ -432,6 +432,11 @@ void Dbtup::allocMoreFragPages(Fragrecord* const regFragPtr)
// We will grow by 18.75% plus two more additional pages to grow
// a little bit quicker in the beginning.
/* -----------------------------------------------------------------*/
+
+ if (noAllocPages > m_max_allocate_pages)
+ {
+ noAllocPages = m_max_allocate_pages;
+ }
Uint32 allocated = allocFragPages(regFragPtr, noAllocPages);
regFragPtr->noOfPagesToGrow += allocated;
}//Dbtup::allocMoreFragPages()
diff --git a/storage/ndb/src/kernel/blocks/dbtup/DbtupScan.cpp b/storage/ndb/src/kernel/blocks/dbtup/DbtupScan.cpp
index 948cd77b437..6e53531e118 100644
--- a/storage/ndb/src/kernel/blocks/dbtup/DbtupScan.cpp
+++ b/storage/ndb/src/kernel/blocks/dbtup/DbtupScan.cpp
@@ -687,13 +687,74 @@ Dbtup::scanNext(Signal* signal, ScanOpPtr scanPtr)
// move to next extent
jam();
pos.m_extent_info_ptr_i = ext_ptr.i;
- Extent_info* ext = c_extent_pool.getPtr(pos.m_extent_info_ptr_i);
+ ext = c_extent_pool.getPtr(pos.m_extent_info_ptr_i);
key.m_file_no = ext->m_key.m_file_no;
key.m_page_no = ext->m_first_page_no;
}
}
key.m_page_idx = 0;
pos.m_get = ScanPos::Get_page_dd;
+ /*
+ read ahead for scan in disk order
+ do read ahead every 8th page
+ */
+ if ((bits & ScanOp::SCAN_DD) &&
+ (((key.m_page_no - ext->m_first_page_no) & 7) == 0))
+ {
+ jam();
+ // initialize PGMAN request
+ Page_cache_client::Request preq;
+ preq.m_page = pos.m_key;
+ preq.m_callback = TheNULLCallback;
+
+ // set maximum read ahead
+ Uint32 read_ahead = m_max_page_read_ahead;
+
+ while (true)
+ {
+ // prepare page read ahead in current extent
+ Uint32 page_no = preq.m_page.m_page_no;
+ Uint32 page_no_limit = page_no + read_ahead;
+ Uint32 limit = ext->m_first_page_no + alloc.m_extent_size;
+ if (page_no_limit > limit)
+ {
+ jam();
+ // read ahead crosses extent, set limit for this extent
+ read_ahead = page_no_limit - limit;
+ page_no_limit = limit;
+ // and make sure we only read one extra extent next time around
+ if (read_ahead > alloc.m_extent_size)
+ read_ahead = alloc.m_extent_size;
+ }
+ else
+ {
+ jam();
+ read_ahead = 0; // no more to read ahead after this
+ }
+ // do read ahead pages for this extent
+ while (page_no < page_no_limit)
+ {
+ // page request to PGMAN
+ jam();
+ preq.m_page.m_page_no = page_no;
+ int flags = 0;
+ // ignore result
+ m_pgman.get_page(signal, preq, flags);
+ jamEntry();
+ page_no++;
+ }
+ if (!read_ahead || !list.next(ext_ptr))
+ {
+ // no more extents after this or read ahead done
+ jam();
+ break;
+ }
+ // move to next extent and initialize PGMAN request accordingly
+ Extent_info* ext = c_extent_pool.getPtr(ext_ptr.i);
+ preq.m_page.m_file_no = ext->m_key.m_file_no;
+ preq.m_page.m_page_no = ext->m_first_page_no;
+ }
+ } // if ScanOp::SCAN_DD read ahead
}
/*FALLTHRU*/
case ScanPos::Get_page_dd:
@@ -726,6 +787,7 @@ Dbtup::scanNext(Signal* signal, ScanOpPtr scanPtr)
safe_cast(&Dbtup::disk_page_tup_scan_callback);
int flags = 0;
int res = m_pgman.get_page(signal, preq, flags);
+ jamEntry();
if (res == 0) {
jam();
// request queued
diff --git a/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp b/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp
index adc6d1e3ed4..56ecc8ddc39 100644
--- a/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp
+++ b/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp
@@ -277,6 +277,14 @@ void Ndbcntr::execSTTOR(Signal* signal)
break;
case ZSTART_PHASE_1:
jam();
+ {
+ Uint32 db_watchdog_interval = 0;
+ const ndb_mgm_configuration_iterator * p =
+ m_ctx.m_config.getOwnConfigIterator();
+ ndb_mgm_get_int_parameter(p, CFG_DB_WATCHDOG_INTERVAL, &db_watchdog_interval);
+ ndbrequire(db_watchdog_interval);
+ update_watch_dog_timer(db_watchdog_interval);
+ }
startPhase1Lab(signal);
break;
case ZSTART_PHASE_2:
@@ -1410,6 +1418,13 @@ void Ndbcntr::execNODE_FAILREP(Signal* signal)
{
jamEntry();
+ if (ERROR_INSERTED(1001))
+ {
+ sendSignalWithDelay(reference(), GSN_NODE_FAILREP, signal, 100,
+ signal->getLength());
+ return;
+ }
+
const NodeFailRep * nodeFail = (NodeFailRep *)&signal->theData[0];
NdbNodeBitmask allFailed;
allFailed.assign(NdbNodeBitmask::Size, nodeFail->theNodes);
@@ -2734,16 +2749,34 @@ void Ndbcntr::execSTART_ORD(Signal* signal){
c_missra.execSTART_ORD(signal);
}
+#define CLEAR_DX 13
+#define CLEAR_LCP 3
+
void
-Ndbcntr::clearFilesystem(Signal* signal){
+Ndbcntr::clearFilesystem(Signal* signal)
+{
+ const Uint32 lcp = c_fsRemoveCount >= CLEAR_DX;
+
FsRemoveReq * req = (FsRemoveReq *)signal->getDataPtrSend();
req->userReference = reference();
req->userPointer = 0;
req->directory = 1;
req->ownDirectory = 1;
- FsOpenReq::setVersion(req->fileNumber, 3);
- FsOpenReq::setSuffix(req->fileNumber, FsOpenReq::S_CTL); // Can by any...
- FsOpenReq::v1_setDisk(req->fileNumber, c_fsRemoveCount);
+
+ if (lcp == 0)
+ {
+ FsOpenReq::setVersion(req->fileNumber, 3);
+ FsOpenReq::setSuffix(req->fileNumber, FsOpenReq::S_CTL); // Can by any...
+ FsOpenReq::v1_setDisk(req->fileNumber, c_fsRemoveCount);
+ }
+ else
+ {
+ FsOpenReq::setVersion(req->fileNumber, 5);
+ FsOpenReq::setSuffix(req->fileNumber, FsOpenReq::S_DATA);
+ FsOpenReq::v5_setLcpNo(req->fileNumber, c_fsRemoveCount - CLEAR_DX);
+ FsOpenReq::v5_setTableId(req->fileNumber, 0);
+ FsOpenReq::v5_setFragmentId(req->fileNumber, 0);
+ }
sendSignal(NDBFS_REF, GSN_FSREMOVEREQ, signal,
FsRemoveReq::SignalLength, JBA);
c_fsRemoveCount++;
@@ -2752,12 +2785,12 @@ Ndbcntr::clearFilesystem(Signal* signal){
void
Ndbcntr::execFSREMOVECONF(Signal* signal){
jamEntry();
- if(c_fsRemoveCount == 13){
+ if(c_fsRemoveCount == CLEAR_DX + CLEAR_LCP){
jam();
sendSttorry(signal);
} else {
jam();
- ndbrequire(c_fsRemoveCount < 13);
+ ndbrequire(c_fsRemoveCount < CLEAR_DX + CLEAR_LCP);
clearFilesystem(signal);
}//if
}
diff --git a/storage/ndb/src/kernel/blocks/ndbfs/AsyncFile.cpp b/storage/ndb/src/kernel/blocks/ndbfs/AsyncFile.cpp
index 5f93ee31bc7..cf18bf34040 100644
--- a/storage/ndb/src/kernel/blocks/ndbfs/AsyncFile.cpp
+++ b/storage/ndb/src/kernel/blocks/ndbfs/AsyncFile.cpp
@@ -163,7 +163,12 @@ AsyncFile::run()
theStartFlag = true;
// Create write buffer for bigger writes
theWriteBufferSize = WRITEBUFFERSIZE;
- theWriteBuffer = (char *) ndbd_malloc(theWriteBufferSize);
+ theWriteBufferUnaligned = (char *) ndbd_malloc(theWriteBufferSize +
+ NDB_O_DIRECT_WRITE_ALIGNMENT-1);
+ theWriteBuffer = (char *)
+ (((UintPtr)theWriteBufferUnaligned + NDB_O_DIRECT_WRITE_ALIGNMENT - 1) &
+ ~(UintPtr)(NDB_O_DIRECT_WRITE_ALIGNMENT - 1));
+
NdbMutex_Unlock(theStartMutexPtr);
NdbCondition_Signal(theStartConditionPtr);
@@ -247,6 +252,78 @@ AsyncFile::run()
static char g_odirect_readbuf[2*GLOBAL_PAGE_SIZE -1];
#endif
+int
+AsyncFile::check_odirect_write(Uint32 flags, int& new_flags, int mode)
+{
+ assert(new_flags & (O_CREAT | O_TRUNC));
+#ifdef O_DIRECT
+ int ret;
+ char * bufptr = (char*)((UintPtr(g_odirect_readbuf)+(GLOBAL_PAGE_SIZE - 1)) & ~(GLOBAL_PAGE_SIZE - 1));
+ while (((ret = ::write(theFd, bufptr, GLOBAL_PAGE_SIZE)) == -1) &&
+ (errno == EINTR));
+ if (ret == -1)
+ {
+ new_flags &= ~O_DIRECT;
+ ndbout_c("%s Failed to write using O_DIRECT, disabling",
+ theFileName.c_str());
+ }
+
+ close(theFd);
+ theFd = ::open(theFileName.c_str(), new_flags, mode);
+ if (theFd == -1)
+ return errno;
+#endif
+
+ return 0;
+}
+
+int
+AsyncFile::check_odirect_read(Uint32 flags, int &new_flags, int mode)
+{
+#ifdef O_DIRECT
+ int ret;
+ char * bufptr = (char*)((UintPtr(g_odirect_readbuf)+(GLOBAL_PAGE_SIZE - 1)) & ~(GLOBAL_PAGE_SIZE - 1));
+ while (((ret = ::read(theFd, bufptr, GLOBAL_PAGE_SIZE)) == -1) &&
+ (errno == EINTR));
+ if (ret == -1)
+ {
+ ndbout_c("%s Failed to read using O_DIRECT, disabling",
+ theFileName.c_str());
+ goto reopen;
+ }
+
+ if(lseek(theFd, 0, SEEK_SET) != 0)
+ {
+ return errno;
+ }
+
+ if ((flags & FsOpenReq::OM_CHECK_SIZE) == 0)
+ {
+ struct stat buf;
+ if ((fstat(theFd, &buf) == -1))
+ {
+ return errno;
+ }
+ else if ((buf.st_size % GLOBAL_PAGE_SIZE) != 0)
+ {
+ ndbout_c("%s filesize not a multiple of %d, disabling O_DIRECT",
+ theFileName.c_str(), GLOBAL_PAGE_SIZE);
+ goto reopen;
+ }
+ }
+
+ return 0;
+
+reopen:
+ close(theFd);
+ new_flags &= ~O_DIRECT;
+ theFd = ::open(theFileName.c_str(), new_flags, mode);
+ if (theFd == -1)
+ return errno;
+#endif
+ return 0;
+}
+
void AsyncFile::openReq(Request* request)
{
m_auto_sync_freq = 0;
@@ -312,7 +389,7 @@ void AsyncFile::openReq(Request* request)
}
#else
Uint32 flags = request->par.open.flags;
- Uint32 new_flags = 0;
+ int new_flags = 0;
// Convert file open flags from Solaris to Liux
if (flags & FsOpenReq::OM_CREATE)
@@ -343,10 +420,6 @@ void AsyncFile::openReq(Request* request)
{
new_flags |= O_DIRECT;
}
-#elif defined O_SYNC
- {
- flags |= FsOpenReq::OM_SYNC;
- }
#endif
if ((flags & FsOpenReq::OM_SYNC) && ! (flags & FsOpenReq::OM_INIT))
@@ -355,15 +428,19 @@ void AsyncFile::openReq(Request* request)
new_flags |= O_SYNC;
#endif
}
-
+
+ const char * rw = "";
switch(flags & 0x3){
case FsOpenReq::OM_READONLY:
+ rw = "r";
new_flags |= O_RDONLY;
break;
case FsOpenReq::OM_WRITEONLY:
+ rw = "w";
new_flags |= O_WRONLY;
break;
case FsOpenReq::OM_READWRITE:
+ rw = "rw";
new_flags |= O_RDWR;
break;
default:
@@ -404,11 +481,6 @@ no_odirect:
if (new_flags & O_DIRECT)
{
new_flags &= ~O_DIRECT;
- flags |= FsOpenReq::OM_SYNC;
-#ifdef O_SYNC
- if (! (flags & FsOpenReq::OM_INIT))
- new_flags |= O_SYNC;
-#endif
goto no_odirect;
}
#endif
@@ -421,11 +493,6 @@ no_odirect:
else if (new_flags & O_DIRECT)
{
new_flags &= ~O_DIRECT;
- flags |= FsOpenReq::OM_SYNC;
-#ifdef O_SYNC
- if (! (flags & FsOpenReq::OM_INIT))
- new_flags |= O_SYNC;
-#endif
goto no_odirect;
}
#endif
@@ -512,7 +579,6 @@ no_odirect:
{
ndbout_c("error on first write(%d), disable O_DIRECT", err);
new_flags &= ~O_DIRECT;
- flags |= FsOpenReq::OM_SYNC;
close(theFd);
theFd = ::open(theFileName.c_str(), new_flags, mode);
if (theFd != -1)
@@ -532,26 +598,32 @@ no_odirect:
else if (flags & FsOpenReq::OM_DIRECT)
{
#ifdef O_DIRECT
- do {
- int ret;
- char * bufptr = (char*)((UintPtr(g_odirect_readbuf)+(GLOBAL_PAGE_SIZE - 1)) & ~(GLOBAL_PAGE_SIZE - 1));
- while (((ret = ::read(theFd, bufptr, GLOBAL_PAGE_SIZE)) == -1) && (errno == EINTR));
- if (ret == -1)
- {
- ndbout_c("%s Failed to read using O_DIRECT, disabling", theFileName.c_str());
- flags |= FsOpenReq::OM_SYNC;
- flags |= FsOpenReq::OM_INIT;
- break;
- }
- if(lseek(theFd, 0, SEEK_SET) != 0)
- {
- request->error = errno;
- return;
- }
- } while (0);
+ if (flags & (FsOpenReq::OM_TRUNCATE | FsOpenReq::OM_CREATE))
+ {
+ request->error = check_odirect_write(flags, new_flags, mode);
+ }
+ else
+ {
+ request->error = check_odirect_read(flags, new_flags, mode);
+ }
+
+ if (request->error)
+ return;
#endif
}
-
+#ifdef VM_TRACE
+ if (flags & FsOpenReq::OM_DIRECT)
+ {
+#ifdef O_DIRECT
+ ndbout_c("%s %s O_DIRECT: %d",
+ theFileName.c_str(), rw,
+ !!(new_flags & O_DIRECT));
+#else
+ ndbout_c("%s %s O_DIRECT: 0",
+ theFileName.c_str(), rw);
+#endif
+ }
+#endif
if ((flags & FsOpenReq::OM_SYNC) && (flags & FsOpenReq::OM_INIT))
{
#ifdef O_SYNC
@@ -562,6 +634,10 @@ no_odirect:
new_flags &= ~(O_CREAT | O_TRUNC);
new_flags |= O_SYNC;
theFd = ::open(theFileName.c_str(), new_flags, mode);
+ if (theFd == -1)
+ {
+ request->error = errno;
+ }
#endif
}
#endif
@@ -1079,7 +1155,8 @@ AsyncFile::rmrfReq(Request * request, char * path, bool removePath){
void AsyncFile::endReq()
{
// Thread is ended with return
- if (theWriteBuffer) ndbd_free(theWriteBuffer, theWriteBufferSize);
+ if (theWriteBufferUnaligned)
+ ndbd_free(theWriteBufferUnaligned, theWriteBufferSize);
}
diff --git a/storage/ndb/src/kernel/blocks/ndbfs/AsyncFile.hpp b/storage/ndb/src/kernel/blocks/ndbfs/AsyncFile.hpp
index cc667225ce2..d8d585c47f7 100644
--- a/storage/ndb/src/kernel/blocks/ndbfs/AsyncFile.hpp
+++ b/storage/ndb/src/kernel/blocks/ndbfs/AsyncFile.hpp
@@ -234,9 +234,13 @@ private:
bool theStartFlag;
int theWriteBufferSize;
char* theWriteBuffer;
+ void* theWriteBufferUnaligned;
size_t m_write_wo_sync; // Writes wo/ sync
size_t m_auto_sync_freq; // Auto sync freq in bytes
+
+ int check_odirect_read(Uint32 flags, int&new_flags, int mode);
+ int check_odirect_write(Uint32 flags, int&new_flags, int mode);
public:
SimulatedBlock& m_fs;
Ptr<GlobalPage> m_page_ptr;
diff --git a/storage/ndb/src/kernel/blocks/ndbfs/Ndbfs.cpp b/storage/ndb/src/kernel/blocks/ndbfs/Ndbfs.cpp
index 44f8a8ab05b..26bf8878852 100644
--- a/storage/ndb/src/kernel/blocks/ndbfs/Ndbfs.cpp
+++ b/storage/ndb/src/kernel/blocks/ndbfs/Ndbfs.cpp
@@ -652,7 +652,7 @@ AsyncFile*
Ndbfs::createAsyncFile(){
// Check limit of open files
- if (m_maxFiles !=0 && theFiles.size()+1 == m_maxFiles) {
+ if (m_maxFiles !=0 && theFiles.size() == m_maxFiles) {
// Print info about all open files
for (unsigned i = 0; i < theFiles.size(); i++){
AsyncFile* file = theFiles[i];
diff --git a/storage/ndb/src/kernel/blocks/pgman.cpp b/storage/ndb/src/kernel/blocks/pgman.cpp
index 57563d3c6d4..006673cf011 100644
--- a/storage/ndb/src/kernel/blocks/pgman.cpp
+++ b/storage/ndb/src/kernel/blocks/pgman.cpp
@@ -122,9 +122,9 @@ Pgman::execREAD_CONFIG_REQ(Signal* signal)
if (page_buffer > 0)
{
- page_buffer /= GLOBAL_PAGE_SIZE; // in pages
- m_page_entry_pool.setSize(100*page_buffer);
+ page_buffer = (page_buffer + GLOBAL_PAGE_SIZE - 1) / GLOBAL_PAGE_SIZE; // in pages
m_param.m_max_pages = page_buffer;
+ m_page_entry_pool.setSize(m_param.m_lirs_stack_mult * page_buffer);
m_param.m_max_hot_pages = (page_buffer * 9) / 10;
}
@@ -141,9 +141,10 @@ Pgman::execREAD_CONFIG_REQ(Signal* signal)
Pgman::Param::Param() :
m_max_pages(64), // smallish for testing
+ m_lirs_stack_mult(10),
m_max_hot_pages(56),
m_max_loop_count(256),
- m_max_io_waits(64),
+ m_max_io_waits(256),
m_stats_loop_delay(1000),
m_cleanup_loop_delay(200),
m_lcp_loop_delay(0)
@@ -301,6 +302,9 @@ Pgman::get_sublist_no(Page_state state)
{
return Page_entry::SL_LOCKED;
}
+ if (state == Page_entry::ONSTACK) {
+ return Page_entry::SL_IDLE;
+ }
return Page_entry::SL_OTHER;
}
@@ -415,15 +419,55 @@ Pgman::get_page_entry(Ptr<Page_entry>& ptr, Uint32 file_no, Uint32 page_no)
{
if (find_page_entry(ptr, file_no, page_no))
{
+ jam();
ndbrequire(ptr.p->m_state != 0);
m_stats.m_page_hits++;
+
+#ifdef VM_TRACE
+ debugOut << "PGMAN: get_page_entry: found" << endl;
+ debugOut << "PGMAN: " << ptr << endl;
+#endif
return true;
}
+ if (m_page_entry_pool.getNoOfFree() == 0)
+ {
+ jam();
+ Page_sublist& pl_idle = *m_page_sublist[Page_entry::SL_IDLE];
+ Ptr<Page_entry> idle_ptr;
+ if (pl_idle.first(idle_ptr))
+ {
+ jam();
+
+#ifdef VM_TRACE
+ debugOut << "PGMAN: get_page_entry: re-use idle entry" << endl;
+ debugOut << "PGMAN: " << idle_ptr << endl;
+#endif
+
+ Page_state state = idle_ptr.p->m_state;
+ ndbrequire(state == Page_entry::ONSTACK);
+
+ Page_stack& pl_stack = m_page_stack;
+ ndbrequire(pl_stack.hasPrev(idle_ptr));
+ pl_stack.remove(idle_ptr);
+ state &= ~ Page_entry::ONSTACK;
+ set_page_state(idle_ptr, state);
+ ndbrequire(idle_ptr.p->m_state == 0);
+
+ release_page_entry(idle_ptr);
+ }
+ }
+
if (seize_page_entry(ptr, file_no, page_no))
{
+ jam();
ndbrequire(ptr.p->m_state == 0);
m_stats.m_page_faults++;
+
+#ifdef VM_TRACE
+ debugOut << "PGMAN: get_page_entry: seize" << endl;
+ debugOut << "PGMAN: " << ptr << endl;
+#endif
return true;
}
@@ -624,6 +668,7 @@ Pgman::lirs_reference(Ptr<Page_entry> ptr)
jam();
move_cleanup_ptr(ptr);
pl_queue.remove(ptr);
+ state &= ~ Page_entry::ONQUEUE;
}
if (state & Page_entry::BOUND)
{
@@ -654,6 +699,12 @@ Pgman::lirs_reference(Ptr<Page_entry> ptr)
pl_stack.add(ptr);
state |= Page_entry::ONSTACK;
state |= Page_entry::HOT;
+ // it could be on queue already
+ if (state & Page_entry::ONQUEUE) {
+ jam();
+ pl_queue.remove(ptr);
+ state &= ~Page_entry::ONQUEUE;
+ }
}
set_page_state(ptr, state);
@@ -902,9 +953,11 @@ Pgman::process_map(Signal* signal)
#ifdef VM_TRACE
debugOut << "PGMAN: >process_map" << endl;
#endif
- int max_count = m_param.m_max_io_waits - m_stats.m_current_io_waits;
- if (max_count > 0)
+ int max_count = 0;
+ if (m_param.m_max_io_waits > m_stats.m_current_io_waits) {
+ max_count = m_param.m_max_io_waits - m_stats.m_current_io_waits;
max_count = max_count / 2 + 1;
+ }
Page_sublist& pl_map = *m_page_sublist[Page_entry::SL_MAP];
while (! pl_map.isEmpty() && --max_count >= 0)
@@ -1056,15 +1109,10 @@ Pgman::process_cleanup(Signal* signal)
}
int max_loop_count = m_param.m_max_loop_count;
- int max_count = m_param.m_max_io_waits - m_stats.m_current_io_waits;
-
- if (max_count > 0)
- {
+ int max_count = 0;
+ if (m_param.m_max_io_waits > m_stats.m_current_io_waits) {
+ max_count = m_param.m_max_io_waits - m_stats.m_current_io_waits;
max_count = max_count / 2 + 1;
- /*
- * Possibly add code here to avoid writing too rapidly. May be
- * unnecessary since only cold pages are cleaned.
- */
}
Ptr<Page_entry> ptr = m_cleanup_ptr;
@@ -1166,9 +1214,12 @@ bool
Pgman::process_lcp(Signal* signal)
{
Page_hashlist& pl_hash = m_page_hashlist;
- int max_count = m_param.m_max_io_waits - m_stats.m_current_io_waits;
- if (max_count > 0)
+
+ int max_count = 0;
+ if (m_param.m_max_io_waits > m_stats.m_current_io_waits) {
+ max_count = m_param.m_max_io_waits - m_stats.m_current_io_waits;
max_count = max_count / 2 + 1;
+ }
#ifdef VM_TRACE
debugOut
@@ -1927,6 +1978,8 @@ Pgman::verify_page_entry(Ptr<Page_entry> ptr)
break;
case Page_entry::SL_LOCKED:
break;
+ case Page_entry::SL_IDLE:
+ break;
case Page_entry::SL_OTHER:
break;
default:
@@ -1973,8 +2026,11 @@ Pgman::verify_page_lists()
ndbrequire(stack_count == pl_stack.count() || dump_page_lists());
ndbrequire(queue_count == pl_queue.count() || dump_page_lists());
+ Uint32 hot_count = 0;
Uint32 hot_bound_count = 0;
Uint32 cold_bound_count = 0;
+ Uint32 stack_request_count = 0;
+ Uint32 queue_request_count = 0;
Uint32 i1 = RNIL;
for (pl_stack.first(ptr); ptr.i != RNIL; pl_stack.next(ptr))
@@ -1985,9 +2041,13 @@ Pgman::verify_page_lists()
ndbrequire(state & Page_entry::ONSTACK || dump_page_lists());
if (! pl_stack.hasPrev(ptr))
ndbrequire(state & Page_entry::HOT || dump_page_lists());
- if (state & Page_entry::HOT &&
- state & Page_entry::BOUND)
- hot_bound_count++;
+ if (state & Page_entry::HOT) {
+ hot_count++;
+ if (state & Page_entry::BOUND)
+ hot_bound_count++;
+ }
+ if (state & Page_entry::REQUEST)
+ stack_request_count++;
}
Uint32 i2 = RNIL;
@@ -1999,6 +2059,8 @@ Pgman::verify_page_lists()
ndbrequire(state & Page_entry::ONQUEUE || dump_page_lists());
ndbrequire(state & Page_entry::BOUND || dump_page_lists());
cold_bound_count++;
+ if (state & Page_entry::REQUEST)
+ queue_request_count++;
}
Uint32 tot_bound_count =
@@ -2031,7 +2093,11 @@ Pgman::verify_page_lists()
<< " cache:" << m_stats.m_num_pages
<< "(" << locked_bound_count << "L)"
<< " stack:" << pl_stack.count()
+ << " hot:" << hot_count
+ << " hot_bound:" << hot_bound_count
+ << " stack_request:" << stack_request_count
<< " queue:" << pl_queue.count()
+ << " queue_request:" << queue_request_count
<< " queuewait:" << queuewait_count << endl;
debugOut << "PGMAN:";
@@ -2139,6 +2205,8 @@ Pgman::get_sublist_name(Uint32 list_no)
return "busy";
case Page_entry::SL_LOCKED:
return "locked";
+ case Page_entry::SL_IDLE:
+ return "idle";
case Page_entry::SL_OTHER:
return "other";
}
diff --git a/storage/ndb/src/kernel/blocks/pgman.hpp b/storage/ndb/src/kernel/blocks/pgman.hpp
index 07029d1c3e5..e3bf0fa5780 100644
--- a/storage/ndb/src/kernel/blocks/pgman.hpp
+++ b/storage/ndb/src/kernel/blocks/pgman.hpp
@@ -325,8 +325,9 @@ private:
,SL_CALLBACK_IO = 4
,SL_BUSY = 5
,SL_LOCKED = 6
- ,SL_OTHER = 7
- ,SUBLIST_COUNT = 8
+ ,SL_IDLE = 7
+ ,SL_OTHER = 8
+ ,SUBLIST_COUNT = 9
};
Uint16 m_file_no; // disk page address set at seize
@@ -401,6 +402,7 @@ private:
struct Param {
Param();
Uint32 m_max_pages; // max number of cache pages
+ Uint32 m_lirs_stack_mult; // in m_max_pages (around 3-10)
Uint32 m_max_hot_pages; // max hot cache pages (up to 99%)
Uint32 m_max_loop_count; // limit purely local loops
Uint32 m_max_io_waits;
diff --git a/storage/ndb/src/kernel/blocks/restore.cpp b/storage/ndb/src/kernel/blocks/restore.cpp
index d4a2414ef2f..2d40cd79daa 100644
--- a/storage/ndb/src/kernel/blocks/restore.cpp
+++ b/storage/ndb/src/kernel/blocks/restore.cpp
@@ -557,6 +557,9 @@ Restore::restore_next(Signal* signal, FilePtr file_ptr)
case BackupFormat::GCP_ENTRY:
parse_gcp_entry(signal, file_ptr, data, len);
break;
+ case BackupFormat::EMPTY_ENTRY:
+ // skip
+ break;
case 0x4e444242: // 'NDBB'
if (check_file_version(signal, ntohl(* (data+2))) == 0)
{
diff --git a/storage/ndb/src/kernel/vm/Configuration.cpp b/storage/ndb/src/kernel/vm/Configuration.cpp
index ebdd4c97aab..72770d35cde 100644
--- a/storage/ndb/src/kernel/vm/Configuration.cpp
+++ b/storage/ndb/src/kernel/vm/Configuration.cpp
@@ -443,6 +443,11 @@ Configuration::setupConfiguration(){
"TimeBetweenWatchDogCheck missing");
}
+ if(iter.get(CFG_DB_WATCHDOG_INTERVAL_INITIAL, &_timeBetweenWatchDogCheckInitial)){
+ ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, "Invalid configuration fetched",
+ "TimeBetweenWatchDogCheckInitial missing");
+ }
+
/**
* Get paths
*/
@@ -462,9 +467,12 @@ Configuration::setupConfiguration(){
* Create the watch dog thread
*/
{
- Uint32 t = _timeBetweenWatchDogCheck;
+ if (_timeBetweenWatchDogCheckInitial < _timeBetweenWatchDogCheck)
+ _timeBetweenWatchDogCheckInitial = _timeBetweenWatchDogCheck;
+
+ Uint32 t = _timeBetweenWatchDogCheckInitial;
t = globalEmulatorData.theWatchDog ->setCheckInterval(t);
- _timeBetweenWatchDogCheck = t;
+ _timeBetweenWatchDogCheckInitial = t;
}
ConfigValues* cf = ConfigValuesFactory::extractCurrentSection(iter.m_config);
diff --git a/storage/ndb/src/kernel/vm/Configuration.hpp b/storage/ndb/src/kernel/vm/Configuration.hpp
index 934261e40af..918a889a171 100644
--- a/storage/ndb/src/kernel/vm/Configuration.hpp
+++ b/storage/ndb/src/kernel/vm/Configuration.hpp
@@ -84,6 +84,7 @@ private:
Uint32 _maxErrorLogs;
Uint32 _lockPagesInMainMemory;
Uint32 _timeBetweenWatchDogCheck;
+ Uint32 _timeBetweenWatchDogCheckInitial;
ndb_mgm_configuration * m_ownConfig;
ndb_mgm_configuration * m_clusterConfig;
diff --git a/storage/ndb/src/kernel/vm/SimulatedBlock.cpp b/storage/ndb/src/kernel/vm/SimulatedBlock.cpp
index 3125fc33258..bc16b9f364e 100644
--- a/storage/ndb/src/kernel/vm/SimulatedBlock.cpp
+++ b/storage/ndb/src/kernel/vm/SimulatedBlock.cpp
@@ -19,6 +19,7 @@
#include <NdbOut.hpp>
#include <GlobalData.hpp>
#include <Emulator.hpp>
+#include <WatchDog.hpp>
#include <ErrorHandlingMacros.hpp>
#include <TimeQueue.hpp>
#include <TransporterRegistry.hpp>
@@ -38,6 +39,9 @@
#include <AttributeDescriptor.hpp>
#include <NdbSqlUtil.hpp>
+#include <EventLogger.hpp>
+extern EventLogger g_eventLogger;
+
#define ljamEntry() jamEntryLine(30000 + __LINE__)
#define ljam() jamLine(30000 + __LINE__)
@@ -655,14 +659,20 @@ SimulatedBlock::getBatSize(Uint16 blockNo){
return sb->theBATSize;
}
+void* SimulatedBlock::allocRecord(const char * type, size_t s, size_t n, bool clear, Uint32 paramId)
+{
+ return allocRecordAligned(type, s, n, 0, 0, clear, paramId);
+}
+
void*
-SimulatedBlock::allocRecord(const char * type, size_t s, size_t n, bool clear, Uint32 paramId)
+SimulatedBlock::allocRecordAligned(const char * type, size_t s, size_t n, void **unaligned_buffer, Uint32 align, bool clear, Uint32 paramId)
{
void * p = NULL;
- size_t size = n*s;
- Uint64 real_size = (Uint64)((Uint64)n)*((Uint64)s);
- refresh_watch_dog();
+ Uint32 over_alloc = unaligned_buffer ? (align - 1) : 0;
+ size_t size = n*s + over_alloc;
+ Uint64 real_size = (Uint64)((Uint64)n)*((Uint64)s) + over_alloc;
+ refresh_watch_dog(9);
if (real_size > 0){
#ifdef VM_TRACE_MEM
ndbout_c("%s::allocRecord(%s, %u, %u) = %llu bytes",
@@ -696,14 +706,24 @@ SimulatedBlock::allocRecord(const char * type, size_t s, size_t n, bool clear, U
char * ptr = (char*)p;
const Uint32 chunk = 128 * 1024;
while(size > chunk){
- refresh_watch_dog();
+ refresh_watch_dog(9);
memset(ptr, 0, chunk);
ptr += chunk;
size -= chunk;
}
- refresh_watch_dog();
+ refresh_watch_dog(9);
memset(ptr, 0, size);
}
+ if (unaligned_buffer)
+ {
+ *unaligned_buffer = p;
+ p = (void *)(((UintPtr)p + over_alloc) & ~(UintPtr)(over_alloc));
+#ifdef VM_TRACE
+ g_eventLogger.info("'%s' (%u) %llu %llu, alignment correction %u bytes",
+ type, align, (Uint64)p, (Uint64)p+n*s,
+ (Uint32)((UintPtr)p - (UintPtr)*unaligned_buffer));
+#endif
+ }
}
return p;
}
@@ -720,9 +740,16 @@ SimulatedBlock::deallocRecord(void ** ptr,
}
void
-SimulatedBlock::refresh_watch_dog()
+SimulatedBlock::refresh_watch_dog(Uint32 place)
+{
+ globalData.incrementWatchDogCounter(place);
+}
+
+void
+SimulatedBlock::update_watch_dog_timer(Uint32 interval)
{
- globalData.incrementWatchDogCounter(1);
+ extern EmulatorData globalEmulatorData;
+ globalEmulatorData.theWatchDog->setCheckInterval(interval);
}
void
@@ -1631,6 +1658,11 @@ SimulatedBlock::sendFragmentedSignal(NodeReceiverGroup rg,
}
SimulatedBlock::Callback SimulatedBlock::TheEmptyCallback = {0, 0};
+void
+SimulatedBlock::TheNULLCallbackFunction(class Signal*, Uint32, Uint32)
+{ abort(); /* should never be called */ }
+SimulatedBlock::Callback SimulatedBlock::TheNULLCallback =
+{ &SimulatedBlock::TheNULLCallbackFunction, 0 };
void
SimulatedBlock::sendFragmentedSignal(BlockReference ref,
diff --git a/storage/ndb/src/kernel/vm/SimulatedBlock.hpp b/storage/ndb/src/kernel/vm/SimulatedBlock.hpp
index 37a8dde5956..a78ee21fb8f 100644
--- a/storage/ndb/src/kernel/vm/SimulatedBlock.hpp
+++ b/storage/ndb/src/kernel/vm/SimulatedBlock.hpp
@@ -131,6 +131,8 @@ public:
virtual const char* get_filename(Uint32 fd) const { return "";}
protected:
static Callback TheEmptyCallback;
+ void TheNULLCallbackFunction(class Signal*, Uint32, Uint32);
+ static Callback TheNULLCallback;
void execute(Signal* signal, Callback & c, Uint32 returnCode);
@@ -334,7 +336,8 @@ protected:
* Refresh Watch Dog in initialising code
*
*/
- void refresh_watch_dog();
+ void refresh_watch_dog(Uint32 place = 1);
+ void update_watch_dog_timer(Uint32 interval);
/**
* Prog error
@@ -377,6 +380,7 @@ protected:
*
*/
void* allocRecord(const char * type, size_t s, size_t n, bool clear = true, Uint32 paramId = 0);
+ void* allocRecordAligned(const char * type, size_t s, size_t n, void **unaligned_buffer, Uint32 align = NDB_O_DIRECT_WRITE_ALIGNMENT, bool clear = true, Uint32 paramId = 0);
/**
* Deallocate record
@@ -597,6 +601,8 @@ inline
void
SimulatedBlock::execute(Signal* signal, Callback & c, Uint32 returnCode){
CallbackFunction fun = c.m_callbackFunction;
+ if (fun == TheNULLCallback.m_callbackFunction)
+ return;
ndbrequire(fun != 0);
c.m_callbackFunction = NULL;
(this->*fun)(signal, c.m_callbackData, returnCode);
diff --git a/storage/ndb/src/kernel/vm/WatchDog.cpp b/storage/ndb/src/kernel/vm/WatchDog.cpp
index d1abb709b1e..a7f5e8f5c2b 100644
--- a/storage/ndb/src/kernel/vm/WatchDog.cpp
+++ b/storage/ndb/src/kernel/vm/WatchDog.cpp
@@ -16,6 +16,7 @@
#include <ndb_global.h>
#include <my_pthread.h>
+#include <sys/times.h>
#include "WatchDog.hpp"
#include "GlobalData.hpp"
@@ -24,6 +25,8 @@
#include <ErrorHandlingMacros.hpp>
#include <EventLogger.hpp>
+#include <NdbTick.h>
+
extern EventLogger g_eventLogger;
extern "C"
@@ -71,66 +74,115 @@ WatchDog::doStop(){
}
}
+const char *get_action(Uint32 IPValue)
+{
+ const char *action;
+ switch (IPValue) {
+ case 1:
+ action = "Job Handling";
+ break;
+ case 2:
+ action = "Scanning Timers";
+ break;
+ case 3:
+ action = "External I/O";
+ break;
+ case 4:
+ action = "Print Job Buffers at crash";
+ break;
+ case 5:
+ action = "Checking connections";
+ break;
+ case 6:
+ action = "Performing Send";
+ break;
+ case 7:
+ action = "Polling for Receive";
+ break;
+ case 8:
+ action = "Performing Receive";
+ break;
+ case 9:
+ action = "Allocating memory";
+ break;
+ default:
+ action = "Unknown place";
+ break;
+ }//switch
+ return action;
+}
+
void
-WatchDog::run(){
- unsigned int anIPValue;
- unsigned int alerts = 0;
+WatchDog::run()
+{
+ unsigned int anIPValue, sleep_time;
unsigned int oldIPValue = 0;
-
+ unsigned int theIntervalCheck = theInterval;
+ struct MicroSecondTimer start_time, last_time, now;
+ NdbTick_getMicroTimer(&start_time);
+ last_time = start_time;
+
// WatchDog for the single threaded NDB
- while(!theStop){
- Uint32 tmp = theInterval / 500;
- tmp= (tmp ? tmp : 1);
-
- while(!theStop && tmp > 0){
- NdbSleep_MilliSleep(500);
- tmp--;
- }
-
+ while (!theStop)
+ {
+ sleep_time= 100;
+
+ NdbSleep_MilliSleep(sleep_time);
if(theStop)
break;
+ NdbTick_getMicroTimer(&now);
+ if (NdbTick_getMicrosPassed(last_time, now)/1000 > sleep_time*2)
+ {
+ struct tms my_tms;
+ times(&my_tms);
+ g_eventLogger.info("Watchdog: User time: %llu System time: %llu",
+ (Uint64)my_tms.tms_utime,
+ (Uint64)my_tms.tms_stime);
+ g_eventLogger.warning("Watchdog: Warning overslept %u ms, expected %u ms.",
+ NdbTick_getMicrosPassed(last_time, now)/1000,
+ sleep_time);
+ }
+ last_time = now;
+
// Verify that the IP thread is not stuck in a loop
anIPValue = *theIPValue;
- if(anIPValue != 0) {
+ if (anIPValue != 0)
+ {
oldIPValue = anIPValue;
globalData.incrementWatchDogCounter(0);
- alerts = 0;
- } else {
- const char *last_stuck_action;
- alerts++;
- switch (oldIPValue) {
- case 1:
- last_stuck_action = "Job Handling";
- break;
- case 2:
- last_stuck_action = "Scanning Timers";
- break;
- case 3:
- last_stuck_action = "External I/O";
- break;
- case 4:
- last_stuck_action = "Print Job Buffers at crash";
- break;
- case 5:
- last_stuck_action = "Checking connections";
- break;
- case 6:
- last_stuck_action = "Performing Send";
- break;
- case 7:
- last_stuck_action = "Polling for Receive";
- break;
- case 8:
- last_stuck_action = "Performing Receive";
- break;
- default:
- last_stuck_action = "Unknown place";
- break;
- }//switch
- g_eventLogger.warning("Ndb kernel is stuck in: %s", last_stuck_action);
- if(alerts == 3){
- shutdownSystem(last_stuck_action);
+ NdbTick_getMicroTimer(&start_time);
+ theIntervalCheck = theInterval;
+ }
+ else
+ {
+ int warn = 1;
+ Uint32 elapsed = NdbTick_getMicrosPassed(start_time, now)/1000;
+ /*
+ oldIPValue == 9 indicates malloc going on, this can take some time
+ so only warn if we pass the watchdog interval
+ */
+ if (oldIPValue == 9)
+ if (elapsed < theIntervalCheck)
+ warn = 0;
+ else
+ theIntervalCheck += theInterval;
+
+ if (warn)
+ {
+ const char *last_stuck_action = get_action(oldIPValue);
+ g_eventLogger.warning("Ndb kernel is stuck in: %s", last_stuck_action);
+ {
+ struct tms my_tms;
+ times(&my_tms);
+ g_eventLogger.info("Watchdog: User time: %llu System time: %llu",
+ (Uint64)my_tms.tms_utime,
+ (Uint64)my_tms.tms_stime);
+ }
+ if (elapsed > 3 * theInterval)
+ {
+ shutdownSystem(last_stuck_action);
+ }
}
}
}