summaryrefslogtreecommitdiff
path: root/ndb
diff options
context:
space:
mode:
author unknown <jonas@perch.ndb.mysql.com> 2007-02-17 23:52:17 +0100
committer unknown <jonas@perch.ndb.mysql.com> 2007-02-17 23:52:17 +0100
commit 778b4aad59994e166f91c50b47d686a53a5e8b80 (patch)
tree 17eaa4ec6effda4798e5aac5a50f4f139c262f0b /ndb
parent 0e39133ad73e7ec7e821591d912509054174d697 (diff)
download mariadb-git-778b4aad59994e166f91c50b47d686a53a5e8b80.tar.gz
ndb - bug#26457
master failure during master take over

ndb/src/kernel/blocks/ERROR_codes.txt:
  new error code
ndb/src/kernel/blocks/dbdih/DbdihMain.cpp:
  Make sure to clear NF_XX_LCP if master fails during master take-over
ndb/test/include/NdbRestarter.hpp:
  Add support for querying next master and node group (for multi node failure testing)
ndb/test/ndbapi/testNodeRestart.cpp:
  testcase
ndb/test/run-test/daily-basic-tests.txt:
  testcase
ndb/test/src/NdbRestarter.cpp:
  Add support for querying next master and node group (for multi node failure testing)
Diffstat (limited to 'ndb')
-rw-r--r-- ndb/src/kernel/blocks/ERROR_codes.txt 4
-rw-r--r-- ndb/src/kernel/blocks/dbdih/DbdihMain.cpp 27
-rw-r--r-- ndb/test/include/NdbRestarter.hpp 2
-rw-r--r-- ndb/test/ndbapi/testNodeRestart.cpp 42
-rw-r--r-- ndb/test/run-test/daily-basic-tests.txt 4
-rw-r--r-- ndb/test/src/NdbRestarter.cpp 62
6 files changed, 139 insertions, 2 deletions
diff --git a/ndb/src/kernel/blocks/ERROR_codes.txt b/ndb/src/kernel/blocks/ERROR_codes.txt
index 0bcc99a6334..f7cb49014cb 100644
--- a/ndb/src/kernel/blocks/ERROR_codes.txt
+++ b/ndb/src/kernel/blocks/ERROR_codes.txt
@@ -5,7 +5,7 @@ Next DBACC 3002
Next DBTUP 4014
Next DBLQH 5043
Next DBDICT 6007
-Next DBDIH 7178
+Next DBDIH 7181
Next DBTC 8039
Next CMVMI 9000
Next BACKUP 10022
@@ -71,6 +71,8 @@ Delay GCP_SAVEREQ by 10 secs
7177: Delay copying of sysfileData in execCOPY_GCIREQ
+7180: Crash master during master-take-over in execMASTER_LCPCONF
+
ERROR CODES FOR TESTING NODE FAILURE, LOCAL CHECKPOINT HANDLING:
-----------------------------------------------------------------
diff --git a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
index 5f573d40dfe..0e6fe4714b6 100644
--- a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
+++ b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
@@ -4612,6 +4612,8 @@ void
Dbdih::startLcpMasterTakeOver(Signal* signal, Uint32 nodeId){
jam();
+ Uint32 oldNode = c_lcpMasterTakeOverState.failedNodeId;
+
c_lcpMasterTakeOverState.minTableId = ~0;
c_lcpMasterTakeOverState.minFragId = ~0;
c_lcpMasterTakeOverState.failedNodeId = nodeId;
@@ -4630,7 +4632,20 @@ Dbdih::startLcpMasterTakeOver(Signal* signal, Uint32 nodeId){
/**
* Node failure during master take over...
*/
- ndbout_c("Nodefail during master take over");
+ ndbout_c("Nodefail during master take over (old: %d)", oldNode);
+ }
+
+ NodeRecordPtr nodePtr;
+ nodePtr.i = oldNode;
+ if (oldNode > 0 && oldNode < MAX_NDB_NODES)
+ {
+ jam();
+ ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
+ if (nodePtr.p->m_nodefailSteps.get(NF_LCP_TAKE_OVER))
+ {
+ jam();
+ checkLocalNodefailComplete(signal, oldNode, NF_LCP_TAKE_OVER);
+ }
}
setLocalNodefailHandling(signal, nodeId, NF_LCP_TAKE_OVER);
@@ -5646,6 +5661,14 @@ void Dbdih::execMASTER_LCPREQ(Signal* signal)
jamEntry();
const BlockReference newMasterBlockref = req->masterRef;
+ if (newMasterBlockref != cmasterdihref)
+ {
+ jam();
+ ndbout_c("resending GSN_MASTER_LCPREQ");
+ sendSignalWithDelay(reference(), GSN_MASTER_LCPREQ, signal,
+ signal->getLength(), 50);
+ return;
+ }
Uint32 failedNodeId = req->failedNodeId;
/**
@@ -5946,6 +5969,8 @@ void Dbdih::execMASTER_LCPCONF(Signal* signal)
ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
nodePtr.p->lcpStateAtTakeOver = lcpState;
+ CRASH_INSERTION(7180);
+
#ifdef VM_TRACE
ndbout_c("MASTER_LCPCONF");
printMASTER_LCP_CONF(stdout, &signal->theData[0], 0, 0);
diff --git a/ndb/test/include/NdbRestarter.hpp b/ndb/test/include/NdbRestarter.hpp
index 3ec92ae786e..a2e6a4b3248 100644
--- a/ndb/test/include/NdbRestarter.hpp
+++ b/ndb/test/include/NdbRestarter.hpp
@@ -62,6 +62,8 @@ public:
int dumpStateAllNodes(int * _args, int _num_args);
int getMasterNodeId();
+ int getNextMasterNodeId(int nodeId);
+ int getNodeGroup(int nodeId);
int getRandomNodeSameNodeGroup(int nodeId, int randomNumber);
int getRandomNodeOtherNodeGroup(int nodeId, int randomNumber);
int getRandomNotMasterNodeId(int randomNumber);
diff --git a/ndb/test/ndbapi/testNodeRestart.cpp b/ndb/test/ndbapi/testNodeRestart.cpp
index 43fb77342b5..e729e8179b5 100644
--- a/ndb/test/ndbapi/testNodeRestart.cpp
+++ b/ndb/test/ndbapi/testNodeRestart.cpp
@@ -1045,6 +1045,45 @@ int runBug25554(NDBT_Context* ctx, NDBT_Step* step){
return NDBT_OK;
}
+int
+runBug26457(NDBT_Context* ctx, NDBT_Step* step) // Regression test for bug#26457: arm error 7180 on the next-master candidate, then restart the current master.
+{
+ NdbRestarter res;
+ if (res.getNumDbNodes() < 4) // with fewer than 4 data nodes the test is skipped and reported as OK
+ return NDBT_OK;
+
+ int loops = ctx->getNumLoops();
+ while (loops --)
+ {
+retry: // re-entered without consuming a loop iteration after the candidate has been restarted
+ int master = res.getMasterNodeId();
+ int next = res.getNextMasterNodeId(master); // NOTE(review): a -1 result is not checked here
+
+ ndbout_c("master: %d next: %d", master, next);
+
+ if (res.getNodeGroup(master) == res.getNodeGroup(next)) // same node group: restart the candidate and pick again; presumably because losing both would take the whole group down -- confirm
+ {
+ res.restartOneDbNode(next, false, false, true); // return value is not checked
+ if (res.waitClusterStarted())
+ return NDBT_FAILED;
+ goto retry;
+ }
+
+ int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 2 }; // NOTE(review): meaning of the value 2 is not visible here -- confirm against the dump-state code
+
+ if (res.dumpStateOneNode(next, val2, 2))
+ return NDBT_FAILED;
+
+ if (res.insertErrorInNode(next, 7180)) // 7180: crash master during master-take-over in execMASTER_LCPCONF
+ return NDBT_FAILED;
+
+ res.restartOneDbNode(master, false, false, true); // restarting the master is what should make `next` start the take-over; return value is not checked
+ if (res.waitClusterStarted()) // non-zero result fails the test
+ return NDBT_FAILED;
+ }
+
+ return NDBT_OK;
+}
NDBT_TESTSUITE(testNodeRestart);
TESTCASE("NoLoad",
@@ -1367,6 +1406,9 @@ TESTCASE("Bug25364", ""){
TESTCASE("Bug25554", ""){
INITIALIZER(runBug25554);
}
+TESTCASE("Bug26457", ""){
+ INITIALIZER(runBug26457);
+}
NDBT_TESTSUITE_END(testNodeRestart);
int main(int argc, const char** argv){
diff --git a/ndb/test/run-test/daily-basic-tests.txt b/ndb/test/run-test/daily-basic-tests.txt
index 00fa14eea2c..9074ff145d8 100644
--- a/ndb/test/run-test/daily-basic-tests.txt
+++ b/ndb/test/run-test/daily-basic-tests.txt
@@ -477,6 +477,10 @@ max-time: 1000
cmd: testNodeRestart
args: -n Bug25554 T1
+max-time: 1000
+cmd: testNodeRestart
+args: -n Bug26457 T1
+
# OLD FLEX
max-time: 500
cmd: flexBench
diff --git a/ndb/test/src/NdbRestarter.cpp b/ndb/test/src/NdbRestarter.cpp
index 2c16a05240d..4c7f52a8622 100644
--- a/ndb/test/src/NdbRestarter.cpp
+++ b/ndb/test/src/NdbRestarter.cpp
@@ -129,6 +129,68 @@ NdbRestarter::getMasterNodeId(){
}
int
+NdbRestarter::getNodeGroup(int nodeId){ // Returns the node_group recorded for nodeId in ndbNodes, or -1 on any failure.
+ if (!isConnected()) // isConnected() reported false
+ return -1;
+
+ if (getStatus() != 0) // presumably refreshes ndbNodes; non-zero is treated as failure -- confirm
+ return -1;
+
+ for(size_t i = 0; i < ndbNodes.size(); i++) // linear scan; the first entry with a matching node_id wins
+ {
+ if(ndbNodes[i].node_id == nodeId)
+ {
+ return ndbNodes[i].node_group;
+ }
+ }
+
+ return -1; // nodeId is not present in ndbNodes
+}
+
+int
+NdbRestarter::getNextMasterNodeId(int nodeId){ // Returns the node_id whose dynamic_id is the smallest one strictly greater than nodeId's; -1 on failure.
+ if (!isConnected())
+ return -1;
+
+ if (getStatus() != 0) // presumably refreshes ndbNodes; non-zero is treated as failure -- confirm
+ return -1;
+
+ size_t i;
+ for(i = 0; i < ndbNodes.size(); i++) // locate nodeId's own entry
+ {
+ if(ndbNodes[i].node_id == nodeId)
+ {
+ break;
+ }
+ }
+ assert(i < ndbNodes.size()); // with asserts enabled an unknown nodeId aborts here
+ if (i == ndbNodes.size()) // with asserts compiled out an unknown nodeId yields -1
+ return -1;
+
+ int dynid = ndbNodes[i].dynamic_id;
+ int minid = dynid; // raised to the largest dynamic_id by the next loop, then lowered by the one after
+ for (i = 0; i<ndbNodes.size(); i++) // pass 1: minid becomes the maximum dynamic_id (or stays dynid if none is larger)
+ if (ndbNodes[i].dynamic_id > minid)
+ minid = ndbNodes[i].dynamic_id;
+
+ for (i = 0; i<ndbNodes.size(); i++) // pass 2: shrink minid to the smallest dynamic_id strictly above dynid
+ if (ndbNodes[i].dynamic_id > dynid &&
+ ndbNodes[i].dynamic_id < minid)
+ {
+ minid = ndbNodes[i].dynamic_id;
+ }
+
+ if (minid != ~0) // NOTE(review): minid is never below dynid, so this looks always true unless dynamic_id can be -1 -- confirm
+ {
+ for (i = 0; i<ndbNodes.size(); i++) // NOTE(review): if no dynamic_id exceeds dynid, minid == dynid and this returns an entry with nodeId's own dynamic_id (normally nodeId itself)
+ if (ndbNodes[i].dynamic_id == minid)
+ return ndbNodes[i].node_id;
+ }
+
+ return getMasterNodeId(); // fallback when no entry matched minid
+}
+
+int
NdbRestarter::getRandomNotMasterNodeId(int rand){
int master = getMasterNodeId();
if(master == -1)