summaryrefslogtreecommitdiff
path: root/ndb
diff options
context:
space:
mode:
authorunknown <jonas@perch.ndb.mysql.com>2006-04-06 11:43:31 +0200
committerunknown <jonas@perch.ndb.mysql.com>2006-04-06 11:43:31 +0200
commitbe0d6c94de4c9c6542d3381185a73a96c4bedcae (patch)
tree40f7c9e1a1b9a72e75e10ddcf6fcffe901e4968f /ndb
parente40bdb5b711c1bb4d8ae003153102a6e8dc1d735 (diff)
downloadmariadb-git-be0d6c94de4c9c6542d3381185a73a96c4bedcae.tar.gz
ndb - wl1760/bug#18216
Add two new start options that will decrease the likelihood of bug#18612. Push cntr-sp2 logic down into qmgr-sp1 to decrease the likelihood of bug#18612. ndb/include/kernel/signaldata/CmRegSignalData.hpp: Expand CmRegReq with lots of stuff ndb/include/mgmapi/ndb_logevent.h: Add Start report during sp1 ndb/src/common/debugger/EventLogger.cpp: Add Start report during sp1 ndb/src/kernel/blocks/dbdih/DbdihInit.cpp: Init cntrref ndb/src/kernel/blocks/dbdih/DbdihMain.cpp: Fix small bugs related to partial initial start ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp: Remove timeouts as they are handled in QMGR nowadays ndb/src/kernel/blocks/qmgr/Qmgr.hpp: Push cntr sp2 logic down to QMGR to prevent the forming of multiple QMGR clusters that will lead to start problems (crashes...) ndb/src/kernel/blocks/qmgr/QmgrInit.cpp: Push cntr sp2 logic down to QMGR to prevent the forming of multiple QMGR clusters that will lead to start problems (crashes...) ndb/src/kernel/blocks/qmgr/QmgrMain.cpp: Push cntr sp2 logic down to QMGR to prevent the forming of multiple QMGR clusters that will lead to start problems (crashes...) ndb/src/kernel/vm/Configuration.cpp: Add new flags --initial-start --nowait-nodes
Diffstat (limited to 'ndb')
-rw-r--r--ndb/include/kernel/signaldata/CmRegSignalData.hpp29
-rw-r--r--ndb/include/mgmapi/ndb_logevent.h13
-rw-r--r--ndb/src/common/debugger/EventLogger.cpp85
-rw-r--r--ndb/src/kernel/blocks/dbdih/DbdihInit.cpp1
-rw-r--r--ndb/src/kernel/blocks/dbdih/DbdihMain.cpp78
-rw-r--r--ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp16
-rw-r--r--ndb/src/kernel/blocks/qmgr/Qmgr.hpp28
-rw-r--r--ndb/src/kernel/blocks/qmgr/QmgrInit.cpp3
-rw-r--r--ndb/src/kernel/blocks/qmgr/QmgrMain.cpp544
-rw-r--r--ndb/src/kernel/vm/Configuration.cpp45
10 files changed, 733 insertions, 109 deletions
diff --git a/ndb/include/kernel/signaldata/CmRegSignalData.hpp b/ndb/include/kernel/signaldata/CmRegSignalData.hpp
index f33c991249f..ab51ed17bc3 100644
--- a/ndb/include/kernel/signaldata/CmRegSignalData.hpp
+++ b/ndb/include/kernel/signaldata/CmRegSignalData.hpp
@@ -30,12 +30,17 @@ class CmRegReq {
friend class Qmgr;
public:
- STATIC_CONST( SignalLength = 3 );
+ STATIC_CONST( SignalLength = 5 + NdbNodeBitmask::Size );
private:
Uint32 blockRef;
Uint32 nodeId;
- Uint32 version; // See ndb_version.h
+ Uint32 version; // See ndb_version.h
+
+ Uint32 start_type; // As specified by cmd-line or mgm, NodeState::StartType
+ Uint32 latest_gci; // 0 means no fs
+ Uint32 skip_nodes[NdbNodeBitmask::Size]; // Nodes that does not _need_
+ // to be part of restart
};
/**
@@ -59,8 +64,7 @@ private:
* The dynamic id that the node reciving this signal has
*/
Uint32 dynamicId;
-
- Uint32 allNdbNodes[NdbNodeBitmask::Size];
+ Uint32 allNdbNodes[NdbNodeBitmask::Size];
};
/**
@@ -73,7 +77,7 @@ class CmRegRef {
friend class Qmgr;
public:
- STATIC_CONST( SignalLength = 4 );
+ STATIC_CONST( SignalLength = 7 + NdbNodeBitmask::Size );
enum ErrorCode {
ZBUSY = 0, /* Only the president can send this */
@@ -85,14 +89,27 @@ public:
* as president. */
ZNOT_PRESIDENT = 5, /* We are not president */
ZNOT_DEAD = 6, /* We are not dead when we are starting */
- ZINCOMPATIBLE_VERSION = 7
+ ZINCOMPATIBLE_VERSION = 7,
+ ZINCOMPATIBLE_START_TYPE = 8
};
private:
Uint32 blockRef;
Uint32 nodeId;
Uint32 errorCode;
+ /**
+ * Applicable if ZELECTION
+ */
Uint32 presidentCandidate;
+ Uint32 candidate_latest_gci; // 0 means non
+
+ /**
+ * Data for the sending node
+ */
+ Uint32 latest_gci;
+ Uint32 start_type;
+ Uint32 skip_nodes[NdbNodeBitmask::Size]; // Nodes that does not _need_
+ // to be part of restart
};
class CmAdd {
diff --git a/ndb/include/mgmapi/ndb_logevent.h b/ndb/include/mgmapi/ndb_logevent.h
index 6025ff2725c..d57646c14db 100644
--- a/ndb/include/mgmapi/ndb_logevent.h
+++ b/ndb/include/mgmapi/ndb_logevent.h
@@ -166,10 +166,14 @@ extern "C" {
/** NDB_MGM_EVENT_CATEGORY_BACKUP */
NDB_LE_BackupCompleted = 56,
/** NDB_MGM_EVENT_CATEGORY_BACKUP */
- NDB_LE_BackupAborted = 57
+ NDB_LE_BackupAborted = 57,
/* 58 used in 5.1 */
/* 59 used */
+
+ /** NDB_MGM_EVENT_CATEGORY_STARTUP */
+ NDB_LE_StartReport = 60
+
/* 60 unused */
/* 61 unused */
/* 62 unused */
@@ -625,6 +629,13 @@ extern "C" {
unsigned type;
unsigned node_id;
} SingleUser;
+ /** Log event data @ref NDB_LE_StartReport */
+ struct {
+ unsigned report_type;
+ unsigned remaining_time;
+ unsigned bitmask_size;
+ unsigned bitmask_data[1];
+ } StartReport;
#ifndef DOXYGEN_FIX
};
#else
diff --git a/ndb/src/common/debugger/EventLogger.cpp b/ndb/src/common/debugger/EventLogger.cpp
index f785cda5215..e1a477b8ea4 100644
--- a/ndb/src/common/debugger/EventLogger.cpp
+++ b/ndb/src/common/debugger/EventLogger.cpp
@@ -707,6 +707,90 @@ void getTextSingleUser(QQQQ) {
}
}
+void getTextStartReport(QQQQ) {
+ Uint32 time = theData[2];
+ Uint32 sz = theData[3];
+ char mask1[100];
+ char mask2[100];
+ char mask3[100];
+ char mask4[100];
+ BitmaskImpl::getText(sz, theData + 4 + (0 * sz), mask1);
+ BitmaskImpl::getText(sz, theData + 4 + (1 * sz), mask2);
+ BitmaskImpl::getText(sz, theData + 4 + (2 * sz), mask3);
+ BitmaskImpl::getText(sz, theData + 4 + (3 * sz), mask4);
+ switch(theData[1]){
+ case 1: // Wait initial
+ BaseString::snprintf
+ (m_text, m_text_len,
+ "Initial start, waiting for %s to connect, "
+ " nodes [ all: %s connected: %s no-wait: %s ]",
+ mask4, mask1, mask2, mask3);
+ break;
+ case 2: // Wait partial
+ BaseString::snprintf
+ (m_text, m_text_len,
+ "Waiting until nodes: %s connects, "
+ "nodes [ all: %s connected: %s no-wait: %s ]",
+ mask4, mask1, mask2, mask3);
+ break;
+ case 3: // Wait partial timeout
+ BaseString::snprintf
+ (m_text, m_text_len,
+ "Waiting %u sec for nodes %s to connect, "
+ "nodes [ all: %s connected: %s no-wait: %s ]",
+
+ time, mask4, mask1, mask2, mask3);
+ break;
+ case 4: // Wait partioned
+ BaseString::snprintf
+ (m_text, m_text_len,
+ "Waiting for non partitioned start, "
+ "nodes [ all: %s connected: %s missing: %s no-wait: %s ]",
+
+ mask1, mask2, mask4, mask3);
+ break;
+ case 5:
+ BaseString::snprintf
+ (m_text, m_text_len,
+ "Waiting %u sec for non partitioned start, "
+ "nodes [ all: %s connected: %s missing: %s no-wait: %s ]",
+
+ time, mask1, mask2, mask4, mask3);
+ break;
+ case 0x8000: // Do initial
+ BaseString::snprintf
+ (m_text, m_text_len,
+ "Initial start with nodes %s [ missing: %s no-wait: %s ]",
+ mask2, mask4, mask3);
+ break;
+ case 0x8001: // Do start
+ BaseString::snprintf
+ (m_text, m_text_len,
+ "Start with all nodes %s",
+ mask2);
+ break;
+ case 0x8002: // Do partial
+ BaseString::snprintf
+ (m_text, m_text_len,
+ "Start with nodes %s [ missing: %s no-wait: %s ]",
+ mask2, mask4, mask3);
+ break;
+ case 0x8003: // Do partioned
+ BaseString::snprintf
+ (m_text, m_text_len,
+ "Start potentially partitioned with nodes %s "
+ " [ missing: %s no-wait: %s ]",
+ mask2, mask4, mask3);
+ break;
+ default:
+ BaseString::snprintf
+ (m_text, m_text_len,
+ "Unknown startreport: 0x%x [ %s %s %s %s ]",
+ theData[1],
+ mask1, mask2, mask3, mask4);
+ }
+}
+
#if 0
BaseString::snprintf(m_text,
m_text_len,
@@ -755,6 +839,7 @@ const EventLoggerBase::EventRepLogLevelMatrix EventLoggerBase::matrix[] = {
ROW(StartREDOLog, LogLevel::llStartUp, 10, Logger::LL_INFO ),
ROW(StartLog, LogLevel::llStartUp, 10, Logger::LL_INFO ),
ROW(UNDORecordsExecuted, LogLevel::llStartUp, 15, Logger::LL_INFO ),
+ ROW(StartReport, LogLevel::llStartUp, 4, Logger::LL_INFO ),
// NODERESTART
ROW(NR_CopyDict, LogLevel::llNodeRestart, 8, Logger::LL_INFO ),
diff --git a/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp b/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp
index d6f6b772516..cd987048577 100644
--- a/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp
+++ b/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp
@@ -71,6 +71,7 @@ void Dbdih::initData()
cwaitLcpSr = false;
c_blockCommit = false;
c_blockCommitNo = 1;
+ cntrlblockref = RNIL;
}//Dbdih::initData()
void Dbdih::initRecords()
diff --git a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
index e0dbc9bd272..e4b95a5af7d 100644
--- a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
+++ b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
@@ -11659,7 +11659,7 @@ void Dbdih::makeNodeGroups(Uint32 nodeArray[])
Uint32 tmngNode;
Uint32 tmngNodeGroup;
Uint32 tmngLimit;
- Uint32 i;
+ Uint32 i, j;
/**-----------------------------------------------------------------------
* ASSIGN ALL ACTIVE NODES INTO NODE GROUPS. HOT SPARE NODES ARE ASSIGNED
@@ -11705,6 +11705,38 @@ void Dbdih::makeNodeGroups(Uint32 nodeArray[])
Sysfile::setNodeGroup(mngNodeptr.i, SYSFILE->nodeGroups, mngNodeptr.p->nodeGroup);
}//if
}//for
+
+ for (i = 0; i<cnoOfNodeGroups; i++)
+ {
+ jam();
+ bool alive = false;
+ NodeGroupRecordPtr NGPtr;
+ NGPtr.i = i;
+ ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
+ for (j = 0; j<NGPtr.p->nodeCount; j++)
+ {
+ jam();
+ mngNodeptr.i = NGPtr.p->nodesInGroup[j];
+ ptrCheckGuard(mngNodeptr, MAX_NDB_NODES, nodeRecord);
+ if (checkNodeAlive(NGPtr.p->nodesInGroup[j]))
+ {
+ alive = true;
+ break;
+ }
+ }
+
+ if (!alive)
+ {
+ char buf[255];
+ BaseString::snprintf
+ (buf, sizeof(buf),
+ "Illegal initial start, no alive node in nodegroup %u", i);
+ progError(__LINE__,
+ NDBD_EXIT_SR_RESTARTCONFLICT,
+ buf);
+
+ }
+ }
}//Dbdih::makeNodeGroups()
/**
@@ -12512,7 +12544,6 @@ void Dbdih::sendStartFragreq(Signal* signal,
void Dbdih::setInitialActiveStatus()
{
NodeRecordPtr siaNodeptr;
- Uint32 tsiaNodeActiveStatus;
Uint32 tsiaNoActiveNodes;
tsiaNoActiveNodes = csystemnodes - cnoHotSpare;
@@ -12520,39 +12551,34 @@ void Dbdih::setInitialActiveStatus()
SYSFILE->nodeStatus[i] = 0;
for (siaNodeptr.i = 1; siaNodeptr.i < MAX_NDB_NODES; siaNodeptr.i++) {
ptrAss(siaNodeptr, nodeRecord);
- if (siaNodeptr.p->nodeStatus == NodeRecord::ALIVE) {
+ switch(siaNodeptr.p->nodeStatus){
+ case NodeRecord::ALIVE:
+ case NodeRecord::DEAD:
if (tsiaNoActiveNodes == 0) {
jam();
siaNodeptr.p->activeStatus = Sysfile::NS_HotSpare;
} else {
jam();
tsiaNoActiveNodes = tsiaNoActiveNodes - 1;
- siaNodeptr.p->activeStatus = Sysfile::NS_Active;
- }//if
- } else {
- jam();
- siaNodeptr.p->activeStatus = Sysfile::NS_NotDefined;
- }//if
- switch (siaNodeptr.p->activeStatus) {
- case Sysfile::NS_Active:
- jam();
- tsiaNodeActiveStatus = Sysfile::NS_Active;
- break;
- case Sysfile::NS_HotSpare:
- jam();
- tsiaNodeActiveStatus = Sysfile::NS_HotSpare;
- break;
- case Sysfile::NS_NotDefined:
- jam();
- tsiaNodeActiveStatus = Sysfile::NS_NotDefined;
+ if (siaNodeptr.p->nodeStatus == NodeRecord::ALIVE)
+ {
+ jam();
+ siaNodeptr.p->activeStatus = Sysfile::NS_Active;
+ }
+ else
+ {
+ siaNodeptr.p->activeStatus = Sysfile::NS_NotActive_NotTakenOver;
+ }
+ }
break;
default:
- ndbrequire(false);
- return;
+ jam();
+ siaNodeptr.p->activeStatus = Sysfile::NS_NotDefined;
break;
- }//switch
- Sysfile::setNodeStatus(siaNodeptr.i, SYSFILE->nodeStatus,
- tsiaNodeActiveStatus);
+ }//if
+ Sysfile::setNodeStatus(siaNodeptr.i,
+ SYSFILE->nodeStatus,
+ siaNodeptr.p->activeStatus);
}//for
}//Dbdih::setInitialActiveStatus()
diff --git a/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp b/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp
index f5dab99ee35..3fc24e395b1 100644
--- a/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp
+++ b/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp
@@ -820,17 +820,9 @@ Ndbcntr::trySystemRestart(Signal* signal){
return false;
}
- if(!allNodes && c_start.m_startPartialTimeout > now){
- jam();
- return false;
- }
-
NodeState::StartType srType = NodeState::ST_SYSTEM_RESTART;
- if(c_start.m_waiting.equal(c_start.m_withoutLog)){
- if(!allNodes){
- jam();
- return false;
- }
+ if(c_start.m_waiting.equal(c_start.m_withoutLog))
+ {
jam();
srType = NodeState::ST_INITIAL_START;
c_start.m_starting = c_start.m_withoutLog; // Used for starting...
@@ -860,10 +852,6 @@ Ndbcntr::trySystemRestart(Signal* signal){
ndbrequire(false); // All nodes -> partitioning, which is not allowed
}
- if(c_start.m_startPartitionedTimeout > now){
- jam();
- return false;
- }
break;
}
diff --git a/ndb/src/kernel/blocks/qmgr/Qmgr.hpp b/ndb/src/kernel/blocks/qmgr/Qmgr.hpp
index e698ddd981b..3feb0858e82 100644
--- a/ndb/src/kernel/blocks/qmgr/Qmgr.hpp
+++ b/ndb/src/kernel/blocks/qmgr/Qmgr.hpp
@@ -50,6 +50,7 @@
#define ZAPI_HB_HANDLING 3
#define ZTIMER_HANDLING 4
#define ZARBIT_HANDLING 5
+#define ZSTART_FAILURE_LIMIT 6
/* Error Codes ------------------------------*/
#define ZERRTOOMANY 1101
@@ -113,8 +114,19 @@ public:
Uint32 m_gsn;
SignalCounter m_nodes;
- } c_start;
+ Uint32 m_latest_gci;
+
+ Uint32 m_start_type;
+ NdbNodeBitmask m_skip_nodes;
+ NdbNodeBitmask m_starting_nodes;
+ NdbNodeBitmask m_starting_nodes_w_log;
+ Uint16 m_president_candidate;
+ Uint32 m_president_candidate_gci;
+ Uint16 m_regReqReqSent;
+ Uint16 m_regReqReqRecv;
+ } c_start;
+
NdbNodeBitmask c_definedNodes; // DB nodes in config
NdbNodeBitmask c_clusterNodes; // DB nodes in cluster
NodeBitmask c_connectedNodes; // All kinds of connected nodes
@@ -125,7 +137,7 @@ public:
* i.e. nodes that connect to use, when we already have elected president
*/
NdbNodeBitmask c_readnodes_nodes;
-
+
Uint32 c_maxDynamicId;
// Records
@@ -236,6 +248,9 @@ private:
void execREAD_NODESREF(Signal* signal);
void execREAD_NODESCONF(Signal* signal);
+ void execDIH_RESTARTREF(Signal* signal);
+ void execDIH_RESTARTCONF(Signal* signal);
+
void execAPI_VERSION_REQ(Signal* signal);
void execAPI_BROADCAST_REP(Signal* signal);
@@ -252,6 +267,7 @@ private:
// Statement blocks
void check_readnodes_reply(Signal* signal, Uint32 nodeId, Uint32 gsn);
+ Uint32 check_startup(Signal* signal);
void node_failed(Signal* signal, Uint16 aFailedNode);
void checkStartInterface(Signal* signal);
@@ -374,12 +390,12 @@ private:
/* Status flags ----------------------------------*/
Uint32 c_restartPartialTimeout;
+ Uint32 c_restartPartionedTimeout;
+ Uint32 c_restartFailureTimeout;
+ Uint64 c_start_election_time;
Uint16 creadyDistCom;
- Uint16 c_regReqReqSent;
- Uint16 c_regReqReqRecv;
- Uint64 c_stopElectionTime;
- Uint16 cpresidentCandidate;
+
Uint16 cdelayRegreq;
Uint16 cpresidentAlive;
Uint16 cnoFailedNodes;
diff --git a/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp b/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp
index d0ecb114d0f..f14cbd48695 100644
--- a/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp
+++ b/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp
@@ -98,6 +98,9 @@ Qmgr::Qmgr(const class Configuration & conf)
addRecSignal(GSN_READ_NODESREF, &Qmgr::execREAD_NODESREF);
addRecSignal(GSN_READ_NODESCONF, &Qmgr::execREAD_NODESCONF);
+
+ addRecSignal(GSN_DIH_RESTARTREF, &Qmgr::execDIH_RESTARTREF);
+ addRecSignal(GSN_DIH_RESTARTCONF, &Qmgr::execDIH_RESTARTCONF);
initData();
}//Qmgr::Qmgr()
diff --git a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp
index ea256821924..a45d35d343b 100644
--- a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp
+++ b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp
@@ -146,6 +146,30 @@ void Qmgr::execCONTINUEB(Signal* signal)
runArbitThread(signal);
return;
break;
+ case ZSTART_FAILURE_LIMIT:{
+ if (cpresident != ZNIL)
+ {
+ jam();
+ return;
+ }
+ Uint64 now = NdbTick_CurrentMillisecond();
+ if (now > (c_start_election_time + c_restartFailureTimeout))
+ {
+ jam();
+ BaseString tmp;
+ tmp.append("Shutting down node as total restart time exceeds "
+ " StartFailureTimeout as set in config file ");
+ if(c_restartFailureTimeout == ~0)
+ tmp.append(" 0 (inifinite)");
+ else
+ tmp.appfmt(" %d", c_restartFailureTimeout);
+
+ progError(__LINE__, NDBD_EXIT_SYSTEM_ERROR, tmp.c_str());
+ }
+ signal->theData[0] = ZSTART_FAILURE_LIMIT;
+ sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 3000, 1);
+ return;
+ }
default:
jam();
// ZCOULD_NOT_OCCUR_ERROR;
@@ -273,14 +297,28 @@ void Qmgr::startphase1(Signal* signal)
nodePtr.p->phase = ZSTARTING;
nodePtr.p->blockRef = reference();
c_connectedNodes.set(nodePtr.i);
+
+ signal->theData[0] = reference();
+ sendSignal(DBDIH_REF, GSN_DIH_RESTARTREQ, signal, 1, JBB);
+ return;
+}
- signal->theData[0] = 0; // no answer
- signal->theData[1] = 0; // no id
- signal->theData[2] = NodeInfo::DB;
- sendSignal(CMVMI_REF, GSN_OPEN_COMREQ, signal, 3, JBB);
+void
+Qmgr::execDIH_RESTARTREF(Signal*signal)
+{
+ jamEntry();
+ c_start.m_latest_gci = 0;
+ execCM_INFOCONF(signal);
+}
+
+void
+Qmgr::execDIH_RESTARTCONF(Signal*signal)
+{
+ jamEntry();
+
+ c_start.m_latest_gci = signal->theData[1];
execCM_INFOCONF(signal);
- return;
}
void Qmgr::setHbDelay(UintR aHbDelay)
@@ -418,25 +456,44 @@ Qmgr::execREAD_NODESREF(Signal* signal)
/*******************************/
void Qmgr::execCM_INFOCONF(Signal* signal)
{
+ /**
+ * Open communication to all DB nodes
+ */
+ signal->theData[0] = 0; // no answer
+ signal->theData[1] = 0; // no id
+ signal->theData[2] = NodeInfo::DB;
+ sendSignal(CMVMI_REF, GSN_OPEN_COMREQ, signal, 3, JBB);
+
cpresident = ZNIL;
- cpresidentCandidate = getOwnNodeId();
cpresidentAlive = ZFALSE;
- c_stopElectionTime = NdbTick_CurrentMillisecond();
- c_stopElectionTime += c_restartPartialTimeout;
+ c_start_election_time = NdbTick_CurrentMillisecond();
+
+ signal->theData[0] = ZSTART_FAILURE_LIMIT;
+ sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 3000, 1);
+
cmInfoconf010Lab(signal);
return;
}//Qmgr::execCM_INFOCONF()
+Uint32 g_start_type = 0;
+NdbNodeBitmask g_nowait_nodes; // Set by clo
+
void Qmgr::cmInfoconf010Lab(Signal* signal)
{
c_start.m_startKey = 0;
c_start.m_startNode = getOwnNodeId();
c_start.m_nodes.clearWaitingFor();
c_start.m_gsn = GSN_CM_REGREQ;
+ c_start.m_starting_nodes.clear();
+ c_start.m_starting_nodes_w_log.clear();
+ c_start.m_regReqReqSent = 0;
+ c_start.m_regReqReqRecv = 0;
+ c_start.m_skip_nodes = g_nowait_nodes;
+ c_start.m_skip_nodes.bitAND(c_definedNodes);
+ c_start.m_start_type = g_start_type;
NodeRecPtr nodePtr;
- c_regReqReqSent = c_regReqReqRecv = 0;
cnoOfNodes = 0;
for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
jam();
@@ -471,14 +528,18 @@ void Qmgr::cmInfoconf010Lab(Signal* signal)
void
Qmgr::sendCmRegReq(Signal * signal, Uint32 nodeId){
- c_regReqReqSent++;
- CmRegReq * const cmRegReq = (CmRegReq *)&signal->theData[0];
- cmRegReq->blockRef = reference();
- cmRegReq->nodeId = getOwnNodeId();
- cmRegReq->version = NDB_VERSION;
+ CmRegReq * req = (CmRegReq *)&signal->theData[0];
+ req->blockRef = reference();
+ req->nodeId = getOwnNodeId();
+ req->version = NDB_VERSION;
+ req->latest_gci = c_start.m_latest_gci;
+ req->start_type = c_start.m_start_type;
+ c_start.m_skip_nodes.copyto(NdbNodeBitmask::Size, req->skip_nodes);
const Uint32 ref = calcQmgrBlockRef(nodeId);
sendSignal(ref, GSN_CM_REGREQ, signal, CmRegReq::SignalLength, JBB);
DEBUG_START(GSN_CM_REGREQ, nodeId, "");
+
+ c_start.m_regReqReqSent++;
}
/*
@@ -518,6 +579,18 @@ Qmgr::sendCmRegReq(Signal * signal, Uint32 nodeId){
/*******************************/
/* CM_REGREQ */
/*******************************/
+static
+int
+check_start_type(Uint32 starting, Uint32 own)
+{
+ if (starting == (1 << NodeState::ST_INITIAL_START) &&
+ ((own & (1 << NodeState::ST_INITIAL_START)) == 0))
+ {
+ return 1;
+ }
+ return 0;
+}
+
void Qmgr::execCM_REGREQ(Signal* signal)
{
DEBUG_START3(signal, "");
@@ -529,6 +602,17 @@ void Qmgr::execCM_REGREQ(Signal* signal)
const BlockReference Tblockref = cmRegReq->blockRef;
const Uint32 startingVersion = cmRegReq->version;
addNodePtr.i = cmRegReq->nodeId;
+ Uint32 gci = 1;
+ Uint32 start_type = ~0;
+ NdbNodeBitmask skip_nodes;
+
+ if (signal->getLength() == CmRegReq::SignalLength)
+ {
+ jam();
+ gci = cmRegReq->latest_gci;
+ start_type = cmRegReq->start_type;
+ skip_nodes.assign(NdbNodeBitmask::Size, cmRegReq->skip_nodes);
+ }
if (creadyDistCom == ZFALSE) {
jam();
@@ -542,11 +626,19 @@ void Qmgr::execCM_REGREQ(Signal* signal)
return;
}
- ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec);
-
- if (cpresident != getOwnNodeId()){
+ if (check_start_type(start_type, c_start.m_start_type))
+ {
+ jam();
+ sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_START_TYPE);
+ return;
+ }
+
+ if (cpresident != getOwnNodeId())
+ {
jam();
- if (cpresident == ZNIL) {
+
+ if (cpresident == ZNIL)
+ {
/***
* We don't know the president.
* If the node to be added has lower node id
@@ -554,13 +646,19 @@ void Qmgr::execCM_REGREQ(Signal* signal)
* candidate
*/
jam();
- if (addNodePtr.i < cpresidentCandidate) {
+ if (gci > c_start.m_president_candidate_gci ||
+ (gci == c_start.m_president_candidate_gci &&
+ addNodePtr.i < c_start.m_president_candidate))
+ {
jam();
- cpresidentCandidate = addNodePtr.i;
- }//if
+ c_start.m_president_candidate = addNodePtr.i;
+ c_start.m_president_candidate_gci = gci;
+ ndbout_c("assign candidate: %u %u", addNodePtr.i, gci);
+ }
sendCmRegrefLab(signal, Tblockref, CmRegRef::ZELECTION);
return;
- }
+ }
+
/**
* We are not the president.
* We know the president.
@@ -570,7 +668,8 @@ void Qmgr::execCM_REGREQ(Signal* signal)
return;
}//if
- if (c_start.m_startNode != 0){
+ if (c_start.m_startNode != 0)
+ {
jam();
/**
* President busy by adding another node
@@ -579,7 +678,8 @@ void Qmgr::execCM_REGREQ(Signal* signal)
return;
}//if
- if (ctoStatus == Q_ACTIVE) {
+ if (ctoStatus == Q_ACTIVE)
+ {
jam();
/**
* Active taking over as president
@@ -588,7 +688,8 @@ void Qmgr::execCM_REGREQ(Signal* signal)
return;
}//if
- if (getNodeInfo(addNodePtr.i).m_type != NodeInfo::DB) {
+ if (getNodeInfo(addNodePtr.i).m_type != NodeInfo::DB)
+ {
jam();
/**
* The new node is not in config file
@@ -597,13 +698,15 @@ void Qmgr::execCM_REGREQ(Signal* signal)
return;
}
+ ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec);
Phase phase = addNodePtr.p->phase;
- if (phase != ZINIT){
+ if (phase != ZINIT)
+ {
jam();
DEBUG("phase = " << phase);
sendCmRegrefLab(signal, Tblockref, CmRegRef::ZNOT_DEAD);
return;
- }//if
+ }
jam();
/**
@@ -675,7 +778,12 @@ void Qmgr::sendCmRegrefLab(Signal* signal, BlockReference TBRef,
ref->blockRef = reference();
ref->nodeId = getOwnNodeId();
ref->errorCode = Terror;
- ref->presidentCandidate = (cpresident == ZNIL ? cpresidentCandidate : cpresident);
+ ref->presidentCandidate =
+ (cpresident == ZNIL ? c_start.m_president_candidate : cpresident);
+ ref->candidate_latest_gci = c_start.m_president_candidate_gci;
+ ref->latest_gci = c_start.m_latest_gci;
+ ref->start_type = c_start.m_start_type;
+ c_start.m_skip_nodes.copyto(NdbNodeBitmask::Size, ref->skip_nodes);
sendSignal(TBRef, GSN_CM_REGREF, signal,
CmRegRef::SignalLength, JBB);
DEBUG_START(GSN_CM_REGREF, refToNode(TBRef), "");
@@ -869,28 +977,105 @@ Qmgr::sendCmNodeInfoReq(Signal* signal, Uint32 nodeId, const NodeRec * self){
/*******************************/
/* CM_REGREF */
/*******************************/
+static
+const char *
+get_start_type_string(Uint32 st)
+{
+ static char buf[256];
+
+ if (st == 0)
+ {
+ return "<ANY>";
+ }
+ else
+ {
+ buf[0] = 0;
+ for(Uint32 i = 0; i<NodeState::ST_ILLEGAL_TYPE; i++)
+ {
+ if (st & (1 << i))
+ {
+ if (buf[0])
+ strcat(buf, "/");
+ switch(i){
+ case NodeState::ST_INITIAL_START:
+ strcat(buf, "inital start");
+ break;
+ case NodeState::ST_SYSTEM_RESTART:
+ strcat(buf, "system restart");
+ break;
+ case NodeState::ST_NODE_RESTART:
+ strcat(buf, "node restart");
+ break;
+ case NodeState::ST_INITIAL_NODE_RESTART:
+ strcat(buf, "initial node restart");
+ break;
+ }
+ }
+ }
+ return buf;
+ }
+}
+
void Qmgr::execCM_REGREF(Signal* signal)
{
jamEntry();
- UintR TaddNodeno = signal->theData[1];
- UintR TrefuseReason = signal->theData[2];
- Uint32 candidate = signal->theData[3];
+ CmRegRef* ref = (CmRegRef*)signal->getDataPtr();
+ UintR TaddNodeno = ref->nodeId;
+ UintR TrefuseReason = ref->errorCode;
+ Uint32 candidate = ref->presidentCandidate;
+ Uint32 node_gci = 1;
+ Uint32 candidate_gci = 1;
+ Uint32 start_type = ~0;
+ NdbNodeBitmask skip_nodes;
DEBUG_START3(signal, TrefuseReason);
- c_regReqReqRecv++;
+ if (signal->getLength() == CmRegRef::SignalLength)
+ {
+ jam();
+ node_gci = ref->latest_gci;
+ candidate_gci = ref->candidate_latest_gci;
+ start_type = ref->start_type;
+ skip_nodes.assign(NdbNodeBitmask::Size, ref->skip_nodes);
+ }
+
+ c_start.m_regReqReqRecv++;
// Ignore block reference in data[0]
- if(candidate != cpresidentCandidate){
+ if(candidate != c_start.m_president_candidate)
+ {
jam();
- c_regReqReqRecv = ~0;
+ c_start.m_regReqReqRecv = ~0;
}
-
+
+ c_start.m_starting_nodes.set(TaddNodeno);
+ if (node_gci)
+ {
+ jam();
+ c_start.m_starting_nodes_w_log.set(TaddNodeno);
+ }
+
+ skip_nodes.bitAND(c_definedNodes);
+ c_start.m_skip_nodes.bitOR(skip_nodes);
+
+ char buf[100];
switch (TrefuseReason) {
case CmRegRef::ZINCOMPATIBLE_VERSION:
jam();
- systemErrorLab(signal, __LINE__, "incompatible version, connection refused by running ndb node");
+ systemErrorLab(signal, __LINE__,
+ "incompatible version, "
+ "connection refused by running ndb node");
+ case CmRegRef::ZINCOMPATIBLE_START_TYPE:
+ jam();
+ BaseString::snprintf(buf, sizeof(buf),
+ "incompatible start type detected: node %d"
+ " reports %s(%d) my start type: %s(%d)",
+ TaddNodeno,
+ get_start_type_string(start_type), start_type,
+ get_start_type_string(c_start.m_start_type),
+ c_start.m_start_type);
+ progError(__LINE__, NDBD_EXIT_SR_RESTARTCONFLICT, buf);
break;
case CmRegRef::ZBUSY:
case CmRegRef::ZBUSY_TO_PRES:
@@ -909,14 +1094,19 @@ void Qmgr::execCM_REGREF(Signal* signal)
break;
case CmRegRef::ZELECTION:
jam();
- if (cpresidentCandidate > TaddNodeno) {
+ if (candidate_gci > c_start.m_president_candidate_gci ||
+ (candidate_gci == c_start.m_president_candidate_gci &&
+ candidate < c_start.m_president_candidate))
+ {
jam();
//----------------------------------------
/* We may already have a candidate */
/* choose the lowest nodeno */
//----------------------------------------
signal->theData[3] = 2;
- cpresidentCandidate = TaddNodeno;
+ c_start.m_president_candidate = candidate;
+ c_start.m_president_candidate_gci = candidate_gci;
+ ndbout_c("assign candidate: %u %u", candidate, candidate_gci);
} else {
signal->theData[3] = 4;
}//if
@@ -944,32 +1134,34 @@ void Qmgr::execCM_REGREF(Signal* signal)
//-----------------------------------------
sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
- if(cpresidentAlive == ZTRUE){
+ if(cpresidentAlive == ZTRUE)
+ {
jam();
- DEBUG("");
+ DEBUG("cpresidentAlive");
return;
}
- if(c_regReqReqSent != c_regReqReqRecv){
+ if(c_start.m_regReqReqSent != c_start.m_regReqReqRecv)
+ {
jam();
- DEBUG( c_regReqReqSent << " != " << c_regReqReqRecv);
+ DEBUG(c_start.m_regReqReqSent << " != " << c_start.m_regReqReqRecv);
return;
}
- if(cpresidentCandidate != getOwnNodeId()){
+ if(c_start.m_president_candidate != getOwnNodeId())
+ {
jam();
- DEBUG("");
+ DEBUG("i'm not the candidate");
return;
}
-
+
/**
- * All configured nodes has agreed
+ * All connected nodes has agreed
*/
- Uint64 now = NdbTick_CurrentMillisecond();
- if((c_regReqReqRecv == cnoOfNodes) || now > c_stopElectionTime){
+ if(check_startup(signal))
+ {
jam();
electionWon(signal);
- sendSttorryLab(signal);
/**
* Start timer handling
@@ -981,6 +1173,190 @@ void Qmgr::execCM_REGREF(Signal* signal)
return;
}//Qmgr::execCM_REGREF()
+Uint32
+Qmgr::check_startup(Signal* signal)
+{
+ Uint64 now = NdbTick_CurrentMillisecond();
+ Uint64 partial_timeout = c_start_election_time + c_restartPartialTimeout;
+ Uint64 partitioned_timeout = partial_timeout + c_restartPartionedTimeout;
+
+ /**
+ * First see if we should wait more...
+ */
+ NdbNodeBitmask tmp;
+ tmp.bitOR(c_start.m_skip_nodes);
+ tmp.bitOR(c_start.m_starting_nodes);
+
+ NdbNodeBitmask wait;
+ wait.assign(c_definedNodes);
+ wait.bitANDC(tmp);
+
+ Uint32 retVal = 0;
+ NdbNodeBitmask report_mask;
+
+ if ((c_start.m_latest_gci == 0) ||
+ (c_start.m_start_type == (1 << NodeState::ST_INITIAL_START)))
+ {
+ if (!tmp.equal(c_definedNodes))
+ {
+ jam();
+ signal->theData[1] = 1;
+ signal->theData[2] = ~0;
+ report_mask.assign(wait);
+ retVal = 0;
+ goto start_report;
+ }
+ else
+ {
+ jam();
+ signal->theData[1] = 0x8000;
+ report_mask.assign(c_definedNodes);
+ report_mask.bitANDC(c_start.m_starting_nodes);
+ retVal = 1;
+ goto start_report;
+ }
+ }
+ const bool all = c_start.m_starting_nodes.equal(c_definedNodes);
+ CheckNodeGroups* sd = (CheckNodeGroups*)&signal->theData[0];
+
+ {
+ /**
+ * Check for missing node group directly
+ */
+ char buf[100];
+ NdbNodeBitmask check;
+ check.assign(c_definedNodes);
+ check.bitANDC(c_start.m_starting_nodes); // Not connected nodes
+ check.bitOR(c_start.m_starting_nodes_w_log);
+
+ sd->blockRef = reference();
+ sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
+ sd->mask = check;
+ EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
+ CheckNodeGroups::SignalLength);
+
+ if (sd->output == CheckNodeGroups::Lose)
+ {
+ jam();
+ goto missing_nodegroup;
+ }
+ }
+
+ sd->blockRef = reference();
+ sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
+ sd->mask = c_start.m_starting_nodes;
+ EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
+ CheckNodeGroups::SignalLength);
+
+ const Uint32 result = sd->output;
+
+ sd->blockRef = reference();
+ sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
+ sd->mask = c_start.m_starting_nodes_w_log;
+ EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
+ CheckNodeGroups::SignalLength);
+
+ const Uint32 result_w_log = sd->output;
+
+ if (tmp.equal(c_definedNodes))
+ {
+ /**
+ * All nodes (wrt no-wait nodes) have connected...
+ * this means that we will now start or die
+ */
+ jam();
+ switch(result_w_log){
+ case CheckNodeGroups::Lose:
+ {
+ jam();
+ goto missing_nodegroup;
+ }
+ case CheckNodeGroups::Win:
+ signal->theData[1] = all ? 0x8001 : 0x8002;
+ report_mask.assign(c_definedNodes);
+ report_mask.bitANDC(c_start.m_starting_nodes);
+ retVal = 1;
+ goto start_report;
+ case CheckNodeGroups::Partitioning:
+ ndbrequire(result != CheckNodeGroups::Lose);
+ signal->theData[1] =
+ all ? 0x8001 : (result == CheckNodeGroups::Win ? 0x8002 : 0x8003);
+ report_mask.assign(c_definedNodes);
+ report_mask.bitANDC(c_start.m_starting_nodes);
+ retVal = 1;
+ goto start_report;
+ }
+ }
+
+ if (now < partial_timeout)
+ {
+ jam();
+ signal->theData[1] = c_restartPartialTimeout == ~0 ? 2 : 3;
+ signal->theData[2] = Uint32((partial_timeout - now + 500) / 1000);
+ report_mask.assign(wait);
+ retVal = 0;
+ goto start_report;
+ }
+
+ /**
+ * Start partial has passed...check for partitioning...
+ */
+ switch(result_w_log){
+ case CheckNodeGroups::Lose:
+ jam();
+ goto missing_nodegroup;
+ case CheckNodeGroups::Partitioning:
+ if (now < partitioned_timeout && result != CheckNodeGroups::Win)
+ {
+ signal->theData[1] = c_restartPartionedTimeout == ~0 ? 4 : 5;
+ signal->theData[2] = Uint32((partitioned_timeout - now + 500) / 1000);
+ report_mask.assign(c_definedNodes);
+ report_mask.bitANDC(c_start.m_starting_nodes);
+ retVal = 0;
+ goto start_report;
+ }
+ // Fall through...
+ case CheckNodeGroups::Win:
+ signal->theData[1] =
+ all ? 0x8001 : (result == CheckNodeGroups::Win ? 0x8002 : 0x8003);
+ report_mask.assign(c_definedNodes);
+ report_mask.bitANDC(c_start.m_starting_nodes);
+ retVal = 1;
+ goto start_report;
+ }
+
+ ndbrequire(false);
+
+start_report:
+ jam();
+ {
+ Uint32 sz = NdbNodeBitmask::Size;
+ signal->theData[0] = NDB_LE_StartReport;
+ signal->theData[3] = sz;
+ Uint32* ptr = signal->theData+4;
+ c_definedNodes.copyto(sz, ptr); ptr += sz;
+ c_start.m_starting_nodes.copyto(sz, ptr); ptr += sz;
+ c_start.m_skip_nodes.copyto(sz, ptr); ptr += sz;
+ report_mask.copyto(sz, ptr); ptr+= sz;
+ sendSignal(CMVMI_REF, GSN_EVENT_REP, signal,
+ 4+4*NdbNodeBitmask::Size, JBB);
+ }
+ return retVal;
+
+missing_nodegroup:
+ jam();
+ char buf[100], mask1[100], mask2[100];
+ c_start.m_starting_nodes.getText(mask1);
+ tmp.assign(c_start.m_starting_nodes);
+ tmp.bitANDC(c_start.m_starting_nodes_w_log);
+ tmp.getText(mask2);
+ BaseString::snprintf(buf, sizeof(buf),
+ "Unable to start missing node group! "
+ " starting: %s (missing fs for: %s)",
+ mask1, mask2);
+ progError(__LINE__, NDBD_EXIT_SR_RESTARTCONFLICT, buf);
+}
+
void
Qmgr::electionWon(Signal* signal){
NodeRecPtr myNodePtr;
@@ -999,7 +1375,7 @@ Qmgr::electionWon(Signal* signal){
c_clusterNodes.set(getOwnNodeId());
cpresidentAlive = ZTRUE;
- c_stopElectionTime = ~0;
+ c_start_election_time = ~0; // replaces the c_stopElectionTime assignment; set to all-ones once the election is won
c_start.reset();
signal->theData[0] = NDB_LE_CM_REGCONF;
@@ -1007,6 +1383,13 @@ Qmgr::electionWon(Signal* signal){
signal->theData[2] = cpresident;
signal->theData[3] = 1;
sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
+
+ c_start.m_starting_nodes.clear(getOwnNodeId()); // remove own node from the starting-nodes mask
+ if (c_start.m_starting_nodes.isclear()) // nothing left in the starting-nodes mask
+ {
+ jam();
+ sendSttorryLab(signal); // NOTE(review): presumably reports this start phase as complete - confirm
+ }
}
/*
@@ -1020,7 +1403,15 @@ Qmgr::electionWon(Signal* signal){
/*--------------------------------------------------------------*/
void Qmgr::regreqTimeLimitLab(Signal* signal)
{
- if(cpresident == ZNIL){
+ if(cpresident == ZNIL) // cpresident still holds ZNIL, the value initData assigns
+ {
+ if (c_start.m_president_candidate == ZNIL) // no candidate has been recorded either
+ {
+ jam();
+ c_start.m_president_candidate = getOwnNodeId(); // nominate this node as the candidate
+ ndbout_c("Assigning candidate to self: %d", getOwnNodeId()); // trace the self-nomination
+ }
+
cmInfoconf010Lab(signal);
}
}//Qmgr::regreqTimelimitLab()
@@ -1430,6 +1821,17 @@ void Qmgr::execCM_ACKADD(Signal* signal)
*/
handleArbitNdbAdd(signal, addNodePtr.i);
c_start.reset();
+
+ if (c_start.m_starting_nodes.get(addNodePtr.i)) // the node just added is in the starting-nodes mask
+ { // NOTE(review): relies on c_start.reset() above leaving m_starting_nodes intact - confirm
+ jam();
+ c_start.m_starting_nodes.clear(addNodePtr.i); // this node no longer needs to be tracked
+ if (c_start.m_starting_nodes.isclear()) // nothing left in the starting-nodes mask
+ {
+ jam();
+ sendSttorryLab(signal); // same call electionWon makes when its mask becomes empty
+ }
+ }
return;
}//switch
ndbrequire(false);
@@ -1583,7 +1985,8 @@ void Qmgr::initData(Signal* signal)
cnoPrepFailedNodes = 0;
creadyDistCom = ZFALSE;
cpresident = ZNIL;
- cpresidentCandidate = ZNIL;
+ c_start.m_president_candidate = ZNIL; // candidate is now kept in c_start (was cpresidentCandidate)
+ c_start.m_president_candidate_gci = 0; // new companion field, starts at 0
cpdistref = 0;
cneighbourh = ZNIL;
cneighbourl = ZNIL;
@@ -1611,15 +2014,33 @@ void Qmgr::initData(Signal* signal)
Uint32 hbDBAPI = 1500;
Uint32 arbitTimeout = 1000;
c_restartPartialTimeout = 30000;
+ c_restartPartionedTimeout = 60000; // default; its address is handed to the CFG_DB_START_PARTITION_TIMEOUT lookup below
+ c_restartFailureTimeout = ~0; // default is the all-ones value; NOTE(review): presumably "no limit" - confirm
ndb_mgm_get_int_parameter(p, CFG_DB_HEARTBEAT_INTERVAL, &hbDBDB);
ndb_mgm_get_int_parameter(p, CFG_DB_API_HEARTBEAT_INTERVAL, &hbDBAPI);
ndb_mgm_get_int_parameter(p, CFG_DB_ARBIT_TIMEOUT, &arbitTimeout);
ndb_mgm_get_int_parameter(p, CFG_DB_START_PARTIAL_TIMEOUT,
&c_restartPartialTimeout);
- if(c_restartPartialTimeout == 0){
+ ndb_mgm_get_int_parameter(p, CFG_DB_START_PARTITION_TIMEOUT, // return value is not checked, like the lookups above
+ &c_restartPartionedTimeout);
+ ndb_mgm_get_int_parameter(p, CFG_DB_START_FAILURE_TIMEOUT, // return value is not checked, like the lookups above
+ &c_restartFailureTimeout);
+
+ if(c_restartPartialTimeout == 0) // a value of 0 is remapped to all-ones
+ {
c_restartPartialTimeout = ~0;
}
+ if (c_restartPartionedTimeout ==0) // same 0 -> all-ones remapping for the partitioned timeout
+ {
+ c_restartPartionedTimeout = ~0;
+ }
+
+ if (c_restartFailureTimeout == 0) // same 0 -> all-ones remapping for the failure timeout
+ {
+ c_restartFailureTimeout = ~0;
+ }
+
setHbDelay(hbDBDB);
setHbApiDelay(hbDBAPI);
setArbitTimeout(arbitTimeout);
@@ -2051,6 +2472,16 @@ void Qmgr::execDISCONNECT_REP(Signal* signal)
NodeRecPtr nodePtr;
nodePtr.i = getOwnNodeId();
ptrCheckGuard(nodePtr, MAX_NODES, nodeRec);
+
+ char buf[100]; // hoisted out of the ZAPI_INACTIVE case so both error paths share it
+ if (getNodeState().startLevel < NodeState::SL_STARTED) // own start level is still below SL_STARTED
+ {
+ jam();
+ BaseString::snprintf(buf, 100, "Node %u disconected", nodeId); // same message text the ZAPI_INACTIVE case builds
+ progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf); // raise the same error code as the ZAPI_INACTIVE case
+ ndbrequire(false); // NOTE(review): presumably unreachable if progError does not return - confirm
+ }
+
switch(nodePtr.p->phase){
case ZRUNNING:
jam();
@@ -2069,7 +2500,6 @@ void Qmgr::execDISCONNECT_REP(Signal* signal)
ndbrequire(false);
case ZAPI_INACTIVE:
{
- char buf[100];
BaseString::snprintf(buf, 100, "Node %u disconected", nodeId);
progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
ndbrequire(false);
@@ -4178,8 +4608,10 @@ Qmgr::execDUMP_STATE_ORD(Signal* signal)
case 1:
infoEvent("creadyDistCom = %d, cpresident = %d\n",
creadyDistCom, cpresident);
- infoEvent("cpresidentAlive = %d, cpresidentCand = %d\n",
- cpresidentAlive, cpresidentCandidate);
+ infoEvent("cpresidentAlive = %d, cpresidentCand = %d (gci: %d)\n", // dump line now also shows the candidate's gci
+ cpresidentAlive,
+ c_start.m_president_candidate, // takes the place of the removed cpresidentCandidate
+ c_start.m_president_candidate_gci); // new value, printed inside the "(gci: ...)" part
infoEvent("ctoStatus = %d\n", ctoStatus);
for(Uint32 i = 1; i<MAX_NDB_NODES; i++){
if(getNodeInfo(i).getType() == NodeInfo::DB){
diff --git a/ndb/src/kernel/vm/Configuration.cpp b/ndb/src/kernel/vm/Configuration.cpp
index 831145a7a41..f8c79a53fb7 100644
--- a/ndb/src/kernel/vm/Configuration.cpp
+++ b/ndb/src/kernel/vm/Configuration.cpp
@@ -55,6 +55,12 @@ enum ndbd_options {
NDB_STD_OPTS_VARS;
// XXX should be my_bool ???
static int _daemon, _no_daemon, _foreground, _initial, _no_start;
+static int _initialstart; // backing variable of the "initial-start" option
+static const char* _nowait_nodes; // raw argument string of the "nowait-nodes" option
+
+extern Uint32 g_start_type; // defined elsewhere; Configuration::init ORs the ST_INITIAL_START bit into it
+extern NdbNodeBitmask g_nowait_nodes; // defined elsewhere; Configuration::init sets one bit per parsed node id
+
/**
* Arguments to NDB process
*/
@@ -82,6 +88,14 @@ static struct my_option my_long_options[] =
" (implies --nodaemon)",
(gptr*) &_foreground, (gptr*) &_foreground, 0,
GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 },
+ { "nowait-nodes", NO_ARG, // NOTE(review): NO_ARG is otherwise used in the argument-type column; presumably this field is the option id - confirm
+ "Nodes that will not be waited for during start",
+ (gptr*) &_nowait_nodes, (gptr*) &_nowait_nodes, 0, // value is stored in _nowait_nodes
+ GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0 }, // string option with a mandatory argument
+ { "initial-start", NO_ARG, // NOTE(review): same second-field question as "nowait-nodes" above - confirm
+ "Perform initial start",
+ (gptr*) &_initialstart, (gptr*) &_initialstart, 0, // value is stored in _initialstart
+ GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 }, // boolean flag, takes no argument
{ 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
};
static void short_usage_sub(void)
@@ -150,6 +164,37 @@ Configuration::init(int argc, char** argv)
globalData.ownId= 0;
+ if (_nowait_nodes) // "nowait-nodes" was given: parse its comma-separated node ids
+ {
+ BaseString str(_nowait_nodes);
+ Vector<BaseString> arr;
+ str.split(arr, ","); // one entry per comma-separated token
+ for (Uint32 i = 0; i<arr.size(); i++)
+ {
+ char *endptr = 0;
+ long val = strtol(arr[i].c_str(), &endptr, 10); // base-10 parse; endptr marks the first unparsed character
+ if (*endptr) // trailing characters after the number: reject the token
+ {
+ ndbout_c("Unable to parse nowait-nodes argument: %s : %s",
+ arr[i].c_str(), _nowait_nodes);
+ exit(-1);
+ }
+ if (! (val > 0 && val < MAX_NDB_NODES)) // accepted ids are 1 .. MAX_NDB_NODES-1
+ {
+ ndbout_c("Invalid nodeid specified in nowait-nodes: %ld : %s", // %ld because val is a long (was %d)
+ val, _nowait_nodes);
+ exit(-1);
+ }
+ g_nowait_nodes.set(val); // record the node in the global no-wait mask
+ }
+ }
+
+ if (_initialstart) // "initial-start" flag was given
+ {
+ _initialStart = true; // note: distinct from the option variable _initialstart (capital S)
+ g_start_type |= (1 << NodeState::ST_INITIAL_START); // add the initial-start bit to the global start type
+ }
+
return true;
}