diff options
author | unknown <jonas@perch.ndb.mysql.com> | 2006-04-06 11:43:31 +0200 |
---|---|---|
committer | unknown <jonas@perch.ndb.mysql.com> | 2006-04-06 11:43:31 +0200 |
commit | be0d6c94de4c9c6542d3381185a73a96c4bedcae (patch) | |
tree | 40f7c9e1a1b9a72e75e10ddcf6fcffe901e4968f /ndb | |
parent | e40bdb5b711c1bb4d8ae003153102a6e8dc1d735 (diff) | |
download | mariadb-git-be0d6c94de4c9c6542d3381185a73a96c4bedcae.tar.gz |
ndb - wl1760/bug#18216
add two new start options that will decrease the likelihood of bug#18612
push cntr-sp2 logic down into qmgr-sp1 to decrease the likelihood of bug#18612
ndb/include/kernel/signaldata/CmRegSignalData.hpp:
Expand CmRegReq with lots of stuff
ndb/include/mgmapi/ndb_logevent.h:
Add Start report during sp1
ndb/src/common/debugger/EventLogger.cpp:
Add Start report during sp1
ndb/src/kernel/blocks/dbdih/DbdihInit.cpp:
Init cntrref
ndb/src/kernel/blocks/dbdih/DbdihMain.cpp:
Fix small bugs related to partial initial start
ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp:
Remove timeouts as they are handled in QMGR nowadays
ndb/src/kernel/blocks/qmgr/Qmgr.hpp:
Push cntr sp2 logic down to QMGR to prevent the formation of multiple QMGR clusters
that would lead to start problems (crashes...)
ndb/src/kernel/blocks/qmgr/QmgrInit.cpp:
Push cntr sp2 logic down to QMGR to prevent the formation of multiple QMGR clusters
that would lead to start problems (crashes...)
ndb/src/kernel/blocks/qmgr/QmgrMain.cpp:
Push cntr sp2 logic down to QMGR to prevent the formation of multiple QMGR clusters
that would lead to start problems (crashes...)
ndb/src/kernel/vm/Configuration.cpp:
Add new flags
--initial-start
--nowait-nodes
Diffstat (limited to 'ndb')
-rw-r--r-- | ndb/include/kernel/signaldata/CmRegSignalData.hpp | 29 | ||||
-rw-r--r-- | ndb/include/mgmapi/ndb_logevent.h | 13 | ||||
-rw-r--r-- | ndb/src/common/debugger/EventLogger.cpp | 85 | ||||
-rw-r--r-- | ndb/src/kernel/blocks/dbdih/DbdihInit.cpp | 1 | ||||
-rw-r--r-- | ndb/src/kernel/blocks/dbdih/DbdihMain.cpp | 78 | ||||
-rw-r--r-- | ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp | 16 | ||||
-rw-r--r-- | ndb/src/kernel/blocks/qmgr/Qmgr.hpp | 28 | ||||
-rw-r--r-- | ndb/src/kernel/blocks/qmgr/QmgrInit.cpp | 3 | ||||
-rw-r--r-- | ndb/src/kernel/blocks/qmgr/QmgrMain.cpp | 544 | ||||
-rw-r--r-- | ndb/src/kernel/vm/Configuration.cpp | 45 |
10 files changed, 733 insertions, 109 deletions
diff --git a/ndb/include/kernel/signaldata/CmRegSignalData.hpp b/ndb/include/kernel/signaldata/CmRegSignalData.hpp index f33c991249f..ab51ed17bc3 100644 --- a/ndb/include/kernel/signaldata/CmRegSignalData.hpp +++ b/ndb/include/kernel/signaldata/CmRegSignalData.hpp @@ -30,12 +30,17 @@ class CmRegReq { friend class Qmgr; public: - STATIC_CONST( SignalLength = 3 ); + STATIC_CONST( SignalLength = 5 + NdbNodeBitmask::Size ); private: Uint32 blockRef; Uint32 nodeId; - Uint32 version; // See ndb_version.h + Uint32 version; // See ndb_version.h + + Uint32 start_type; // As specified by cmd-line or mgm, NodeState::StartType + Uint32 latest_gci; // 0 means no fs + Uint32 skip_nodes[NdbNodeBitmask::Size]; // Nodes that does not _need_ + // to be part of restart }; /** @@ -59,8 +64,7 @@ private: * The dynamic id that the node reciving this signal has */ Uint32 dynamicId; - - Uint32 allNdbNodes[NdbNodeBitmask::Size]; + Uint32 allNdbNodes[NdbNodeBitmask::Size]; }; /** @@ -73,7 +77,7 @@ class CmRegRef { friend class Qmgr; public: - STATIC_CONST( SignalLength = 4 ); + STATIC_CONST( SignalLength = 7 + NdbNodeBitmask::Size ); enum ErrorCode { ZBUSY = 0, /* Only the president can send this */ @@ -85,14 +89,27 @@ public: * as president. 
*/ ZNOT_PRESIDENT = 5, /* We are not president */ ZNOT_DEAD = 6, /* We are not dead when we are starting */ - ZINCOMPATIBLE_VERSION = 7 + ZINCOMPATIBLE_VERSION = 7, + ZINCOMPATIBLE_START_TYPE = 8 }; private: Uint32 blockRef; Uint32 nodeId; Uint32 errorCode; + /** + * Applicable if ZELECTION + */ Uint32 presidentCandidate; + Uint32 candidate_latest_gci; // 0 means non + + /** + * Data for sending node sending node + */ + Uint32 latest_gci; + Uint32 start_type; + Uint32 skip_nodes[NdbNodeBitmask::Size]; // Nodes that does not _need_ + // to be part of restart }; class CmAdd { diff --git a/ndb/include/mgmapi/ndb_logevent.h b/ndb/include/mgmapi/ndb_logevent.h index 6025ff2725c..d57646c14db 100644 --- a/ndb/include/mgmapi/ndb_logevent.h +++ b/ndb/include/mgmapi/ndb_logevent.h @@ -166,10 +166,14 @@ extern "C" { /** NDB_MGM_EVENT_CATEGORY_BACKUP */ NDB_LE_BackupCompleted = 56, /** NDB_MGM_EVENT_CATEGORY_BACKUP */ - NDB_LE_BackupAborted = 57 + NDB_LE_BackupAborted = 57, /* 58 used in 5.1 */ /* 59 used */ + + /** NDB_MGM_EVENT_CATEGORY_STARTUP */ + NDB_LE_StartReport = 60 + /* 60 unused */ /* 61 unused */ /* 62 unused */ @@ -625,6 +629,13 @@ extern "C" { unsigned type; unsigned node_id; } SingleUser; + /** Log even data @ref NDB_LE_StartReport */ + struct { + unsigned report_type; + unsigned remaining_time; + unsigned bitmask_size; + unsigned bitmask_data[1]; + } StartReport; #ifndef DOXYGEN_FIX }; #else diff --git a/ndb/src/common/debugger/EventLogger.cpp b/ndb/src/common/debugger/EventLogger.cpp index f785cda5215..e1a477b8ea4 100644 --- a/ndb/src/common/debugger/EventLogger.cpp +++ b/ndb/src/common/debugger/EventLogger.cpp @@ -707,6 +707,90 @@ void getTextSingleUser(QQQQ) { } } +void getTextStartReport(QQQQ) { + Uint32 time = theData[2]; + Uint32 sz = theData[3]; + char mask1[100]; + char mask2[100]; + char mask3[100]; + char mask4[100]; + BitmaskImpl::getText(sz, theData + 4 + (0 * sz), mask1); + BitmaskImpl::getText(sz, theData + 4 + (1 * sz), mask2); + 
BitmaskImpl::getText(sz, theData + 4 + (2 * sz), mask3); + BitmaskImpl::getText(sz, theData + 4 + (3 * sz), mask4); + switch(theData[1]){ + case 1: // Wait initial + BaseString::snprintf + (m_text, m_text_len, + "Initial start, waiting for %s to connect, " + " nodes [ all: %s connected: %s no-wait: %s ]", + mask4, mask1, mask2, mask3); + break; + case 2: // Wait partial + BaseString::snprintf + (m_text, m_text_len, + "Waiting until nodes: %s connects, " + "nodes [ all: %s connected: %s no-wait: %s ]", + mask4, mask1, mask2, mask3); + break; + case 3: // Wait partial timeout + BaseString::snprintf + (m_text, m_text_len, + "Waiting %u sec for nodes %s to connect, " + "nodes [ all: %s connected: %s no-wait: %s ]", + + time, mask4, mask1, mask2, mask3); + break; + case 4: // Wait partioned + BaseString::snprintf + (m_text, m_text_len, + "Waiting for non partitioned start, " + "nodes [ all: %s connected: %s missing: %s no-wait: %s ]", + + mask1, mask2, mask4, mask3); + break; + case 5: + BaseString::snprintf + (m_text, m_text_len, + "Waiting %u sec for non partitioned start, " + "nodes [ all: %s connected: %s missing: %s no-wait: %s ]", + + time, mask1, mask2, mask4, mask3); + break; + case 0x8000: // Do initial + BaseString::snprintf + (m_text, m_text_len, + "Initial start with nodes %s [ missing: %s no-wait: %s ]", + mask2, mask4, mask3); + break; + case 0x8001: // Do start + BaseString::snprintf + (m_text, m_text_len, + "Start with all nodes %s", + mask2); + break; + case 0x8002: // Do partial + BaseString::snprintf + (m_text, m_text_len, + "Start with nodes %s [ missing: %s no-wait: %s ]", + mask2, mask4, mask3); + break; + case 0x8003: // Do partioned + BaseString::snprintf + (m_text, m_text_len, + "Start potentially partitioned with nodes %s " + " [ missing: %s no-wait: %s ]", + mask2, mask4, mask3); + break; + default: + BaseString::snprintf + (m_text, m_text_len, + "Unknown startreport: 0x%x [ %s %s %s %s ]", + theData[1], + mask1, mask2, mask3, mask4); + } +} + 
#if 0 BaseString::snprintf(m_text, m_text_len, @@ -755,6 +839,7 @@ const EventLoggerBase::EventRepLogLevelMatrix EventLoggerBase::matrix[] = { ROW(StartREDOLog, LogLevel::llStartUp, 10, Logger::LL_INFO ), ROW(StartLog, LogLevel::llStartUp, 10, Logger::LL_INFO ), ROW(UNDORecordsExecuted, LogLevel::llStartUp, 15, Logger::LL_INFO ), + ROW(StartReport, LogLevel::llStartUp, 4, Logger::LL_INFO ), // NODERESTART ROW(NR_CopyDict, LogLevel::llNodeRestart, 8, Logger::LL_INFO ), diff --git a/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp b/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp index d6f6b772516..cd987048577 100644 --- a/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp +++ b/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp @@ -71,6 +71,7 @@ void Dbdih::initData() cwaitLcpSr = false; c_blockCommit = false; c_blockCommitNo = 1; + cntrlblockref = RNIL; }//Dbdih::initData() void Dbdih::initRecords() diff --git a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp index e0dbc9bd272..e4b95a5af7d 100644 --- a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp +++ b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp @@ -11659,7 +11659,7 @@ void Dbdih::makeNodeGroups(Uint32 nodeArray[]) Uint32 tmngNode; Uint32 tmngNodeGroup; Uint32 tmngLimit; - Uint32 i; + Uint32 i, j; /**----------------------------------------------------------------------- * ASSIGN ALL ACTIVE NODES INTO NODE GROUPS. 
HOT SPARE NODES ARE ASSIGNED @@ -11705,6 +11705,38 @@ void Dbdih::makeNodeGroups(Uint32 nodeArray[]) Sysfile::setNodeGroup(mngNodeptr.i, SYSFILE->nodeGroups, mngNodeptr.p->nodeGroup); }//if }//for + + for (i = 0; i<cnoOfNodeGroups; i++) + { + jam(); + bool alive = false; + NodeGroupRecordPtr NGPtr; + NGPtr.i = i; + ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord); + for (j = 0; j<NGPtr.p->nodeCount; j++) + { + jam(); + mngNodeptr.i = NGPtr.p->nodesInGroup[j]; + ptrCheckGuard(mngNodeptr, MAX_NDB_NODES, nodeRecord); + if (checkNodeAlive(NGPtr.p->nodesInGroup[j])) + { + alive = true; + break; + } + } + + if (!alive) + { + char buf[255]; + BaseString::snprintf + (buf, sizeof(buf), + "Illegal initial start, no alive node in nodegroup %u", i); + progError(__LINE__, + NDBD_EXIT_SR_RESTARTCONFLICT, + buf); + + } + } }//Dbdih::makeNodeGroups() /** @@ -12512,7 +12544,6 @@ void Dbdih::sendStartFragreq(Signal* signal, void Dbdih::setInitialActiveStatus() { NodeRecordPtr siaNodeptr; - Uint32 tsiaNodeActiveStatus; Uint32 tsiaNoActiveNodes; tsiaNoActiveNodes = csystemnodes - cnoHotSpare; @@ -12520,39 +12551,34 @@ void Dbdih::setInitialActiveStatus() SYSFILE->nodeStatus[i] = 0; for (siaNodeptr.i = 1; siaNodeptr.i < MAX_NDB_NODES; siaNodeptr.i++) { ptrAss(siaNodeptr, nodeRecord); - if (siaNodeptr.p->nodeStatus == NodeRecord::ALIVE) { + switch(siaNodeptr.p->nodeStatus){ + case NodeRecord::ALIVE: + case NodeRecord::DEAD: if (tsiaNoActiveNodes == 0) { jam(); siaNodeptr.p->activeStatus = Sysfile::NS_HotSpare; } else { jam(); tsiaNoActiveNodes = tsiaNoActiveNodes - 1; - siaNodeptr.p->activeStatus = Sysfile::NS_Active; - }//if - } else { - jam(); - siaNodeptr.p->activeStatus = Sysfile::NS_NotDefined; - }//if - switch (siaNodeptr.p->activeStatus) { - case Sysfile::NS_Active: - jam(); - tsiaNodeActiveStatus = Sysfile::NS_Active; - break; - case Sysfile::NS_HotSpare: - jam(); - tsiaNodeActiveStatus = Sysfile::NS_HotSpare; - break; - case Sysfile::NS_NotDefined: - jam(); - 
tsiaNodeActiveStatus = Sysfile::NS_NotDefined; + if (siaNodeptr.p->nodeStatus == NodeRecord::ALIVE) + { + jam(); + siaNodeptr.p->activeStatus = Sysfile::NS_Active; + } + else + { + siaNodeptr.p->activeStatus = Sysfile::NS_NotActive_NotTakenOver; + } + } break; default: - ndbrequire(false); - return; + jam(); + siaNodeptr.p->activeStatus = Sysfile::NS_NotDefined; break; - }//switch - Sysfile::setNodeStatus(siaNodeptr.i, SYSFILE->nodeStatus, - tsiaNodeActiveStatus); + }//if + Sysfile::setNodeStatus(siaNodeptr.i, + SYSFILE->nodeStatus, + siaNodeptr.p->activeStatus); }//for }//Dbdih::setInitialActiveStatus() diff --git a/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp b/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp index f5dab99ee35..3fc24e395b1 100644 --- a/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp +++ b/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp @@ -820,17 +820,9 @@ Ndbcntr::trySystemRestart(Signal* signal){ return false; } - if(!allNodes && c_start.m_startPartialTimeout > now){ - jam(); - return false; - } - NodeState::StartType srType = NodeState::ST_SYSTEM_RESTART; - if(c_start.m_waiting.equal(c_start.m_withoutLog)){ - if(!allNodes){ - jam(); - return false; - } + if(c_start.m_waiting.equal(c_start.m_withoutLog)) + { jam(); srType = NodeState::ST_INITIAL_START; c_start.m_starting = c_start.m_withoutLog; // Used for starting... 
@@ -860,10 +852,6 @@ Ndbcntr::trySystemRestart(Signal* signal){ ndbrequire(false); // All nodes -> partitioning, which is not allowed } - if(c_start.m_startPartitionedTimeout > now){ - jam(); - return false; - } break; } diff --git a/ndb/src/kernel/blocks/qmgr/Qmgr.hpp b/ndb/src/kernel/blocks/qmgr/Qmgr.hpp index e698ddd981b..3feb0858e82 100644 --- a/ndb/src/kernel/blocks/qmgr/Qmgr.hpp +++ b/ndb/src/kernel/blocks/qmgr/Qmgr.hpp @@ -50,6 +50,7 @@ #define ZAPI_HB_HANDLING 3 #define ZTIMER_HANDLING 4 #define ZARBIT_HANDLING 5 +#define ZSTART_FAILURE_LIMIT 6 /* Error Codes ------------------------------*/ #define ZERRTOOMANY 1101 @@ -113,8 +114,19 @@ public: Uint32 m_gsn; SignalCounter m_nodes; - } c_start; + Uint32 m_latest_gci; + + Uint32 m_start_type; + NdbNodeBitmask m_skip_nodes; + NdbNodeBitmask m_starting_nodes; + NdbNodeBitmask m_starting_nodes_w_log; + Uint16 m_president_candidate; + Uint32 m_president_candidate_gci; + Uint16 m_regReqReqSent; + Uint16 m_regReqReqRecv; + } c_start; + NdbNodeBitmask c_definedNodes; // DB nodes in config NdbNodeBitmask c_clusterNodes; // DB nodes in cluster NodeBitmask c_connectedNodes; // All kinds of connected nodes @@ -125,7 +137,7 @@ public: * i.e. 
nodes that connect to use, when we already have elected president */ NdbNodeBitmask c_readnodes_nodes; - + Uint32 c_maxDynamicId; // Records @@ -236,6 +248,9 @@ private: void execREAD_NODESREF(Signal* signal); void execREAD_NODESCONF(Signal* signal); + void execDIH_RESTARTREF(Signal* signal); + void execDIH_RESTARTCONF(Signal* signal); + void execAPI_VERSION_REQ(Signal* signal); void execAPI_BROADCAST_REP(Signal* signal); @@ -252,6 +267,7 @@ private: // Statement blocks void check_readnodes_reply(Signal* signal, Uint32 nodeId, Uint32 gsn); + Uint32 check_startup(Signal* signal); void node_failed(Signal* signal, Uint16 aFailedNode); void checkStartInterface(Signal* signal); @@ -374,12 +390,12 @@ private: /* Status flags ----------------------------------*/ Uint32 c_restartPartialTimeout; + Uint32 c_restartPartionedTimeout; + Uint32 c_restartFailureTimeout; + Uint64 c_start_election_time; Uint16 creadyDistCom; - Uint16 c_regReqReqSent; - Uint16 c_regReqReqRecv; - Uint64 c_stopElectionTime; - Uint16 cpresidentCandidate; + Uint16 cdelayRegreq; Uint16 cpresidentAlive; Uint16 cnoFailedNodes; diff --git a/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp b/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp index d0ecb114d0f..f14cbd48695 100644 --- a/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp +++ b/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp @@ -98,6 +98,9 @@ Qmgr::Qmgr(const class Configuration & conf) addRecSignal(GSN_READ_NODESREF, &Qmgr::execREAD_NODESREF); addRecSignal(GSN_READ_NODESCONF, &Qmgr::execREAD_NODESCONF); + + addRecSignal(GSN_DIH_RESTARTREF, &Qmgr::execDIH_RESTARTREF); + addRecSignal(GSN_DIH_RESTARTCONF, &Qmgr::execDIH_RESTARTCONF); initData(); }//Qmgr::Qmgr() diff --git a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp index ea256821924..a45d35d343b 100644 --- a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp +++ b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp @@ -146,6 +146,30 @@ void Qmgr::execCONTINUEB(Signal* signal) runArbitThread(signal); return; break; + 
case ZSTART_FAILURE_LIMIT:{ + if (cpresident != ZNIL) + { + jam(); + return; + } + Uint64 now = NdbTick_CurrentMillisecond(); + if (now > (c_start_election_time + c_restartFailureTimeout)) + { + jam(); + BaseString tmp; + tmp.append("Shutting down node as total restart time exceeds " + " StartFailureTimeout as set in config file "); + if(c_restartFailureTimeout == ~0) + tmp.append(" 0 (inifinite)"); + else + tmp.appfmt(" %d", c_restartFailureTimeout); + + progError(__LINE__, NDBD_EXIT_SYSTEM_ERROR, tmp.c_str()); + } + signal->theData[0] = ZSTART_FAILURE_LIMIT; + sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 3000, 1); + return; + } default: jam(); // ZCOULD_NOT_OCCUR_ERROR; @@ -273,14 +297,28 @@ void Qmgr::startphase1(Signal* signal) nodePtr.p->phase = ZSTARTING; nodePtr.p->blockRef = reference(); c_connectedNodes.set(nodePtr.i); + + signal->theData[0] = reference(); + sendSignal(DBDIH_REF, GSN_DIH_RESTARTREQ, signal, 1, JBB); + return; +} - signal->theData[0] = 0; // no answer - signal->theData[1] = 0; // no id - signal->theData[2] = NodeInfo::DB; - sendSignal(CMVMI_REF, GSN_OPEN_COMREQ, signal, 3, JBB); +void +Qmgr::execDIH_RESTARTREF(Signal*signal) +{ + jamEntry(); + c_start.m_latest_gci = 0; + execCM_INFOCONF(signal); +} + +void +Qmgr::execDIH_RESTARTCONF(Signal*signal) +{ + jamEntry(); + + c_start.m_latest_gci = signal->theData[1]; execCM_INFOCONF(signal); - return; } void Qmgr::setHbDelay(UintR aHbDelay) @@ -418,25 +456,44 @@ Qmgr::execREAD_NODESREF(Signal* signal) /*******************************/ void Qmgr::execCM_INFOCONF(Signal* signal) { + /** + * Open communcation to all DB nodes + */ + signal->theData[0] = 0; // no answer + signal->theData[1] = 0; // no id + signal->theData[2] = NodeInfo::DB; + sendSignal(CMVMI_REF, GSN_OPEN_COMREQ, signal, 3, JBB); + cpresident = ZNIL; - cpresidentCandidate = getOwnNodeId(); cpresidentAlive = ZFALSE; - c_stopElectionTime = NdbTick_CurrentMillisecond(); - c_stopElectionTime += c_restartPartialTimeout; + 
c_start_election_time = NdbTick_CurrentMillisecond(); + + signal->theData[0] = ZSTART_FAILURE_LIMIT; + sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 3000, 1); + cmInfoconf010Lab(signal); return; }//Qmgr::execCM_INFOCONF() +Uint32 g_start_type = 0; +NdbNodeBitmask g_nowait_nodes; // Set by clo + void Qmgr::cmInfoconf010Lab(Signal* signal) { c_start.m_startKey = 0; c_start.m_startNode = getOwnNodeId(); c_start.m_nodes.clearWaitingFor(); c_start.m_gsn = GSN_CM_REGREQ; + c_start.m_starting_nodes.clear(); + c_start.m_starting_nodes_w_log.clear(); + c_start.m_regReqReqSent = 0; + c_start.m_regReqReqRecv = 0; + c_start.m_skip_nodes = g_nowait_nodes; + c_start.m_skip_nodes.bitAND(c_definedNodes); + c_start.m_start_type = g_start_type; NodeRecPtr nodePtr; - c_regReqReqSent = c_regReqReqRecv = 0; cnoOfNodes = 0; for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) { jam(); @@ -471,14 +528,18 @@ void Qmgr::cmInfoconf010Lab(Signal* signal) void Qmgr::sendCmRegReq(Signal * signal, Uint32 nodeId){ - c_regReqReqSent++; - CmRegReq * const cmRegReq = (CmRegReq *)&signal->theData[0]; - cmRegReq->blockRef = reference(); - cmRegReq->nodeId = getOwnNodeId(); - cmRegReq->version = NDB_VERSION; + CmRegReq * req = (CmRegReq *)&signal->theData[0]; + req->blockRef = reference(); + req->nodeId = getOwnNodeId(); + req->version = NDB_VERSION; + req->latest_gci = c_start.m_latest_gci; + req->start_type = c_start.m_start_type; + c_start.m_skip_nodes.copyto(NdbNodeBitmask::Size, req->skip_nodes); const Uint32 ref = calcQmgrBlockRef(nodeId); sendSignal(ref, GSN_CM_REGREQ, signal, CmRegReq::SignalLength, JBB); DEBUG_START(GSN_CM_REGREQ, nodeId, ""); + + c_start.m_regReqReqSent++; } /* @@ -518,6 +579,18 @@ Qmgr::sendCmRegReq(Signal * signal, Uint32 nodeId){ /*******************************/ /* CM_REGREQ */ /*******************************/ +static +int +check_start_type(Uint32 starting, Uint32 own) +{ + if (starting == (1 << NodeState::ST_INITIAL_START) && + ((own & (1 << 
NodeState::ST_INITIAL_START)) == 0)) + { + return 1; + } + return 0; +} + void Qmgr::execCM_REGREQ(Signal* signal) { DEBUG_START3(signal, ""); @@ -529,6 +602,17 @@ void Qmgr::execCM_REGREQ(Signal* signal) const BlockReference Tblockref = cmRegReq->blockRef; const Uint32 startingVersion = cmRegReq->version; addNodePtr.i = cmRegReq->nodeId; + Uint32 gci = 1; + Uint32 start_type = ~0; + NdbNodeBitmask skip_nodes; + + if (signal->getLength() == CmRegReq::SignalLength) + { + jam(); + gci = cmRegReq->latest_gci; + start_type = cmRegReq->start_type; + skip_nodes.assign(NdbNodeBitmask::Size, cmRegReq->skip_nodes); + } if (creadyDistCom == ZFALSE) { jam(); @@ -542,11 +626,19 @@ void Qmgr::execCM_REGREQ(Signal* signal) return; } - ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec); - - if (cpresident != getOwnNodeId()){ + if (check_start_type(start_type, c_start.m_start_type)) + { + jam(); + sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_START_TYPE); + return; + } + + if (cpresident != getOwnNodeId()) + { jam(); - if (cpresident == ZNIL) { + + if (cpresident == ZNIL) + { /*** * We don't know the president. * If the node to be added has lower node id @@ -554,13 +646,19 @@ void Qmgr::execCM_REGREQ(Signal* signal) * candidate */ jam(); - if (addNodePtr.i < cpresidentCandidate) { + if (gci > c_start.m_president_candidate_gci || + (gci == c_start.m_president_candidate_gci && + addNodePtr.i < c_start.m_president_candidate)) + { jam(); - cpresidentCandidate = addNodePtr.i; - }//if + c_start.m_president_candidate = addNodePtr.i; + c_start.m_president_candidate_gci = gci; + ndbout_c("assign candidate: %u %u", addNodePtr.i, gci); + } sendCmRegrefLab(signal, Tblockref, CmRegRef::ZELECTION); return; - } + } + /** * We are not the president. * We know the president. 
@@ -570,7 +668,8 @@ void Qmgr::execCM_REGREQ(Signal* signal) return; }//if - if (c_start.m_startNode != 0){ + if (c_start.m_startNode != 0) + { jam(); /** * President busy by adding another node @@ -579,7 +678,8 @@ void Qmgr::execCM_REGREQ(Signal* signal) return; }//if - if (ctoStatus == Q_ACTIVE) { + if (ctoStatus == Q_ACTIVE) + { jam(); /** * Active taking over as president @@ -588,7 +688,8 @@ void Qmgr::execCM_REGREQ(Signal* signal) return; }//if - if (getNodeInfo(addNodePtr.i).m_type != NodeInfo::DB) { + if (getNodeInfo(addNodePtr.i).m_type != NodeInfo::DB) + { jam(); /** * The new node is not in config file @@ -597,13 +698,15 @@ void Qmgr::execCM_REGREQ(Signal* signal) return; } + ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec); Phase phase = addNodePtr.p->phase; - if (phase != ZINIT){ + if (phase != ZINIT) + { jam(); DEBUG("phase = " << phase); sendCmRegrefLab(signal, Tblockref, CmRegRef::ZNOT_DEAD); return; - }//if + } jam(); /** @@ -675,7 +778,12 @@ void Qmgr::sendCmRegrefLab(Signal* signal, BlockReference TBRef, ref->blockRef = reference(); ref->nodeId = getOwnNodeId(); ref->errorCode = Terror; - ref->presidentCandidate = (cpresident == ZNIL ? cpresidentCandidate : cpresident); + ref->presidentCandidate = + (cpresident == ZNIL ? 
c_start.m_president_candidate : cpresident); + ref->candidate_latest_gci = c_start.m_president_candidate_gci; + ref->latest_gci = c_start.m_latest_gci; + ref->start_type = c_start.m_start_type; + c_start.m_skip_nodes.copyto(NdbNodeBitmask::Size, ref->skip_nodes); sendSignal(TBRef, GSN_CM_REGREF, signal, CmRegRef::SignalLength, JBB); DEBUG_START(GSN_CM_REGREF, refToNode(TBRef), ""); @@ -869,28 +977,105 @@ Qmgr::sendCmNodeInfoReq(Signal* signal, Uint32 nodeId, const NodeRec * self){ /*******************************/ /* CM_REGREF */ /*******************************/ +static +const char * +get_start_type_string(Uint32 st) +{ + static char buf[256]; + + if (st == 0) + { + return "<ANY>"; + } + else + { + buf[0] = 0; + for(Uint32 i = 0; i<NodeState::ST_ILLEGAL_TYPE; i++) + { + if (st & (1 << i)) + { + if (buf[0]) + strcat(buf, "/"); + switch(i){ + case NodeState::ST_INITIAL_START: + strcat(buf, "inital start"); + break; + case NodeState::ST_SYSTEM_RESTART: + strcat(buf, "system restart"); + break; + case NodeState::ST_NODE_RESTART: + strcat(buf, "node restart"); + break; + case NodeState::ST_INITIAL_NODE_RESTART: + strcat(buf, "initial node restart"); + break; + } + } + } + return buf; + } +} + void Qmgr::execCM_REGREF(Signal* signal) { jamEntry(); - UintR TaddNodeno = signal->theData[1]; - UintR TrefuseReason = signal->theData[2]; - Uint32 candidate = signal->theData[3]; + CmRegRef* ref = (CmRegRef*)signal->getDataPtr(); + UintR TaddNodeno = ref->nodeId; + UintR TrefuseReason = ref->errorCode; + Uint32 candidate = ref->presidentCandidate; + Uint32 node_gci = 1; + Uint32 candidate_gci = 1; + Uint32 start_type = ~0; + NdbNodeBitmask skip_nodes; DEBUG_START3(signal, TrefuseReason); - c_regReqReqRecv++; + if (signal->getLength() == CmRegRef::SignalLength) + { + jam(); + node_gci = ref->latest_gci; + candidate_gci = ref->candidate_latest_gci; + start_type = ref->start_type; + skip_nodes.assign(NdbNodeBitmask::Size, ref->skip_nodes); + } + + c_start.m_regReqReqRecv++; // 
Ignore block reference in data[0] - if(candidate != cpresidentCandidate){ + if(candidate != c_start.m_president_candidate) + { jam(); - c_regReqReqRecv = ~0; + c_start.m_regReqReqRecv = ~0; } - + + c_start.m_starting_nodes.set(TaddNodeno); + if (node_gci) + { + jam(); + c_start.m_starting_nodes_w_log.set(TaddNodeno); + } + + skip_nodes.bitAND(c_definedNodes); + c_start.m_skip_nodes.bitOR(skip_nodes); + + char buf[100]; switch (TrefuseReason) { case CmRegRef::ZINCOMPATIBLE_VERSION: jam(); - systemErrorLab(signal, __LINE__, "incompatible version, connection refused by running ndb node"); + systemErrorLab(signal, __LINE__, + "incompatible version, " + "connection refused by running ndb node"); + case CmRegRef::ZINCOMPATIBLE_START_TYPE: + jam(); + BaseString::snprintf(buf, sizeof(buf), + "incompatible start type detected: node %d" + " reports %s(%d) my start type: %s(%d)", + TaddNodeno, + get_start_type_string(start_type), start_type, + get_start_type_string(c_start.m_start_type), + c_start.m_start_type); + progError(__LINE__, NDBD_EXIT_SR_RESTARTCONFLICT, buf); break; case CmRegRef::ZBUSY: case CmRegRef::ZBUSY_TO_PRES: @@ -909,14 +1094,19 @@ void Qmgr::execCM_REGREF(Signal* signal) break; case CmRegRef::ZELECTION: jam(); - if (cpresidentCandidate > TaddNodeno) { + if (candidate_gci > c_start.m_president_candidate_gci || + (candidate_gci == c_start.m_president_candidate_gci && + candidate < c_start.m_president_candidate)) + { jam(); //---------------------------------------- /* We may already have a candidate */ /* choose the lowest nodeno */ //---------------------------------------- signal->theData[3] = 2; - cpresidentCandidate = TaddNodeno; + c_start.m_president_candidate = candidate; + c_start.m_president_candidate_gci = candidate_gci; + ndbout_c("assign candidate: %u %u", candidate, candidate_gci); } else { signal->theData[3] = 4; }//if @@ -944,32 +1134,34 @@ void Qmgr::execCM_REGREF(Signal* signal) //----------------------------------------- sendSignal(CMVMI_REF, 
GSN_EVENT_REP, signal, 4, JBB); - if(cpresidentAlive == ZTRUE){ + if(cpresidentAlive == ZTRUE) + { jam(); - DEBUG(""); + DEBUG("cpresidentAlive"); return; } - if(c_regReqReqSent != c_regReqReqRecv){ + if(c_start.m_regReqReqSent != c_start.m_regReqReqRecv) + { jam(); - DEBUG( c_regReqReqSent << " != " << c_regReqReqRecv); + DEBUG(c_start.m_regReqReqSent << " != " << c_start.m_regReqReqRecv); return; } - if(cpresidentCandidate != getOwnNodeId()){ + if(c_start.m_president_candidate != getOwnNodeId()) + { jam(); - DEBUG(""); + DEBUG("i'm not the candidate"); return; } - + /** - * All configured nodes has agreed + * All connected nodes has agreed */ - Uint64 now = NdbTick_CurrentMillisecond(); - if((c_regReqReqRecv == cnoOfNodes) || now > c_stopElectionTime){ + if(check_startup(signal)) + { jam(); electionWon(signal); - sendSttorryLab(signal); /** * Start timer handling @@ -981,6 +1173,190 @@ void Qmgr::execCM_REGREF(Signal* signal) return; }//Qmgr::execCM_REGREF() +Uint32 +Qmgr::check_startup(Signal* signal) +{ + Uint64 now = NdbTick_CurrentMillisecond(); + Uint64 partial_timeout = c_start_election_time + c_restartPartialTimeout; + Uint64 partitioned_timeout = partial_timeout + c_restartPartionedTimeout; + + /** + * First see if we should wait more... 
+ */ + NdbNodeBitmask tmp; + tmp.bitOR(c_start.m_skip_nodes); + tmp.bitOR(c_start.m_starting_nodes); + + NdbNodeBitmask wait; + wait.assign(c_definedNodes); + wait.bitANDC(tmp); + + Uint32 retVal = 0; + NdbNodeBitmask report_mask; + + if ((c_start.m_latest_gci == 0) || + (c_start.m_start_type == (1 << NodeState::ST_INITIAL_START))) + { + if (!tmp.equal(c_definedNodes)) + { + jam(); + signal->theData[1] = 1; + signal->theData[2] = ~0; + report_mask.assign(wait); + retVal = 0; + goto start_report; + } + else + { + jam(); + signal->theData[1] = 0x8000; + report_mask.assign(c_definedNodes); + report_mask.bitANDC(c_start.m_starting_nodes); + retVal = 1; + goto start_report; + } + } + const bool all = c_start.m_starting_nodes.equal(c_definedNodes); + CheckNodeGroups* sd = (CheckNodeGroups*)&signal->theData[0]; + + { + /** + * Check for missing node group directly + */ + char buf[100]; + NdbNodeBitmask check; + check.assign(c_definedNodes); + check.bitANDC(c_start.m_starting_nodes); // Not connected nodes + check.bitOR(c_start.m_starting_nodes_w_log); + + sd->blockRef = reference(); + sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck; + sd->mask = check; + EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal, + CheckNodeGroups::SignalLength); + + if (sd->output == CheckNodeGroups::Lose) + { + jam(); + goto missing_nodegroup; + } + } + + sd->blockRef = reference(); + sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck; + sd->mask = c_start.m_starting_nodes; + EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal, + CheckNodeGroups::SignalLength); + + const Uint32 result = sd->output; + + sd->blockRef = reference(); + sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck; + sd->mask = c_start.m_starting_nodes_w_log; + EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal, + CheckNodeGroups::SignalLength); + + const Uint32 result_w_log = sd->output; + + if (tmp.equal(c_definedNodes)) + { + /** + * All nodes (wrt 
no-wait nodes) has connected... + * this means that we will now start or die + */ + jam(); + switch(result_w_log){ + case CheckNodeGroups::Lose: + { + jam(); + goto missing_nodegroup; + } + case CheckNodeGroups::Win: + signal->theData[1] = all ? 0x8001 : 0x8002; + report_mask.assign(c_definedNodes); + report_mask.bitANDC(c_start.m_starting_nodes); + retVal = 1; + goto start_report; + case CheckNodeGroups::Partitioning: + ndbrequire(result != CheckNodeGroups::Lose); + signal->theData[1] = + all ? 0x8001 : (result == CheckNodeGroups::Win ? 0x8002 : 0x8003); + report_mask.assign(c_definedNodes); + report_mask.bitANDC(c_start.m_starting_nodes); + retVal = 1; + goto start_report; + } + } + + if (now < partial_timeout) + { + jam(); + signal->theData[1] = c_restartPartialTimeout == ~0 ? 2 : 3; + signal->theData[2] = Uint32((partial_timeout - now + 500) / 1000); + report_mask.assign(wait); + retVal = 0; + goto start_report; + } + + /** + * Start partial has passed...check for partitioning... + */ + switch(result_w_log){ + case CheckNodeGroups::Lose: + jam(); + goto missing_nodegroup; + case CheckNodeGroups::Partitioning: + if (now < partitioned_timeout && result != CheckNodeGroups::Win) + { + signal->theData[1] = c_restartPartionedTimeout == ~0 ? 4 : 5; + signal->theData[2] = Uint32((partitioned_timeout - now + 500) / 1000); + report_mask.assign(c_definedNodes); + report_mask.bitANDC(c_start.m_starting_nodes); + retVal = 0; + goto start_report; + } + // Fall through... + case CheckNodeGroups::Win: + signal->theData[1] = + all ? 0x8001 : (result == CheckNodeGroups::Win ? 
0x8002 : 0x8003); + report_mask.assign(c_definedNodes); + report_mask.bitANDC(c_start.m_starting_nodes); + retVal = 1; + goto start_report; + } + + ndbrequire(false); + +start_report: + jam(); + { + Uint32 sz = NdbNodeBitmask::Size; + signal->theData[0] = NDB_LE_StartReport; + signal->theData[3] = sz; + Uint32* ptr = signal->theData+4; + c_definedNodes.copyto(sz, ptr); ptr += sz; + c_start.m_starting_nodes.copyto(sz, ptr); ptr += sz; + c_start.m_skip_nodes.copyto(sz, ptr); ptr += sz; + report_mask.copyto(sz, ptr); ptr+= sz; + sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, + 4+4*NdbNodeBitmask::Size, JBB); + } + return retVal; + +missing_nodegroup: + jam(); + char buf[100], mask1[100], mask2[100]; + c_start.m_starting_nodes.getText(mask1); + tmp.assign(c_start.m_starting_nodes); + tmp.bitANDC(c_start.m_starting_nodes_w_log); + tmp.getText(mask2); + BaseString::snprintf(buf, sizeof(buf), + "Unable to start missing node group! " + " starting: %s (missing fs for: %s)", + mask1, mask2); + progError(__LINE__, NDBD_EXIT_SR_RESTARTCONFLICT, buf); +} + void Qmgr::electionWon(Signal* signal){ NodeRecPtr myNodePtr; @@ -999,7 +1375,7 @@ Qmgr::electionWon(Signal* signal){ c_clusterNodes.set(getOwnNodeId()); cpresidentAlive = ZTRUE; - c_stopElectionTime = ~0; + c_start_election_time = ~0; c_start.reset(); signal->theData[0] = NDB_LE_CM_REGCONF; @@ -1007,6 +1383,13 @@ Qmgr::electionWon(Signal* signal){ signal->theData[2] = cpresident; signal->theData[3] = 1; sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB); + + c_start.m_starting_nodes.clear(getOwnNodeId()); + if (c_start.m_starting_nodes.isclear()) + { + jam(); + sendSttorryLab(signal); + } } /* @@ -1020,7 +1403,15 @@ Qmgr::electionWon(Signal* signal){ /*--------------------------------------------------------------*/ void Qmgr::regreqTimeLimitLab(Signal* signal) { - if(cpresident == ZNIL){ + if(cpresident == ZNIL) + { + if (c_start.m_president_candidate == ZNIL) + { + jam(); + c_start.m_president_candidate = getOwnNodeId(); + 
ndbout_c("Assigning candidate to self: %d", getOwnNodeId()); + } + cmInfoconf010Lab(signal); } }//Qmgr::regreqTimelimitLab() @@ -1430,6 +1821,17 @@ void Qmgr::execCM_ACKADD(Signal* signal) */ handleArbitNdbAdd(signal, addNodePtr.i); c_start.reset(); + + if (c_start.m_starting_nodes.get(addNodePtr.i)) + { + jam(); + c_start.m_starting_nodes.clear(addNodePtr.i); + if (c_start.m_starting_nodes.isclear()) + { + jam(); + sendSttorryLab(signal); + } + } return; }//switch ndbrequire(false); @@ -1583,7 +1985,8 @@ void Qmgr::initData(Signal* signal) cnoPrepFailedNodes = 0; creadyDistCom = ZFALSE; cpresident = ZNIL; - cpresidentCandidate = ZNIL; + c_start.m_president_candidate = ZNIL; + c_start.m_president_candidate_gci = 0; cpdistref = 0; cneighbourh = ZNIL; cneighbourl = ZNIL; @@ -1611,15 +2014,33 @@ void Qmgr::initData(Signal* signal) Uint32 hbDBAPI = 1500; Uint32 arbitTimeout = 1000; c_restartPartialTimeout = 30000; + c_restartPartionedTimeout = 60000; + c_restartFailureTimeout = ~0; ndb_mgm_get_int_parameter(p, CFG_DB_HEARTBEAT_INTERVAL, &hbDBDB); ndb_mgm_get_int_parameter(p, CFG_DB_API_HEARTBEAT_INTERVAL, &hbDBAPI); ndb_mgm_get_int_parameter(p, CFG_DB_ARBIT_TIMEOUT, &arbitTimeout); ndb_mgm_get_int_parameter(p, CFG_DB_START_PARTIAL_TIMEOUT, &c_restartPartialTimeout); - if(c_restartPartialTimeout == 0){ + ndb_mgm_get_int_parameter(p, CFG_DB_START_PARTITION_TIMEOUT, + &c_restartPartionedTimeout); + ndb_mgm_get_int_parameter(p, CFG_DB_START_FAILURE_TIMEOUT, + &c_restartFailureTimeout); + + if(c_restartPartialTimeout == 0) + { c_restartPartialTimeout = ~0; } + if (c_restartPartionedTimeout ==0) + { + c_restartPartionedTimeout = ~0; + } + + if (c_restartFailureTimeout == 0) + { + c_restartFailureTimeout = ~0; + } + setHbDelay(hbDBDB); setHbApiDelay(hbDBAPI); setArbitTimeout(arbitTimeout); @@ -2051,6 +2472,16 @@ void Qmgr::execDISCONNECT_REP(Signal* signal) NodeRecPtr nodePtr; nodePtr.i = getOwnNodeId(); ptrCheckGuard(nodePtr, MAX_NODES, nodeRec); + + char buf[100]; + if 
(getNodeState().startLevel < NodeState::SL_STARTED) + { + jam(); + BaseString::snprintf(buf, 100, "Node %u disconected", nodeId); + progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf); + ndbrequire(false); + } + switch(nodePtr.p->phase){ case ZRUNNING: jam(); @@ -2069,7 +2500,6 @@ void Qmgr::execDISCONNECT_REP(Signal* signal) ndbrequire(false); case ZAPI_INACTIVE: { - char buf[100]; BaseString::snprintf(buf, 100, "Node %u disconected", nodeId); progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf); ndbrequire(false); @@ -4178,8 +4608,10 @@ Qmgr::execDUMP_STATE_ORD(Signal* signal) case 1: infoEvent("creadyDistCom = %d, cpresident = %d\n", creadyDistCom, cpresident); - infoEvent("cpresidentAlive = %d, cpresidentCand = %d\n", - cpresidentAlive, cpresidentCandidate); + infoEvent("cpresidentAlive = %d, cpresidentCand = %d (gci: %d)\n", + cpresidentAlive, + c_start.m_president_candidate, + c_start.m_president_candidate_gci); infoEvent("ctoStatus = %d\n", ctoStatus); for(Uint32 i = 1; i<MAX_NDB_NODES; i++){ if(getNodeInfo(i).getType() == NodeInfo::DB){ diff --git a/ndb/src/kernel/vm/Configuration.cpp b/ndb/src/kernel/vm/Configuration.cpp index 831145a7a41..f8c79a53fb7 100644 --- a/ndb/src/kernel/vm/Configuration.cpp +++ b/ndb/src/kernel/vm/Configuration.cpp @@ -55,6 +55,12 @@ enum ndbd_options { NDB_STD_OPTS_VARS; // XXX should be my_bool ??? 
static int _daemon, _no_daemon, _foreground, _initial, _no_start; +static int _initialstart; +static const char* _nowait_nodes; + +extern Uint32 g_start_type; +extern NdbNodeBitmask g_nowait_nodes; + /** * Arguments to NDB process */ @@ -82,6 +88,14 @@ static struct my_option my_long_options[] = " (implies --nodaemon)", (gptr*) &_foreground, (gptr*) &_foreground, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 }, + { "nowait-nodes", NO_ARG, + "Nodes that will not be waited for during start", + (gptr*) &_nowait_nodes, (gptr*) &_nowait_nodes, 0, + GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0 }, + { "initial-start", NO_ARG, + "Perform initial start", + (gptr*) &_initialstart, (gptr*) &_initialstart, 0, + GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} }; static void short_usage_sub(void) @@ -150,6 +164,37 @@ Configuration::init(int argc, char** argv) globalData.ownId= 0; + if (_nowait_nodes) + { + BaseString str(_nowait_nodes); + Vector<BaseString> arr; + str.split(arr, ","); + for (Uint32 i = 0; i<arr.size(); i++) + { + char *endptr = 0; + long val = strtol(arr[i].c_str(), &endptr, 10); + if (*endptr) + { + ndbout_c("Unable to parse nowait-nodes argument: %s : %s", + arr[i].c_str(), _nowait_nodes); + exit(-1); + } + if (! (val > 0 && val < MAX_NDB_NODES)) + { + ndbout_c("Invalid nodeid specified in nowait-nodes: %d : %s", + val, _nowait_nodes); + exit(-1); + } + g_nowait_nodes.set(val); + } + } + + if (_initialstart) + { + _initialStart = true; + g_start_type |= (1 << NodeState::ST_INITIAL_START); + } + return true; } |