From 7a8c2c307fffb7a9a37868c302daca36296dd3b0 Mon Sep 17 00:00:00 2001 From: "stewart@mysql.com" <> Date: Tue, 23 May 2006 18:16:26 +1000 Subject: BUG#13985 Cluster: ndb_mgm "status" command can return incorrect data node status partial fix for this bug. more info on what the other half of the fix involves is in the bug report. --- ndb/src/mgmclient/CommandInterpreter.cpp | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) (limited to 'ndb') diff --git a/ndb/src/mgmclient/CommandInterpreter.cpp b/ndb/src/mgmclient/CommandInterpreter.cpp index 14cd3fba42b..cb85cd8f3e6 100644 --- a/ndb/src/mgmclient/CommandInterpreter.cpp +++ b/ndb/src/mgmclient/CommandInterpreter.cpp @@ -172,8 +172,15 @@ private: bool rep_connected; #endif struct NdbThread* m_event_thread; + NdbMutex *m_print_mutex; }; +struct event_thread_param { + NdbMgmHandle *m; + NdbMutex **p; +}; + +NdbMutex* print_mutex; /* * Facade object for CommandInterpreter @@ -409,6 +416,7 @@ CommandInterpreter::CommandInterpreter(const char *_host,int verbose) m_connected= false; m_event_thread= 0; try_reconnect = 0; + m_print_mutex= NdbMutex_Create(); #ifdef HAVE_GLOBAL_REPLICATION rep_host = NULL; m_repserver = NULL; @@ -422,6 +430,7 @@ CommandInterpreter::CommandInterpreter(const char *_host,int verbose) CommandInterpreter::~CommandInterpreter() { disconnect(); + NdbMutex_Destroy(m_print_mutex); ndb_mgm_destroy_handle(&m_mgmsrv); ndb_mgm_destroy_handle(&m_mgmsrv2); } @@ -461,11 +470,13 @@ CommandInterpreter::printError() static int do_event_thread; static void* -event_thread_run(void* m) +event_thread_run(void* p) { DBUG_ENTER("event_thread_run"); - NdbMgmHandle handle= *(NdbMgmHandle*)m; + struct event_thread_param param= *(struct event_thread_param*)p; + NdbMgmHandle handle= *(param.m); + NdbMutex* printmutex= *(param.p); int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_BACKUP, 1, NDB_MGM_EVENT_CATEGORY_STARTUP, @@ -483,7 +494,11 @@ event_thread_run(void* m) { const char ping_token[]= ""; 
if (memcmp(ping_token,tmp,sizeof(ping_token)-1)) - ndbout << tmp; + if(tmp && strlen(tmp)) + { + Guard g(printmutex); + ndbout << tmp; + } } } while(do_event_thread); NDB_CLOSE_SOCKET(fd); @@ -516,8 +531,11 @@ CommandInterpreter::connect() assert(m_event_thread == 0); assert(do_event_thread == 0); do_event_thread= 0; + struct event_thread_param p; + p.m= &m_mgmsrv2; + p.p= &m_print_mutex; m_event_thread = NdbThread_Create(event_thread_run, - (void**)&m_mgmsrv2, + (void**)&p, 32768, "CommandInterpreted_event_thread", NDB_THREAD_PRIO_LOW); @@ -607,6 +625,7 @@ CommandInterpreter::execute(const char *_line, int _try_reconnect, int result= execute_impl(_line); if (error) *error= m_error; + return result; } @@ -920,6 +939,7 @@ CommandInterpreter::executeForAll(const char * cmd, ExecuteFunction fun, ndbout_c("Trying to start all nodes of system."); ndbout_c("Use ALL STATUS to see the system start-up phases."); } else { + Guard g(m_print_mutex); struct ndb_mgm_cluster_state *cl= ndb_mgm_get_status(m_mgmsrv); if(cl == 0){ ndbout_c("Unable get status from management server"); -- cgit v1.2.1 From 3d1fcd427175a5f75dc6d63ccf14974970e65683 Mon Sep 17 00:00:00 2001 From: "stewart@mysql.com" <> Date: Mon, 3 Jul 2006 15:37:57 +1000 Subject: BUG#13985 ndb_mgm "status" command can return incorrect data node status Second half of the fix for this bug. This patch forces a heartbeat to be sent and will wait (a little while) for replies. This way we can get > all status X starting Y started X started > which is okay as the new status comes after the old status, always. There is the slimmest of opportunities to get output like above where only half the cluster appears started. This is about the best we can do with a command line interactive program. 
--- ndb/src/mgmsrv/MgmtSrvr.cpp | 6 ++++ ndb/src/mgmsrv/MgmtSrvr.hpp | 2 ++ ndb/src/mgmsrv/Services.cpp | 3 ++ ndb/src/ndbapi/ClusterMgr.cpp | 71 ++++++++++++++++++++++++++++++++++++++++--- ndb/src/ndbapi/ClusterMgr.hpp | 10 ++++-- 5 files changed, 86 insertions(+), 6 deletions(-) (limited to 'ndb') diff --git a/ndb/src/mgmsrv/MgmtSrvr.cpp b/ndb/src/mgmsrv/MgmtSrvr.cpp index b713a3908ab..f3e75f1afe4 100644 --- a/ndb/src/mgmsrv/MgmtSrvr.cpp +++ b/ndb/src/mgmsrv/MgmtSrvr.cpp @@ -1412,6 +1412,12 @@ MgmtSrvr::exitSingleUser(int * stopCount, bool abort) #include +void +MgmtSrvr::updateStatus(NodeBitmask nodes) +{ + theFacade->theClusterMgr->forceHB(nodes); +} + int MgmtSrvr::status(int nodeId, ndb_mgm_node_status * _status, diff --git a/ndb/src/mgmsrv/MgmtSrvr.hpp b/ndb/src/mgmsrv/MgmtSrvr.hpp index 007494a277d..1e59f32b76f 100644 --- a/ndb/src/mgmsrv/MgmtSrvr.hpp +++ b/ndb/src/mgmsrv/MgmtSrvr.hpp @@ -487,6 +487,8 @@ public: void get_connected_nodes(NodeBitmask &connected_nodes) const; SocketServer *get_socket_server() { return m_socket_server; } + void updateStatus(NodeBitmask nodes); + //************************************************************************** private: //************************************************************************** diff --git a/ndb/src/mgmsrv/Services.cpp b/ndb/src/mgmsrv/Services.cpp index d28e0eba5b3..1df7d9be7b4 100644 --- a/ndb/src/mgmsrv/Services.cpp +++ b/ndb/src/mgmsrv/Services.cpp @@ -951,6 +951,9 @@ printNodeStatus(OutputStream *output, MgmtSrvr &mgmsrv, enum ndb_mgm_node_type type) { NodeId nodeId = 0; + NodeBitmask hbnodes; + mgmsrv.get_connected_nodes(hbnodes); + mgmsrv.updateStatus(hbnodes); while(mgmsrv.getNextNodeId(&nodeId, type)) { enum ndb_mgm_node_status status; Uint32 startPhase = 0, diff --git a/ndb/src/ndbapi/ClusterMgr.cpp b/ndb/src/ndbapi/ClusterMgr.cpp index fbff57d3168..120ab76f7ca 100644 --- a/ndb/src/ndbapi/ClusterMgr.cpp +++ b/ndb/src/ndbapi/ClusterMgr.cpp @@ -39,6 +39,8 @@ int global_flag_send_heartbeat_now= 
0; +//#define DEBUG_REG + // Just a C wrapper for threadMain extern "C" void* @@ -67,6 +69,8 @@ ClusterMgr::ClusterMgr(TransporterFacade & _facade): DBUG_ENTER("ClusterMgr::ClusterMgr"); ndbSetOwnVersion(); clusterMgrThreadMutex = NdbMutex_Create(); + waitForHBMutex= NdbMutex_Create(); + waitForHBCond= NdbCondition_Create(); noOfAliveNodes= 0; noOfConnectedNodes= 0; theClusterMgrThread= 0; @@ -77,7 +81,9 @@ ClusterMgr::ClusterMgr(TransporterFacade & _facade): ClusterMgr::~ClusterMgr() { DBUG_ENTER("ClusterMgr::~ClusterMgr"); - doStop(); + doStop(); + NdbCondition_Destroy(waitForHBCond); + NdbMutex_Destroy(waitForHBMutex); NdbMutex_Destroy(clusterMgrThreadMutex); DBUG_VOID_RETURN; } @@ -163,6 +169,49 @@ ClusterMgr::doStop( ){ DBUG_VOID_RETURN; } +void +ClusterMgr::forceHB(NodeBitmask waitFor) +{ + theFacade.lock_mutex(); + global_flag_send_heartbeat_now= 1; + + waitForHBFromNodes= waitFor; +#ifdef DEBUG_REG + char buf[128]; + ndbout << "Waiting for HB from " << waitForHBFromNodes.getText(buf) << endl; +#endif + NdbApiSignal signal(numberToRef(API_CLUSTERMGR, theFacade.ownId())); + + signal.theVerId_signalNumber = GSN_API_REGREQ; + signal.theReceiversBlockNumber = QMGR; + signal.theTrace = 0; + signal.theLength = ApiRegReq::SignalLength; + + ApiRegReq * req = CAST_PTR(ApiRegReq, signal.getDataPtrSend()); + req->ref = numberToRef(API_CLUSTERMGR, theFacade.ownId()); + req->version = NDB_VERSION; + + int nodeId= 0; + for(int i=0; + NodeBitmask::NotFound!=(nodeId= waitForHBFromNodes.find(i)); + i= nodeId+1) + { +#ifdef DEBUG_REG + ndbout << "FORCE HB to " << nodeId << endl; +#endif + theFacade.sendSignalUnCond(&signal, nodeId); + } + + theFacade.unlock_mutex(); + + NdbMutex_Lock(waitForHBMutex); + NdbCondition_WaitTimeout(waitForHBCond, waitForHBMutex, 1000); + NdbMutex_Unlock(waitForHBMutex); +#ifdef DEBUG_REG + ndbout << "Still waiting for HB from " << waitForHBFromNodes.getText(buf) << endl; +#endif +} + void ClusterMgr::threadMain( ){ NdbApiSignal 
signal(numberToRef(API_CLUSTERMGR, theFacade.ownId())); @@ -226,7 +275,7 @@ ClusterMgr::threadMain( ){ if (theNode.m_info.m_type == NodeInfo::REP) { signal.theReceiversBlockNumber = API_CLUSTERMGR; } -#if 0 +#ifdef DEBUG_REG ndbout_c("ClusterMgr: Sending API_REGREQ to node %d", (int)nodeId); #endif theFacade.sendSignalUnCond(&signal, nodeId); @@ -278,7 +327,7 @@ ClusterMgr::execAPI_REGREQ(const Uint32 * theData){ const ApiRegReq * const apiRegReq = (ApiRegReq *)&theData[0]; const NodeId nodeId = refToNode(apiRegReq->ref); -#if 0 +#ifdef DEBUG_REG ndbout_c("ClusterMgr: Recd API_REGREQ from node %d", nodeId); #endif @@ -319,7 +368,7 @@ ClusterMgr::execAPI_REGCONF(const Uint32 * theData){ const ApiRegConf * const apiRegConf = (ApiRegConf *)&theData[0]; const NodeId nodeId = refToNode(apiRegConf->qmgrRef); -#if 0 +#ifdef DEBUG_REG ndbout_c("ClusterMgr: Recd API_REGCONF from node %d", nodeId); #endif @@ -351,6 +400,13 @@ ClusterMgr::execAPI_REGCONF(const Uint32 * theData){ if (node.m_info.m_type != NodeInfo::REP) { node.hbFrequency = (apiRegConf->apiHeartbeatFrequency * 10) - 50; } + waitForHBFromNodes.clear(nodeId); + if(waitForHBFromNodes.isclear()) + { + NdbMutex_Lock(waitForHBMutex); + NdbCondition_Signal(waitForHBCond); + NdbMutex_Unlock(waitForHBMutex); + } } void @@ -379,6 +435,13 @@ ClusterMgr::execAPI_REGREF(const Uint32 * theData){ default: break; } + waitForHBFromNodes.clear(nodeId); + if(waitForHBFromNodes.isclear()) + { + NdbMutex_Lock(waitForHBMutex); + NdbCondition_Signal(waitForHBCond); + NdbMutex_Unlock(waitForHBMutex); + } } void diff --git a/ndb/src/ndbapi/ClusterMgr.hpp b/ndb/src/ndbapi/ClusterMgr.hpp index 1a1e622a889..6eda98c4773 100644 --- a/ndb/src/ndbapi/ClusterMgr.hpp +++ b/ndb/src/ndbapi/ClusterMgr.hpp @@ -49,7 +49,9 @@ public: void doStop(); void startThread(); - + + void forceHB(NodeBitmask waitFor); + private: void threadMain(); @@ -85,7 +87,11 @@ private: Uint32 noOfConnectedNodes; Node theNodes[MAX_NODES]; NdbThread* theClusterMgrThread; 
- + + NodeBitmask waitForHBFromNodes; // used in forcing HBs + NdbMutex* waitForHBMutex; + NdbCondition* waitForHBCond; + /** * Used for controlling start/stop of the thread */ -- cgit v1.2.1 From b918bda2d1c942ce5cf731c620e7afaffe10ea8c Mon Sep 17 00:00:00 2001 From: "stewart@willster.(none)" <> Date: Fri, 7 Jul 2006 17:39:11 +1000 Subject: BUG#13985 ndb_mgm "status" command can return incorrect data node status better support parallel show commands, hold mutex when touching waitForHBFromNodes --- ndb/src/ndbapi/ClusterMgr.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'ndb') diff --git a/ndb/src/ndbapi/ClusterMgr.cpp b/ndb/src/ndbapi/ClusterMgr.cpp index 120ab76f7ca..db169304acd 100644 --- a/ndb/src/ndbapi/ClusterMgr.cpp +++ b/ndb/src/ndbapi/ClusterMgr.cpp @@ -172,6 +172,7 @@ ClusterMgr::doStop( ){ void ClusterMgr::forceHB(NodeBitmask waitFor) { + NdbMutex_Lock(waitForHBMutex); theFacade.lock_mutex(); global_flag_send_heartbeat_now= 1; @@ -204,7 +205,6 @@ ClusterMgr::forceHB(NodeBitmask waitFor) theFacade.unlock_mutex(); - NdbMutex_Lock(waitForHBMutex); NdbCondition_WaitTimeout(waitForHBCond, waitForHBMutex, 1000); NdbMutex_Unlock(waitForHBMutex); #ifdef DEBUG_REG @@ -400,13 +400,14 @@ ClusterMgr::execAPI_REGCONF(const Uint32 * theData){ if (node.m_info.m_type != NodeInfo::REP) { node.hbFrequency = (apiRegConf->apiHeartbeatFrequency * 10) - 50; } + + NdbMutex_Lock(waitForHBMutex); waitForHBFromNodes.clear(nodeId); + if(waitForHBFromNodes.isclear()) - { - NdbMutex_Lock(waitForHBMutex); NdbCondition_Signal(waitForHBCond); - NdbMutex_Unlock(waitForHBMutex); - } + + NdbMutex_Unlock(waitForHBMutex); } void -- cgit v1.2.1 From 3f5f7b3434f0dd4f0ddb67ab409f987d7aa35da8 Mon Sep 17 00:00:00 2001 From: "stewart@willster.(none)" <> Date: Fri, 7 Jul 2006 18:39:38 +1000 Subject: BUG#13985 Cluster: ndb_mgm "status" command can return incorrect data node status use existing transporter mutex --- ndb/src/ndbapi/ClusterMgr.cpp | 25 
+++++++++++-------------- ndb/src/ndbapi/ClusterMgr.hpp | 1 - 2 files changed, 11 insertions(+), 15 deletions(-) (limited to 'ndb') diff --git a/ndb/src/ndbapi/ClusterMgr.cpp b/ndb/src/ndbapi/ClusterMgr.cpp index db169304acd..772cbaf99b5 100644 --- a/ndb/src/ndbapi/ClusterMgr.cpp +++ b/ndb/src/ndbapi/ClusterMgr.cpp @@ -69,7 +69,6 @@ ClusterMgr::ClusterMgr(TransporterFacade & _facade): DBUG_ENTER("ClusterMgr::ClusterMgr"); ndbSetOwnVersion(); clusterMgrThreadMutex = NdbMutex_Create(); - waitForHBMutex= NdbMutex_Create(); waitForHBCond= NdbCondition_Create(); noOfAliveNodes= 0; noOfConnectedNodes= 0; @@ -83,7 +82,6 @@ ClusterMgr::~ClusterMgr() DBUG_ENTER("ClusterMgr::~ClusterMgr"); doStop(); NdbCondition_Destroy(waitForHBCond); - NdbMutex_Destroy(waitForHBMutex); NdbMutex_Destroy(clusterMgrThreadMutex); DBUG_VOID_RETURN; } @@ -172,8 +170,15 @@ ClusterMgr::doStop( ){ void ClusterMgr::forceHB(NodeBitmask waitFor) { - NdbMutex_Lock(waitForHBMutex); theFacade.lock_mutex(); + + if(!waitForHBFromNodes.isclear()) + { + NdbCondition_WaitTimeout(waitForHBCond, theFacade.theMutexPtr, 1000); + theFacade.unlock_mutex(); + return; + } + global_flag_send_heartbeat_now= 1; waitForHBFromNodes= waitFor; @@ -203,10 +208,8 @@ ClusterMgr::forceHB(NodeBitmask waitFor) theFacade.sendSignalUnCond(&signal, nodeId); } + NdbCondition_WaitTimeout(waitForHBCond, theFacade.theMutexPtr, 1000); theFacade.unlock_mutex(); - - NdbCondition_WaitTimeout(waitForHBCond, waitForHBMutex, 1000); - NdbMutex_Unlock(waitForHBMutex); #ifdef DEBUG_REG ndbout << "Still waiting for HB from " << waitForHBFromNodes.getText(buf) << endl; #endif @@ -401,13 +404,10 @@ ClusterMgr::execAPI_REGCONF(const Uint32 * theData){ node.hbFrequency = (apiRegConf->apiHeartbeatFrequency * 10) - 50; } - NdbMutex_Lock(waitForHBMutex); waitForHBFromNodes.clear(nodeId); if(waitForHBFromNodes.isclear()) - NdbCondition_Signal(waitForHBCond); - - NdbMutex_Unlock(waitForHBMutex); + NdbCondition_Broadcast(waitForHBCond); } void @@ -436,13 
+436,10 @@ ClusterMgr::execAPI_REGREF(const Uint32 * theData){ default: break; } + waitForHBFromNodes.clear(nodeId); if(waitForHBFromNodes.isclear()) - { - NdbMutex_Lock(waitForHBMutex); NdbCondition_Signal(waitForHBCond); - NdbMutex_Unlock(waitForHBMutex); - } } void diff --git a/ndb/src/ndbapi/ClusterMgr.hpp b/ndb/src/ndbapi/ClusterMgr.hpp index 6eda98c4773..3284fd8aa8a 100644 --- a/ndb/src/ndbapi/ClusterMgr.hpp +++ b/ndb/src/ndbapi/ClusterMgr.hpp @@ -89,7 +89,6 @@ private: NdbThread* theClusterMgrThread; NodeBitmask waitForHBFromNodes; // used in forcing HBs - NdbMutex* waitForHBMutex; NdbCondition* waitForHBCond; /** -- cgit v1.2.1 From df09e3ea0d3288f14586238033b521346a063f93 Mon Sep 17 00:00:00 2001 From: "stewart@willster.(none)" <> Date: Fri, 7 Jul 2006 20:10:15 +1000 Subject: BUG#13985: Cluster: ndb_mgm "status" command can return incorrect data node status - only force HB to data nodes - flag for if we broadcast condition on receipt of HB --- ndb/src/mgmsrv/MgmtSrvr.cpp | 19 +++++++++++++++++++ ndb/src/mgmsrv/MgmtSrvr.hpp | 1 + ndb/src/mgmsrv/Services.cpp | 2 +- ndb/src/ndbapi/ClusterMgr.cpp | 19 ++++++++++++++----- ndb/src/ndbapi/ClusterMgr.hpp | 1 + 5 files changed, 36 insertions(+), 6 deletions(-) (limited to 'ndb') diff --git a/ndb/src/mgmsrv/MgmtSrvr.cpp b/ndb/src/mgmsrv/MgmtSrvr.cpp index f3e75f1afe4..793192dd487 100644 --- a/ndb/src/mgmsrv/MgmtSrvr.cpp +++ b/ndb/src/mgmsrv/MgmtSrvr.cpp @@ -1941,6 +1941,25 @@ MgmtSrvr::get_connected_nodes(NodeBitmask &connected_nodes) const } } +void +MgmtSrvr::get_connected_ndb_nodes(NodeBitmask &connected_nodes) const +{ + NodeBitmask ndb_nodes; + if (theFacade && theFacade->theClusterMgr) + { + for(Uint32 i = 0; i < MAX_NODES; i++) + { + if (getNodeType(i) == NDB_MGM_NODE_TYPE_NDB) + { + ndb_nodes.set(i); + const ClusterMgr::Node &node= theFacade->theClusterMgr->getNodeInfo(i); + connected_nodes.bitOR(node.m_state.m_connected_nodes); + } + } + } + connected_nodes.bitAND(ndb_nodes); +} + bool 
MgmtSrvr::alloc_node_id(NodeId * nodeId, enum ndb_mgm_node_type type, diff --git a/ndb/src/mgmsrv/MgmtSrvr.hpp b/ndb/src/mgmsrv/MgmtSrvr.hpp index 1e59f32b76f..5bacf640a18 100644 --- a/ndb/src/mgmsrv/MgmtSrvr.hpp +++ b/ndb/src/mgmsrv/MgmtSrvr.hpp @@ -485,6 +485,7 @@ public: const char *get_connect_address(Uint32 node_id); void get_connected_nodes(NodeBitmask &connected_nodes) const; + void get_connected_ndb_nodes(NodeBitmask &connected_nodes) const; SocketServer *get_socket_server() { return m_socket_server; } void updateStatus(NodeBitmask nodes); diff --git a/ndb/src/mgmsrv/Services.cpp b/ndb/src/mgmsrv/Services.cpp index 1df7d9be7b4..ae3433fe95b 100644 --- a/ndb/src/mgmsrv/Services.cpp +++ b/ndb/src/mgmsrv/Services.cpp @@ -952,7 +952,7 @@ printNodeStatus(OutputStream *output, enum ndb_mgm_node_type type) { NodeId nodeId = 0; NodeBitmask hbnodes; - mgmsrv.get_connected_nodes(hbnodes); + mgmsrv.get_connected_ndb_nodes(hbnodes); mgmsrv.updateStatus(hbnodes); while(mgmsrv.getNextNodeId(&nodeId, type)) { enum ndb_mgm_node_status status; diff --git a/ndb/src/ndbapi/ClusterMgr.cpp b/ndb/src/ndbapi/ClusterMgr.cpp index 772cbaf99b5..28f65eebde8 100644 --- a/ndb/src/ndbapi/ClusterMgr.cpp +++ b/ndb/src/ndbapi/ClusterMgr.cpp @@ -70,6 +70,7 @@ ClusterMgr::ClusterMgr(TransporterFacade & _facade): ndbSetOwnVersion(); clusterMgrThreadMutex = NdbMutex_Create(); waitForHBCond= NdbCondition_Create(); + waitingForHB= false; noOfAliveNodes= 0; noOfConnectedNodes= 0; theClusterMgrThread= 0; @@ -172,7 +173,7 @@ ClusterMgr::forceHB(NodeBitmask waitFor) { theFacade.lock_mutex(); - if(!waitForHBFromNodes.isclear()) + if(waitingForHB) { NdbCondition_WaitTimeout(waitForHBCond, theFacade.theMutexPtr, 1000); theFacade.unlock_mutex(); @@ -180,6 +181,7 @@ ClusterMgr::forceHB(NodeBitmask waitFor) } global_flag_send_heartbeat_now= 1; + waitingForHB= true; waitForHBFromNodes= waitFor; #ifdef DEBUG_REG @@ -209,10 +211,11 @@ ClusterMgr::forceHB(NodeBitmask waitFor) } 
NdbCondition_WaitTimeout(waitForHBCond, theFacade.theMutexPtr, 1000); - theFacade.unlock_mutex(); + waitingForHB= false; #ifdef DEBUG_REG ndbout << "Still waiting for HB from " << waitForHBFromNodes.getText(buf) << endl; #endif + theFacade.unlock_mutex(); } void @@ -404,10 +407,16 @@ ClusterMgr::execAPI_REGCONF(const Uint32 * theData){ node.hbFrequency = (apiRegConf->apiHeartbeatFrequency * 10) - 50; } - waitForHBFromNodes.clear(nodeId); + if(waitingForHB) + { + waitForHBFromNodes.clear(nodeId); - if(waitForHBFromNodes.isclear()) - NdbCondition_Broadcast(waitForHBCond); + if(waitForHBFromNodes.isclear()) + { + waitingForHB= false; + NdbCondition_Broadcast(waitForHBCond); + } + } } void diff --git a/ndb/src/ndbapi/ClusterMgr.hpp b/ndb/src/ndbapi/ClusterMgr.hpp index 3284fd8aa8a..b9863821b4f 100644 --- a/ndb/src/ndbapi/ClusterMgr.hpp +++ b/ndb/src/ndbapi/ClusterMgr.hpp @@ -90,6 +90,7 @@ private: NodeBitmask waitForHBFromNodes; // used in forcing HBs NdbCondition* waitForHBCond; + bool waitingForHB; /** * Used for controlling start/stop of the thread -- cgit v1.2.1 From 37d8c78036bf44b24d153d98f435fd3eb76efef1 Mon Sep 17 00:00:00 2001 From: "stewart@willster.(none)" <> Date: Wed, 9 Aug 2006 15:03:55 +0800 Subject: BUG#13985 fixups after review by jonas --- ndb/src/mgmclient/CommandInterpreter.cpp | 1 + ndb/src/mgmsrv/MgmtSrvr.cpp | 28 ++++------------------------ ndb/src/mgmsrv/MgmtSrvr.hpp | 3 +-- ndb/src/mgmsrv/Services.cpp | 4 +--- ndb/src/ndbapi/ClusterMgr.cpp | 29 +++++++++++++++++++---------- ndb/src/ndbapi/ClusterMgr.hpp | 2 +- 6 files changed, 27 insertions(+), 40 deletions(-) (limited to 'ndb') diff --git a/ndb/src/mgmclient/CommandInterpreter.cpp b/ndb/src/mgmclient/CommandInterpreter.cpp index 103c252ca04..d4f66a56da9 100644 --- a/ndb/src/mgmclient/CommandInterpreter.cpp +++ b/ndb/src/mgmclient/CommandInterpreter.cpp @@ -705,6 +705,7 @@ CommandInterpreter::execute_impl(const char *_line) DBUG_RETURN(true); if (strcasecmp(firstToken, "SHOW") == 0) { + Guard 
g(m_print_mutex); executeShow(allAfterFirstToken); DBUG_RETURN(true); } diff --git a/ndb/src/mgmsrv/MgmtSrvr.cpp b/ndb/src/mgmsrv/MgmtSrvr.cpp index d514f0da1a4..5fabb84adb7 100644 --- a/ndb/src/mgmsrv/MgmtSrvr.cpp +++ b/ndb/src/mgmsrv/MgmtSrvr.cpp @@ -77,7 +77,6 @@ }\ } -extern int global_flag_send_heartbeat_now; extern int g_no_nodeid_checks; extern my_bool opt_core; @@ -1456,9 +1455,9 @@ MgmtSrvr::exitSingleUser(int * stopCount, bool abort) #include void -MgmtSrvr::updateStatus(NodeBitmask nodes) +MgmtSrvr::updateStatus() { - theFacade->theClusterMgr->forceHB(nodes); + theFacade->theClusterMgr->forceHB(); } int @@ -1985,25 +1984,6 @@ MgmtSrvr::get_connected_nodes(NodeBitmask &connected_nodes) const } } -void -MgmtSrvr::get_connected_ndb_nodes(NodeBitmask &connected_nodes) const -{ - NodeBitmask ndb_nodes; - if (theFacade && theFacade->theClusterMgr) - { - for(Uint32 i = 0; i < MAX_NODES; i++) - { - if (getNodeType(i) == NDB_MGM_NODE_TYPE_NDB) - { - ndb_nodes.set(i); - const ClusterMgr::Node &node= theFacade->theClusterMgr->getNodeInfo(i); - connected_nodes.bitOR(node.m_state.m_connected_nodes); - } - } - } - connected_nodes.bitAND(ndb_nodes); -} - bool MgmtSrvr::alloc_node_id(NodeId * nodeId, enum ndb_mgm_node_type type, @@ -2178,7 +2158,7 @@ MgmtSrvr::alloc_node_id(NodeId * nodeId, if (found_matching_type && !found_free_node) { // we have a temporary error which might be due to that // we have got the latest connect status from db-nodes. Force update. 
- global_flag_send_heartbeat_now= 1; + updateStatus(); } BaseString type_string, type_c_string; @@ -2532,7 +2512,7 @@ MgmtSrvr::Allocated_resources::~Allocated_resources() if (!m_reserved_nodes.isclear()) { m_mgmsrv.m_reserved_nodes.bitANDC(m_reserved_nodes); // node has been reserved, force update signal to ndb nodes - global_flag_send_heartbeat_now= 1; + m_mgmsrv.updateStatus(); char tmp_str[128]; m_mgmsrv.m_reserved_nodes.getText(tmp_str); diff --git a/ndb/src/mgmsrv/MgmtSrvr.hpp b/ndb/src/mgmsrv/MgmtSrvr.hpp index ab71fe6f4dc..17debb19f50 100644 --- a/ndb/src/mgmsrv/MgmtSrvr.hpp +++ b/ndb/src/mgmsrv/MgmtSrvr.hpp @@ -488,10 +488,9 @@ public: const char *get_connect_address(Uint32 node_id); void get_connected_nodes(NodeBitmask &connected_nodes) const; - void get_connected_ndb_nodes(NodeBitmask &connected_nodes) const; SocketServer *get_socket_server() { return m_socket_server; } - void updateStatus(NodeBitmask nodes); + void updateStatus(); //************************************************************************** private: diff --git a/ndb/src/mgmsrv/Services.cpp b/ndb/src/mgmsrv/Services.cpp index 653f36ecc6d..7f5b0e29442 100644 --- a/ndb/src/mgmsrv/Services.cpp +++ b/ndb/src/mgmsrv/Services.cpp @@ -982,9 +982,7 @@ printNodeStatus(OutputStream *output, MgmtSrvr &mgmsrv, enum ndb_mgm_node_type type) { NodeId nodeId = 0; - NodeBitmask hbnodes; - mgmsrv.get_connected_ndb_nodes(hbnodes); - mgmsrv.updateStatus(hbnodes); + mgmsrv.updateStatus(); while(mgmsrv.getNextNodeId(&nodeId, type)) { enum ndb_mgm_node_status status; Uint32 startPhase = 0, diff --git a/ndb/src/ndbapi/ClusterMgr.cpp b/ndb/src/ndbapi/ClusterMgr.cpp index 28f65eebde8..4b3c409e9d4 100644 --- a/ndb/src/ndbapi/ClusterMgr.cpp +++ b/ndb/src/ndbapi/ClusterMgr.cpp @@ -37,8 +37,6 @@ #include #include -int global_flag_send_heartbeat_now= 0; - //#define DEBUG_REG // Just a C wrapper for threadMain @@ -169,7 +167,7 @@ ClusterMgr::doStop( ){ } void -ClusterMgr::forceHB(NodeBitmask waitFor) 
+ClusterMgr::forceHB() { theFacade.lock_mutex(); @@ -180,10 +178,25 @@ ClusterMgr::forceHB(NodeBitmask waitFor) return; } - global_flag_send_heartbeat_now= 1; waitingForHB= true; - waitForHBFromNodes= waitFor; + NodeBitmask ndb_nodes; + ndb_nodes.clear(); + waitForHBFromNodes.clear(); + for(Uint32 i = 0; i < MAX_NODES; i++) + { + if(!theNodes[i].defined) + continue; + if(theNodes[i].m_info.m_type == NodeInfo::DB) + { + ndb_nodes.set(i); + const ClusterMgr::Node &node= getNodeInfo(i); + waitForHBFromNodes.bitOR(node.m_state.m_connected_nodes); + } + ndbout << endl; + } + waitForHBFromNodes.bitAND(ndb_nodes); + #ifdef DEBUG_REG char buf[128]; ndbout << "Waiting for HB from " << waitForHBFromNodes.getText(buf) << endl; @@ -239,9 +252,6 @@ ClusterMgr::threadMain( ){ /** * Start of Secure area for use of Transporter */ - int send_heartbeat_now= global_flag_send_heartbeat_now; - global_flag_send_heartbeat_now= 0; - theFacade.lock_mutex(); for (int i = 1; i < MAX_NODES; i++){ /** @@ -264,8 +274,7 @@ ClusterMgr::threadMain( ){ } theNode.hbCounter += timeSlept; - if (theNode.hbCounter >= theNode.hbFrequency || - send_heartbeat_now) { + if (theNode.hbCounter >= theNode.hbFrequency) { /** * It is now time to send a new Heartbeat */ diff --git a/ndb/src/ndbapi/ClusterMgr.hpp b/ndb/src/ndbapi/ClusterMgr.hpp index b9863821b4f..d2bcc52f7e8 100644 --- a/ndb/src/ndbapi/ClusterMgr.hpp +++ b/ndb/src/ndbapi/ClusterMgr.hpp @@ -50,7 +50,7 @@ public: void doStop(); void startThread(); - void forceHB(NodeBitmask waitFor); + void forceHB(); private: void threadMain(); -- cgit v1.2.1 From 76d4e65adb5ab0eb46441c2ef85301ed897ff73c Mon Sep 17 00:00:00 2001 From: "stewart@willster.(none)" <> Date: Wed, 9 Aug 2006 15:39:32 +0800 Subject: remove unneeded printing of newline. 
--- ndb/src/ndbapi/ClusterMgr.cpp | 1 - 1 file changed, 1 deletion(-) (limited to 'ndb') diff --git a/ndb/src/ndbapi/ClusterMgr.cpp b/ndb/src/ndbapi/ClusterMgr.cpp index 4b3c409e9d4..475561af225 100644 --- a/ndb/src/ndbapi/ClusterMgr.cpp +++ b/ndb/src/ndbapi/ClusterMgr.cpp @@ -193,7 +193,6 @@ ClusterMgr::forceHB() const ClusterMgr::Node &node= getNodeInfo(i); waitForHBFromNodes.bitOR(node.m_state.m_connected_nodes); } - ndbout << endl; } waitForHBFromNodes.bitAND(ndb_nodes); -- cgit v1.2.1 From 03e297aafa208a50abea13569c5eef6f69afa557 Mon Sep 17 00:00:00 2001 From: "stewart@willster.(none)" <> Date: Tue, 15 Aug 2006 11:09:38 +0800 Subject: BUG#20823 testBackup FailMaster failing few cases not handled properly (NF occurs). --- ndb/src/kernel/blocks/backup/Backup.cpp | 64 +++++++++++++++++++++------------ 1 file changed, 41 insertions(+), 23 deletions(-) (limited to 'ndb') diff --git a/ndb/src/kernel/blocks/backup/Backup.cpp b/ndb/src/kernel/blocks/backup/Backup.cpp index 43c1de5e2b3..10318e5f52d 100644 --- a/ndb/src/kernel/blocks/backup/Backup.cpp +++ b/ndb/src/kernel/blocks/backup/Backup.cpp @@ -274,36 +274,48 @@ Backup::execCONTINUEB(Signal* signal) BackupRecordPtr ptr; c_backupPool.getPtr(ptr, ptr_I); - TablePtr tabPtr; - ptr.p->tables.getPtr(tabPtr, tabPtr_I); - FragmentPtr fragPtr; - tabPtr.p->fragments.getPtr(fragPtr, fragPtr_I); - BackupFilePtr filePtr; - ptr.p->files.getPtr(filePtr, ptr.p->ctlFilePtr); - - const Uint32 sz = sizeof(BackupFormat::CtlFile::FragmentInfo) >> 2; - Uint32 * dst; - if (!filePtr.p->operation.dataBuffer.getWritePtr(&dst, sz)) + if (tabPtr_I == RNIL) { - sendSignalWithDelay(BACKUP_REF, GSN_CONTINUEB, signal, 100, 4); + closeFiles(signal, ptr); return; } + jam(); + TablePtr tabPtr; + ptr.p->tables.getPtr(tabPtr, tabPtr_I); + jam(); + if(tabPtr.p->fragments.getSize()) + { + FragmentPtr fragPtr; + tabPtr.p->fragments.getPtr(fragPtr, fragPtr_I); - BackupFormat::CtlFile::FragmentInfo * fragInfo = - (BackupFormat::CtlFile::FragmentInfo*)dst; 
- fragInfo->SectionType = htonl(BackupFormat::FRAGMENT_INFO); - fragInfo->SectionLength = htonl(sz); - fragInfo->TableId = htonl(fragPtr.p->tableId); - fragInfo->FragmentNo = htonl(fragPtr_I); - fragInfo->NoOfRecordsLow = htonl(fragPtr.p->noOfRecords & 0xFFFFFFFF); - fragInfo->NoOfRecordsHigh = htonl(fragPtr.p->noOfRecords >> 32); - fragInfo->FilePosLow = htonl(0 & 0xFFFFFFFF); - fragInfo->FilePosHigh = htonl(0 >> 32); + BackupFilePtr filePtr; + ptr.p->files.getPtr(filePtr, ptr.p->ctlFilePtr); - filePtr.p->operation.dataBuffer.updateWritePtr(sz); + const Uint32 sz = sizeof(BackupFormat::CtlFile::FragmentInfo) >> 2; + Uint32 * dst; + if (!filePtr.p->operation.dataBuffer.getWritePtr(&dst, sz)) + { + sendSignalWithDelay(BACKUP_REF, GSN_CONTINUEB, signal, 100, 4); + return; + } + + BackupFormat::CtlFile::FragmentInfo * fragInfo = + (BackupFormat::CtlFile::FragmentInfo*)dst; + fragInfo->SectionType = htonl(BackupFormat::FRAGMENT_INFO); + fragInfo->SectionLength = htonl(sz); + fragInfo->TableId = htonl(fragPtr.p->tableId); + fragInfo->FragmentNo = htonl(fragPtr_I); + fragInfo->NoOfRecordsLow = htonl(fragPtr.p->noOfRecords & 0xFFFFFFFF); + fragInfo->NoOfRecordsHigh = htonl(fragPtr.p->noOfRecords >> 32); + fragInfo->FilePosLow = htonl(0 & 0xFFFFFFFF); + fragInfo->FilePosHigh = htonl(0 >> 32); + + filePtr.p->operation.dataBuffer.updateWritePtr(sz); + + fragPtr_I++; + } - fragPtr_I++; if (fragPtr_I == tabPtr.p->fragments.getSize()) { signal->theData[0] = tabPtr.p->tableId; @@ -4243,6 +4255,12 @@ Backup::execSTOP_BACKUP_REQ(Signal* signal) TablePtr tabPtr; ptr.p->tables.first(tabPtr); + if (tabPtr.i == RNIL) + { + closeFiles(signal, ptr); + return; + } + signal->theData[0] = BackupContinueB::BACKUP_FRAGMENT_INFO; signal->theData[1] = ptr.i; signal->theData[2] = tabPtr.i; -- cgit v1.2.1 From 76940119d0998d1f996a7e8741f204fc6407535e Mon Sep 17 00:00:00 2001 From: "stewart@willster.(none)" <> Date: Wed, 16 Aug 2006 14:33:31 +0800 Subject: BUG#21670 memory leak in ndb_mgm -e show 
fix the following valgrind warning when running ndb_mgm -e show (leak only in client) ==20398== 14,596 (11,936 direct, 2,660 indirect) bytes in 4 blocks are definitely lost in loss record 24 of 25 ==20398== at 0x401C4A1: malloc (vg_replace_malloc.c:149) ==20398== by 0x80797A3: ConfigValuesFactory::create(unsigned, unsigned) (ConfigValues.cpp:309) ==20398== by 0x8079F03: ConfigValuesFactory::expand(unsigned, unsigned) (ConfigValues.cpp:325) ==20398== by 0x8079967: ConfigValuesFactory::put(ConfigValues::Entry const&) (ConfigValues.cpp:414) ==20398== by 0x807A7B6: ConfigValuesFactory::unpack(void const*, unsigned) (ConfigValues.cpp:701) ==20398== by 0x806CB9D: ConfigValuesFactory::unpack(UtilBuffer const&) (ConfigValues.hpp:252) ==20398== by 0x8069160: ndb_mgm_get_configuration (mgmapi.cpp:1941) ==20398== by 0x8060661: CommandInterpreter::executeShow(char*) (CommandInterpreter.cpp:1242) ==20398== by 0x8063966: CommandInterpreter::execute_impl(char const*) (CommandInterpreter.cpp:715) ==20398== by 0x8064040: CommandInterpreter::execute(char const*, int, int*) (CommandInterpreter.cpp:625) ==20398== by 0x8064189: Ndb_mgmclient::execute(char const*, int, int*) (CommandInterpreter.cpp:203) ==20398== by 0x805E56C: read_and_execute(int) (main.cpp:124) ==20398== by 0x805E754: main (main.cpp:162) ==20398== --- ndb/src/mgmclient/CommandInterpreter.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'ndb') diff --git a/ndb/src/mgmclient/CommandInterpreter.cpp b/ndb/src/mgmclient/CommandInterpreter.cpp index d4f66a56da9..ba68f6e4f0a 100644 --- a/ndb/src/mgmclient/CommandInterpreter.cpp +++ b/ndb/src/mgmclient/CommandInterpreter.cpp @@ -1245,6 +1245,7 @@ CommandInterpreter::executeShow(char* parameters) if(it == 0){ ndbout_c("Unable to create config iterator"); + ndb_mgm_destroy_configuration(conf); return; } NdbAutoPtr ptr(it); @@ -1291,6 +1292,7 @@ CommandInterpreter::executeShow(char* parameters) print_nodes(state, it, "ndb_mgmd", mgm_nodes, NDB_MGM_NODE_TYPE_MGM, 0); 
print_nodes(state, it, "mysqld", api_nodes, NDB_MGM_NODE_TYPE_API, 0); // ndbout << helpTextShow; + ndb_mgm_destroy_configuration(conf); return; } else if (strcasecmp(parameters, "PROPERTIES") == 0 || strcasecmp(parameters, "PROP") == 0) { -- cgit v1.2.1 From 44e3c3b79b1e46f1a522f80a9195449f10f25e79 Mon Sep 17 00:00:00 2001 From: "stewart@willster.(none)" <> Date: Wed, 16 Aug 2006 14:39:27 +0800 Subject: BUG#21671 memory leak for mgmapi event listeners fixes the following valgrind warning (when running ndb_mgm under valgrind, or, indeed any other mgmapi program listening to events): ==23291== 190 (20 direct, 170 indirect) bytes in 1 blocks are definitely lost in loss record 13 of 22 ==23291== at 0x401C895: operator new(unsigned) (vg_replace_malloc.c:163) ==23291== by 0x8075300: ParserImpl::run(Parser::Context*, Properties const**, bool volatile*) const (Parser.cpp:178) ==23291== by 0x806D09B: Parser::parse(Parser::Context&, ParserDummy&) (Parser.hpp:219) ==23291== by 0x80671C4: ndb_mgm_call(ndb_mgm_handle*, ParserRow const*, char const*, Properties const*) (mgmapi.cpp:355) ==23291== by 0x806AB79: ndb_mgm_listen_event_internal(ndb_mgm_handle*, int const*, int) (mgmapi.cpp:1419) ==23291== by 0x806AC1F: ndb_mgm_listen_event (mgmapi.cpp:1434) ==23291== by 0x805EB5D: event_thread_run(void*) (CommandInterpreter.cpp:467) ==23291== by 0x806F70A: ndb_thread_wrapper (NdbThread.c:68) ==23291== by 0x4042340: start_thread (in /lib/tls/i686/cmov/libpthread-2.3.6.so) ==23291== by 0x429D4ED: clone (in /lib/tls/i686/cmov/libc-2.3.6.so) --- ndb/src/mgmapi/mgmapi.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'ndb') diff --git a/ndb/src/mgmapi/mgmapi.cpp b/ndb/src/mgmapi/mgmapi.cpp index 4428b158b6b..9bf19dda3a4 100644 --- a/ndb/src/mgmapi/mgmapi.cpp +++ b/ndb/src/mgmapi/mgmapi.cpp @@ -1389,7 +1389,7 @@ ndb_mgm_listen_event_internal(NdbMgmHandle handle, const int filter[], MGM_END() }; CHECK_HANDLE(handle, -1); - + const char *hostname= 
ndb_mgm_get_connected_host(handle); int port= ndb_mgm_get_connected_port(handle); SocketClient s(hostname, port); @@ -1411,19 +1411,20 @@ ndb_mgm_listen_event_internal(NdbMgmHandle handle, const int filter[], } args.put("filter", tmp.c_str()); } - + int tmp = handle->socket; handle->socket = sockfd; - + const Properties *reply; reply = ndb_mgm_call(handle, stat_reply, "listen event", &args); - + handle->socket = tmp; - + if(reply == NULL) { close(sockfd); CHECK_REPLY(reply, -1); } + delete reply; return sockfd; } -- cgit v1.2.1