summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--mysql-test/lib/mtr_timer.pl6
-rw-r--r--ndb/include/kernel/signaldata/DumpStateOrd.hpp4
-rw-r--r--ndb/include/kernel/signaldata/StartPerm.hpp6
-rw-r--r--ndb/include/kernel/signaldata/TcContinueB.hpp3
-rw-r--r--ndb/src/kernel/blocks/ERROR_codes.txt4
-rw-r--r--ndb/src/kernel/blocks/dbdih/Dbdih.hpp4
-rw-r--r--ndb/src/kernel/blocks/dbdih/DbdihMain.cpp241
-rw-r--r--ndb/src/kernel/blocks/dblqh/DblqhMain.cpp167
-rw-r--r--ndb/src/kernel/blocks/dbtc/Dbtc.hpp27
-rw-r--r--ndb/src/kernel/blocks/dbtc/DbtcMain.cpp415
-rw-r--r--ndb/src/kernel/blocks/qmgr/QmgrMain.cpp101
-rw-r--r--ndb/src/ndbapi/NdbTransaction.cpp4
-rw-r--r--ndb/src/ndbapi/Ndbif.cpp12
-rw-r--r--ndb/src/ndbapi/TransporterFacade.cpp13
-rw-r--r--ndb/src/ndbapi/TransporterFacade.hpp1
-rw-r--r--ndb/test/include/NdbRestarter.hpp1
-rw-r--r--ndb/test/ndbapi/testNodeRestart.cpp123
-rw-r--r--ndb/test/ndbapi/testSystemRestart.cpp53
-rw-r--r--ndb/test/ndbapi/testTimeout.cpp101
-rw-r--r--ndb/test/run-test/Makefile.am9
-rw-r--r--ndb/test/run-test/conf-daily-devel-ndbmaster.txt3
-rw-r--r--ndb/test/run-test/conf-dl145a.txt (renamed from ndb/test/run-test/conf-daily-basic-dl145a.txt)3
-rw-r--r--ndb/test/run-test/conf-ndbmaster.txt (renamed from ndb/test/run-test/conf-daily-basic-ndbmaster.txt)3
-rw-r--r--ndb/test/run-test/conf-shark.txt (renamed from ndb/test/run-test/conf-daily-basic-shark.txt)3
-rw-r--r--ndb/test/run-test/daily-basic-tests.txt16
-rwxr-xr-xndb/test/run-test/ndb-autotest.sh12
-rw-r--r--ndb/test/src/NdbRestarter.cpp33
-rw-r--r--ndb/tools/desc.cpp78
-rw-r--r--sql/ha_innodb.cc7
29 files changed, 1137 insertions, 316 deletions
diff --git a/mysql-test/lib/mtr_timer.pl b/mysql-test/lib/mtr_timer.pl
index 709cebd6407..a85ab8c6122 100644
--- a/mysql-test/lib/mtr_timer.pl
+++ b/mysql-test/lib/mtr_timer.pl
@@ -78,6 +78,12 @@ sub mtr_timer_start($$$) {
{
# Child, redirect output and exec
# FIXME do we need to redirect streams?
+
+ # Don't do the ^C cleanup in the timeout child processes!
+ # There is actually a race here, if we get ^C after fork(), but before
+ # clearing the signal handler.
+ $SIG{INT}= 'DEFAULT';
+
$0= "mtr_timer(timers,$name,$duration)";
sleep($duration);
exit(0);
diff --git a/ndb/include/kernel/signaldata/DumpStateOrd.hpp b/ndb/include/kernel/signaldata/DumpStateOrd.hpp
index 4dd22cf5092..b42b930711c 100644
--- a/ndb/include/kernel/signaldata/DumpStateOrd.hpp
+++ b/ndb/include/kernel/signaldata/DumpStateOrd.hpp
@@ -126,7 +126,11 @@ public:
DihAllAllowNodeStart = 7016,
DihMinTimeBetweenLCP = 7017,
DihMaxTimeBetweenLCP = 7018,
+ // 7019
+ // 7020
+ // 7021
EnableUndoDelayDataWrite = 7080, // DIH+ACC+TUP
+ DihSetTimeBetweenGcp = 7090,
DihStartLcpImmediately = 7099,
// 8000 Suma
// 12000 Tux
diff --git a/ndb/include/kernel/signaldata/StartPerm.hpp b/ndb/include/kernel/signaldata/StartPerm.hpp
index 38be72835a3..63e01ed3868 100644
--- a/ndb/include/kernel/signaldata/StartPerm.hpp
+++ b/ndb/include/kernel/signaldata/StartPerm.hpp
@@ -64,5 +64,11 @@ private:
Uint32 startingNodeId;
Uint32 errorCode;
+
+ enum ErrorCode
+ {
+ ZNODE_ALREADY_STARTING_ERROR = 305,
+ InitialStartRequired = 320
+ };
};
#endif
diff --git a/ndb/include/kernel/signaldata/TcContinueB.hpp b/ndb/include/kernel/signaldata/TcContinueB.hpp
index 85213791b2a..b87b982e49b 100644
--- a/ndb/include/kernel/signaldata/TcContinueB.hpp
+++ b/ndb/include/kernel/signaldata/TcContinueB.hpp
@@ -44,7 +44,8 @@ private:
CHECK_WAIT_DROP_TAB_FAILED_LQH = 16,
TRIGGER_PENDING = 17,
- DelayTCKEYCONF = 18
+ DelayTCKEYCONF = 18,
+ ZNF_CHECK_TRANSACTIONS = 19
};
};
diff --git a/ndb/src/kernel/blocks/ERROR_codes.txt b/ndb/src/kernel/blocks/ERROR_codes.txt
index eab4a8eb623..f2e77d4e7e0 100644
--- a/ndb/src/kernel/blocks/ERROR_codes.txt
+++ b/ndb/src/kernel/blocks/ERROR_codes.txt
@@ -228,6 +228,8 @@ Delay execution of COMPLETECONF signal 2 seconds to generate time-out.
8045: (ABORTCONF only as part of take-over)
Delay execution of ABORTCONF signal 2 seconds to generate time-out.
+8050: Send ZABORT_TIMEOUT_BREAK delayed
+
ERROR CODES FOR TESTING TIME-OUT HANDLING IN DBTC
-------------------------------------------------
@@ -305,6 +307,8 @@ Test Crashes in handling node restarts
7131: Crash when receiving START_COPYREQ in master node
7132: Crash when receiving START_COPYCONF in starting node
+7170: Crash when receiving START_PERMREF (InitialStartRequired)
+
DICT:
6000 Crash during NR when receiving DICTSTARTREQ
6001 Crash during NR when receiving SCHEMA_INFO
diff --git a/ndb/src/kernel/blocks/dbdih/Dbdih.hpp b/ndb/src/kernel/blocks/dbdih/Dbdih.hpp
index 0c107e35603..78acf1ffd19 100644
--- a/ndb/src/kernel/blocks/dbdih/Dbdih.hpp
+++ b/ndb/src/kernel/blocks/dbdih/Dbdih.hpp
@@ -81,7 +81,6 @@
#define ZWRONG_FAILURE_NUMBER_ERROR 302
#define ZWRONG_START_NODE_ERROR 303
#define ZNO_REPLICA_FOUND_ERROR 304
-#define ZNODE_ALREADY_STARTING_ERROR 305
#define ZNODE_START_DISALLOWED_ERROR 309
// --------------------------------------
@@ -1038,7 +1037,8 @@ private:
void prepareReplicas(FragmentstorePtr regFragptr);
void removeNodeFromStored(Uint32 nodeId,
FragmentstorePtr regFragptr,
- ReplicaRecordPtr replicaPtr);
+ ReplicaRecordPtr replicaPtr,
+ bool temporary);
void removeOldStoredReplica(FragmentstorePtr regFragptr,
ReplicaRecordPtr replicaPtr);
void removeStoredReplica(FragmentstorePtr regFragptr,
diff --git a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
index 33736bcb4cf..6186ed3ac3c 100644
--- a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
+++ b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
@@ -1428,6 +1428,33 @@ void Dbdih::ndbStartReqLab(Signal* signal, BlockReference ref)
return;
}
+ NodeRecordPtr nodePtr;
+ Uint32 gci = SYSFILE->lastCompletedGCI[getOwnNodeId()];
+ for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++)
+ {
+ jam();
+ ptrAss(nodePtr, nodeRecord);
+ if (SYSFILE->lastCompletedGCI[nodePtr.i] > gci)
+ {
+ jam();
+ /**
+ * Since we're starting (is master) and
+ * there are other nodes with higher GCI...
+ * their GCIs must be invalidated...
+ * and they _must_ do an initial start
+ * indicate this by setting lastCompletedGCI = 0
+ */
+ SYSFILE->lastCompletedGCI[nodePtr.i] = 0;
+ ndbrequire(nodePtr.p->nodeStatus != NodeRecord::ALIVE);
+ warningEvent("Making filesystem for node %d unusable",
+ nodePtr.i);
+ }
+ }
+ /**
+ * This sets which GCI we will try to restart to
+ */
+ SYSFILE->newestRestorableGCI = gci;
+
ndbrequire(isMaster());
copyGciLab(signal, CopyGCIReq::RESTART); // We have already read the file!
}//Dbdih::ndbStartReqLab()
@@ -1563,7 +1590,7 @@ void Dbdih::execSTART_PERMREF(Signal* signal)
{
jamEntry();
Uint32 errorCode = signal->theData[1];
- if (errorCode == ZNODE_ALREADY_STARTING_ERROR) {
+ if (errorCode == StartPermRef::ZNODE_ALREADY_STARTING_ERROR) {
jam();
/*-----------------------------------------------------------------------*/
// The master was busy adding another node. We will wait for a second and
@@ -1573,6 +1600,20 @@ void Dbdih::execSTART_PERMREF(Signal* signal)
sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 3000, 1);
return;
}//if
+
+ if (errorCode == StartPermRef::InitialStartRequired)
+ {
+ CRASH_INSERTION(7170);
+ char buf[255];
+ BaseString::snprintf(buf, sizeof(buf),
+ "Cluster requires this node to be started "
+ " with --initial as partial start has been performed"
+ " and this filesystem is unusable");
+ progError(__LINE__,
+ ERR_SR_RESTARTCONFLICT,
+ buf);
+ ndbrequire(false);
+ }
/*------------------------------------------------------------------------*/
// Some node process in another node involving our node was still active. We
// will recover from this by crashing here.
@@ -1663,7 +1704,7 @@ void Dbdih::execSTART_PERMREQ(Signal* signal)
(c_nodeStartMaster.wait != ZFALSE)) {
jam();
signal->theData[0] = nodeId;
- signal->theData[1] = ZNODE_ALREADY_STARTING_ERROR;
+ signal->theData[1] = StartPermRef::ZNODE_ALREADY_STARTING_ERROR;
sendSignal(retRef, GSN_START_PERMREF, signal, 2, JBB);
return;
}//if
@@ -1673,6 +1714,16 @@ void Dbdih::execSTART_PERMREQ(Signal* signal)
ndbrequire(false);
}//if
+ if (SYSFILE->lastCompletedGCI[nodeId] == 0 &&
+ typeStart != NodeState::ST_INITIAL_NODE_RESTART)
+ {
+ jam();
+ signal->theData[0] = nodeId;
+ signal->theData[1] = StartPermRef::InitialStartRequired;
+ sendSignal(retRef, GSN_START_PERMREF, signal, 2, JBB);
+ return;
+ }
+
/*----------------------------------------------------------------------
* WE START THE INCLUSION PROCEDURE
* ---------------------------------------------------------------------*/
@@ -3521,24 +3572,12 @@ void Dbdih::closingGcpLab(Signal* signal, FileRecordPtr filePtr)
/* ------------------------------------------------------------------------- */
void Dbdih::selectMasterCandidateAndSend(Signal* signal)
{
- Uint32 gci = 0;
- Uint32 masterCandidateId = 0;
- NodeRecordPtr nodePtr;
- for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
- jam();
- ptrAss(nodePtr, nodeRecord);
- if (SYSFILE->lastCompletedGCI[nodePtr.i] > gci) {
- jam();
- masterCandidateId = nodePtr.i;
- gci = SYSFILE->lastCompletedGCI[nodePtr.i];
- }//if
- }//for
- ndbrequire(masterCandidateId != 0);
setNodeGroups();
- signal->theData[0] = masterCandidateId;
- signal->theData[1] = gci;
+ signal->theData[0] = getOwnNodeId();
+ signal->theData[1] = SYSFILE->lastCompletedGCI[getOwnNodeId()];
sendSignal(cntrlblockref, GSN_DIH_RESTARTCONF, signal, 2, JBB);
-
+
+ NodeRecordPtr nodePtr;
Uint32 node_groups[MAX_NDB_NODES];
memset(node_groups, 0, sizeof(node_groups));
for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
@@ -3556,10 +3595,10 @@ void Dbdih::selectMasterCandidateAndSend(Signal* signal)
if(count != 0 && count != cnoReplicas){
char buf[255];
BaseString::snprintf(buf, sizeof(buf),
- "Illegal configuration change."
- " Initial start needs to be performed "
- " when changing no of replicas (%d != %d)",
- node_groups[nodePtr.i], cnoReplicas);
+ "Illegal configuration change."
+ " Initial start needs to be performed "
+ " when changing no of replicas (%d != %d)",
+ node_groups[nodePtr.i], cnoReplicas);
progError(__LINE__, NDBD_EXIT_INVALID_CONFIG, buf);
}
}
@@ -5220,6 +5259,7 @@ void Dbdih::removeNodeFromTable(Signal* signal,
//const Uint32 lcpId = SYSFILE->latestLCP_ID;
const bool lcpOngoingFlag = (tabPtr.p->tabLcpStatus== TabRecord::TLS_ACTIVE);
+ const bool temporary = !tabPtr.p->storedTable;
FragmentstorePtr fragPtr;
for(Uint32 fragNo = 0; fragNo < tabPtr.p->totalfragments; fragNo++){
@@ -5240,7 +5280,7 @@ void Dbdih::removeNodeFromTable(Signal* signal,
jam();
found = true;
noOfRemovedReplicas++;
- removeNodeFromStored(nodeId, fragPtr, replicaPtr);
+ removeNodeFromStored(nodeId, fragPtr, replicaPtr, temporary);
if(replicaPtr.p->lcpOngoingFlag){
jam();
/**
@@ -5950,9 +5990,6 @@ void Dbdih::MASTER_LCPhandling(Signal* signal, Uint32 failedNodeId)
signal->theData[0] = 7012;
execDUMP_STATE_ORD(signal);
- signal->theData[0] = 7015;
- execDUMP_STATE_ORD(signal);
-
c_lcpMasterTakeOverState.set(LMTOS_IDLE, __LINE__);
checkLocalNodefailComplete(signal, failedNodePtr.i, NF_LCP_TAKE_OVER);
@@ -12164,9 +12201,18 @@ void Dbdih::removeDeadNode(NodeRecordPtr removeNodePtr)
/*---------------------------------------------------------------*/
void Dbdih::removeNodeFromStored(Uint32 nodeId,
FragmentstorePtr fragPtr,
- ReplicaRecordPtr replicatePtr)
+ ReplicaRecordPtr replicatePtr,
+ bool temporary)
{
- newCrashedReplica(nodeId, replicatePtr);
+ if (!temporary)
+ {
+ jam();
+ newCrashedReplica(nodeId, replicatePtr);
+ }
+ else
+ {
+ jam();
+ }
removeStoredReplica(fragPtr, replicatePtr);
linkOldStoredReplica(fragPtr, replicatePtr);
ndbrequire(fragPtr.p->storedReplicas != RNIL);
@@ -13100,7 +13146,8 @@ void
Dbdih::execDUMP_STATE_ORD(Signal* signal)
{
DumpStateOrd * const & dumpState = (DumpStateOrd *)&signal->theData[0];
- if (dumpState->args[0] == DumpStateOrd::DihDumpNodeRestartInfo) {
+ Uint32 arg = dumpState->args[0];
+ if (arg == DumpStateOrd::DihDumpNodeRestartInfo) {
infoEvent("c_nodeStartMaster.blockLcp = %d, c_nodeStartMaster.blockGcp = %d, c_nodeStartMaster.wait = %d",
c_nodeStartMaster.blockLcp, c_nodeStartMaster.blockGcp, c_nodeStartMaster.wait);
infoEvent("cstartGcpNow = %d, cgcpStatus = %d",
@@ -13110,7 +13157,7 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal)
infoEvent("cgcpOrderBlocked = %d, cgcpStartCounter = %d",
cgcpOrderBlocked, cgcpStartCounter);
}//if
- if (dumpState->args[0] == DumpStateOrd::DihDumpNodeStatusInfo) {
+ if (arg == DumpStateOrd::DihDumpNodeStatusInfo) {
NodeRecordPtr localNodePtr;
infoEvent("Printing nodeStatus of all nodes");
for (localNodePtr.i = 1; localNodePtr.i < MAX_NDB_NODES; localNodePtr.i++) {
@@ -13122,7 +13169,7 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal)
}//for
}//if
- if (dumpState->args[0] == DumpStateOrd::DihPrintFragmentation){
+ if (arg == DumpStateOrd::DihPrintFragmentation){
infoEvent("Printing fragmentation of all tables --");
for(Uint32 i = 0; i<ctabFileSize; i++){
TabRecordPtr tabPtr;
@@ -13297,7 +13344,7 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal)
}
}
- if(dumpState->args[0] == 7019 && signal->getLength() == 2)
+ if(arg == 7019 && signal->getLength() == 2)
{
char buf2[8+1];
NodeRecordPtr nodePtr;
@@ -13315,7 +13362,7 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal)
nodePtr.p->m_nodefailSteps.getText(buf2));
}
- if(dumpState->args[0] == 7020 && signal->getLength() > 3)
+ if(arg == 7020 && signal->getLength() > 3)
{
Uint32 gsn= signal->theData[1];
Uint32 block= signal->theData[2];
@@ -13339,7 +13386,7 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal)
gsn, getBlockName(block, "UNKNOWN"), length, buf);
}
- if(dumpState->args[0] == DumpStateOrd::DihDumpLCPState){
+ if(arg == DumpStateOrd::DihDumpLCPState){
infoEvent("-- Node %d LCP STATE --", getOwnNodeId());
infoEvent("lcpStatus = %d (update place = %d) ",
c_lcpState.lcpStatus, c_lcpState.lcpStatusUpdatedPlace);
@@ -13355,7 +13402,7 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal)
infoEvent("-- Node %d LCP STATE --", getOwnNodeId());
}
- if(dumpState->args[0] == DumpStateOrd::DihDumpLCPMasterTakeOver){
+ if(arg == DumpStateOrd::DihDumpLCPMasterTakeOver){
infoEvent("-- Node %d LCP MASTER TAKE OVER STATE --", getOwnNodeId());
infoEvent
("c_lcpMasterTakeOverState.state = %d updatePlace = %d failedNodeId = %d",
@@ -13370,52 +13417,25 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal)
infoEvent("-- Node %d LCP MASTER TAKE OVER STATE --", getOwnNodeId());
}
- if (signal->theData[0] == 7015){
- for(Uint32 i = 0; i<ctabFileSize; i++){
- TabRecordPtr tabPtr;
- tabPtr.i = i;
- ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
-
- if(tabPtr.p->tabStatus != TabRecord::TS_ACTIVE)
- continue;
-
- infoEvent
- ("Table %d: TabCopyStatus: %d TabUpdateStatus: %d TabLcpStatus: %d",
- tabPtr.i,
- tabPtr.p->tabCopyStatus,
- tabPtr.p->tabUpdateState,
- tabPtr.p->tabLcpStatus);
+ if (signal->theData[0] == 7015)
+ {
+ if (signal->getLength() == 1)
+ {
+ signal->theData[1] = 0;
+ }
- FragmentstorePtr fragPtr;
- for (Uint32 fid = 0; fid < tabPtr.p->totalfragments; fid++) {
- jam();
- getFragstore(tabPtr.p, fid, fragPtr);
-
- char buf[100], buf2[100];
- BaseString::snprintf(buf, sizeof(buf), " Fragment %d: noLcpReplicas==%d ",
- fid, fragPtr.p->noLcpReplicas);
-
- Uint32 num=0;
- ReplicaRecordPtr replicaPtr;
- replicaPtr.i = fragPtr.p->storedReplicas;
- do {
- ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
- BaseString::snprintf(buf2, sizeof(buf2), "%s %d(on %d)=%d(%s)",
- buf, num,
- replicaPtr.p->procNode,
- replicaPtr.p->lcpIdStarted,
- replicaPtr.p->lcpOngoingFlag ? "Ongoing" : "Idle");
- BaseString::snprintf(buf, sizeof(buf), "%s", buf2);
-
- num++;
- replicaPtr.i = replicaPtr.p->nextReplica;
- } while (replicaPtr.i != RNIL);
- infoEvent(buf);
- }
+ Uint32 tableId = signal->theData[1];
+ if (tableId < ctabFileSize)
+ {
+ signal->theData[0] = 7021;
+ execDUMP_STATE_ORD(signal);
+ signal->theData[0] = 7015;
+ signal->theData[1] = tableId + 1;
+ sendSignal(reference(), GSN_DUMP_STATE_ORD, signal, 2, JBB);
}
}
- if(dumpState->args[0] == DumpStateOrd::EnableUndoDelayDataWrite){
+ if(arg == DumpStateOrd::EnableUndoDelayDataWrite){
ndbout << "Dbdih:: delay write of datapages for table = "
<< dumpState->args[1]<< endl;
// Send this dump to ACC and TUP
@@ -13445,7 +13465,7 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal)
return;
}
- if(dumpState->args[0] == 7098){
+ if(arg == 7098){
if(signal->length() == 3){
jam();
infoEvent("startLcpRoundLoopLab(tabel=%d, fragment=%d)",
@@ -13458,10 +13478,73 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal)
}
}
- if(dumpState->args[0] == DumpStateOrd::DihStartLcpImmediately){
+ if(arg == DumpStateOrd::DihStartLcpImmediately){
c_lcpState.ctimer += (1 << c_lcpState.clcpDelay);
return;
}
+
+ if (arg == DumpStateOrd::DihSetTimeBetweenGcp)
+ {
+ if (signal->getLength() == 1)
+ {
+ const ndb_mgm_configuration_iterator * p =
+ theConfiguration.getOwnConfigIterator();
+ ndbrequire(p != 0);
+ ndb_mgm_get_int_parameter(p, CFG_DB_GCP_INTERVAL, &cgcpDelay);
+ }
+ else
+ {
+ cgcpDelay = signal->theData[1];
+ }
+ ndbout_c("Setting time between gcp : %d", cgcpDelay);
+ }
+
+ if (arg == 7021 && signal->getLength() == 2)
+ {
+ TabRecordPtr tabPtr;
+ tabPtr.i = signal->theData[1];
+ if (tabPtr.i >= ctabFileSize)
+ return;
+
+ ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
+
+ if(tabPtr.p->tabStatus != TabRecord::TS_ACTIVE)
+ return;
+
+ infoEvent
+ ("Table %d: TabCopyStatus: %d TabUpdateStatus: %d TabLcpStatus: %d",
+ tabPtr.i,
+ tabPtr.p->tabCopyStatus,
+ tabPtr.p->tabUpdateState,
+ tabPtr.p->tabLcpStatus);
+
+ FragmentstorePtr fragPtr;
+ for (Uint32 fid = 0; fid < tabPtr.p->totalfragments; fid++) {
+ jam();
+ getFragstore(tabPtr.p, fid, fragPtr);
+
+ char buf[100], buf2[100];
+ BaseString::snprintf(buf, sizeof(buf), " Fragment %d: noLcpReplicas==%d ",
+ fid, fragPtr.p->noLcpReplicas);
+
+ Uint32 num=0;
+ ReplicaRecordPtr replicaPtr;
+ replicaPtr.i = fragPtr.p->storedReplicas;
+ do {
+ ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
+ BaseString::snprintf(buf2, sizeof(buf2), "%s %d(on %d)=%d(%s)",
+ buf, num,
+ replicaPtr.p->procNode,
+ replicaPtr.p->lcpIdStarted,
+ replicaPtr.p->lcpOngoingFlag ? "Ongoing" : "Idle");
+ BaseString::snprintf(buf, sizeof(buf), "%s", buf2);
+
+ num++;
+ replicaPtr.i = replicaPtr.p->nextReplica;
+ } while (replicaPtr.i != RNIL);
+ infoEvent(buf);
+ }
+ }
}//Dbdih::execDUMP_STATE_ORD()
void
diff --git a/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp b/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
index cdfc7880102..a03c4cf185a 100644
--- a/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
+++ b/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
@@ -18569,6 +18569,173 @@ Dblqh::execDUMP_STATE_ORD(Signal* signal)
c_error_insert_table_id = dumpState->args[1];
SET_ERROR_INSERT_VALUE(5042);
}
+
+ TcConnectionrec *regTcConnectionrec = tcConnectionrec;
+ Uint32 ttcConnectrecFileSize = ctcConnectrecFileSize;
+ Uint32 arg = dumpState->args[0];
+ if(arg == 2306)
+ {
+ for(Uint32 i = 0; i<1024; i++)
+ {
+ TcConnectionrecPtr tcRec;
+ tcRec.i = ctransidHash[i];
+ while(tcRec.i != RNIL)
+ {
+ ptrCheckGuard(tcRec, ttcConnectrecFileSize, regTcConnectionrec);
+ ndbout << "TcConnectionrec " << tcRec.i;
+ signal->theData[0] = 2307;
+ signal->theData[1] = tcRec.i;
+ execDUMP_STATE_ORD(signal);
+ tcRec.i = tcRec.p->nextHashRec;
+ }
+ }
+ }
+
+ if(arg == 2307 || arg == 2308)
+ {
+ TcConnectionrecPtr tcRec;
+ tcRec.i = signal->theData[1];
+ ptrCheckGuard(tcRec, ttcConnectrecFileSize, regTcConnectionrec);
+
+ ndbout << " transactionState = " << tcRec.p->transactionState<<endl;
+ ndbout << " operation = " << tcRec.p->operation<<endl;
+ ndbout << " tcNodeFailrec = " << tcRec.p->tcNodeFailrec
+ << " seqNoReplica = " << tcRec.p->seqNoReplica
+ << " simpleRead = " << tcRec.p->simpleRead
+ << endl;
+ ndbout << " replicaType = " << tcRec.p->replicaType
+ << " reclenAiLqhkey = " << tcRec.p->reclenAiLqhkey
+ << " opExec = " << tcRec.p->opExec
+ << endl;
+ ndbout << " opSimple = " << tcRec.p->opSimple
+ << " nextSeqNoReplica = " << tcRec.p->nextSeqNoReplica
+ << " lockType = " << tcRec.p->lockType
+ << endl;
+ ndbout << " lastReplicaNo = " << tcRec.p->lastReplicaNo
+ << " indTakeOver = " << tcRec.p->indTakeOver
+ << " dirtyOp = " << tcRec.p->dirtyOp
+ << endl;
+ ndbout << " activeCreat = " << tcRec.p->activeCreat
+ << " tcBlockref = " << hex << tcRec.p->tcBlockref
+ << " reqBlockref = " << hex << tcRec.p->reqBlockref
+ << " primKeyLen = " << tcRec.p->primKeyLen
+ << endl;
+ ndbout << " nextReplica = " << tcRec.p->nextReplica
+ << " tcBlockref = " << hex << tcRec.p->tcBlockref
+ << " reqBlockref = " << hex << tcRec.p->reqBlockref
+ << " primKeyLen = " << tcRec.p->primKeyLen
+ << endl;
+ ndbout << " logStopPageNo = " << tcRec.p->logStopPageNo
+ << " logStartPageNo = " << tcRec.p->logStartPageNo
+ << " logStartPageIndex = " << tcRec.p->logStartPageIndex
+ << endl;
+ ndbout << " errorCode = " << tcRec.p->errorCode
+ << " clientBlockref = " << hex << tcRec.p->clientBlockref
+ << " applRef = " << hex << tcRec.p->applRef
+ << " totSendlenAi = " << tcRec.p->totSendlenAi
+ << endl;
+ ndbout << " totReclenAi = " << tcRec.p->totReclenAi
+ << " tcScanRec = " << tcRec.p->tcScanRec
+ << " tcScanInfo = " << tcRec.p->tcScanInfo
+ << " tcOprec = " << hex << tcRec.p->tcOprec
+ << endl;
+ ndbout << " tableref = " << tcRec.p->tableref
+ << " simpleTcConnect = " << tcRec.p->simpleTcConnect
+ << " storedProcId = " << tcRec.p->storedProcId
+ << " schemaVersion = " << tcRec.p->schemaVersion
+ << endl;
+ ndbout << " reqinfo = " << tcRec.p->reqinfo
+ << " reqRef = " << tcRec.p->reqRef
+ << " readlenAi = " << tcRec.p->readlenAi
+ << " prevTc = " << tcRec.p->prevTc
+ << endl;
+ ndbout << " prevLogTcrec = " << tcRec.p->prevLogTcrec
+ << " prevHashRec = " << tcRec.p->prevHashRec
+ << " nodeAfterNext0 = " << tcRec.p->nodeAfterNext[0]
+ << " nodeAfterNext1 = " << tcRec.p->nodeAfterNext[1]
+ << endl;
+ ndbout << " nextTcConnectrec = " << tcRec.p->nextTcConnectrec
+ << " nextTc = " << tcRec.p->nextTc
+ << " nextTcLogQueue = " << tcRec.p->nextTcLogQueue
+ << " nextLogTcrec = " << tcRec.p->nextLogTcrec
+ << endl;
+ ndbout << " nextHashRec = " << tcRec.p->nextHashRec
+ << " logWriteState = " << tcRec.p->logWriteState
+ << " logStartFileNo = " << tcRec.p->logStartFileNo
+ << " listState = " << tcRec.p->listState
+ << endl;
+ ndbout << " lastAttrinbuf = " << tcRec.p->lastAttrinbuf
+ << " lastTupkeybuf = " << tcRec.p->lastTupkeybuf
+ << " hashValue = " << tcRec.p->hashValue
+ << endl;
+ ndbout << " gci = " << tcRec.p->gci
+ << " fragmentptr = " << tcRec.p->fragmentptr
+ << " fragmentid = " << tcRec.p->fragmentid
+ << " firstTupkeybuf = " << tcRec.p->firstTupkeybuf
+ << endl;
+ ndbout << " firstAttrinbuf = " << tcRec.p->firstAttrinbuf
+ << " currTupAiLen = " << tcRec.p->currTupAiLen
+ << " currReclenAi = " << tcRec.p->currReclenAi
+ << endl;
+ ndbout << " tcTimer = " << tcRec.p->tcTimer
+ << " clientConnectrec = " << tcRec.p->clientConnectrec
+ << " applOprec = " << hex << tcRec.p->applOprec
+ << " abortState = " << tcRec.p->abortState
+ << endl;
+ ndbout << " transid0 = " << hex << tcRec.p->transid[0]
+ << " transid1 = " << hex << tcRec.p->transid[1]
+ << " tupkeyData0 = " << tcRec.p->tupkeyData[0]
+ << " tupkeyData1 = " << tcRec.p->tupkeyData[1]
+ << endl;
+ ndbout << " tupkeyData2 = " << tcRec.p->tupkeyData[2]
+ << " tupkeyData3 = " << tcRec.p->tupkeyData[3]
+ << endl;
+ switch (tcRec.p->transactionState) {
+
+ case TcConnectionrec::SCAN_STATE_USED:
+ if (tcRec.p->tcScanRec < cscanrecFileSize){
+ ScanRecordPtr TscanPtr;
+ c_scanRecordPool.getPtr(TscanPtr, tcRec.p->tcScanRec);
+ ndbout << " scanState = " << TscanPtr.p->scanState << endl;
+ //TscanPtr.p->scanLocalref[2];
+ ndbout << " copyPtr="<<TscanPtr.p->copyPtr
+ << " scanAccPtr="<<TscanPtr.p->scanAccPtr
+ << " scanAiLength="<<TscanPtr.p->scanAiLength
+ << endl;
+ ndbout << " m_curr_batch_size_rows="<<
+ TscanPtr.p->m_curr_batch_size_rows
+ << " m_max_batch_size_rows="<<
+ TscanPtr.p->m_max_batch_size_rows
+ << " scanErrorCounter="<<TscanPtr.p->scanErrorCounter
+ << endl;
+ ndbout << " scanSchemaVersion="<<TscanPtr.p->scanSchemaVersion
+ << " scanStoredProcId="<<TscanPtr.p->scanStoredProcId
+ << " scanTcrec="<<TscanPtr.p->scanTcrec
+ << endl;
+ ndbout << " scanType="<<TscanPtr.p->scanType
+ << " scanApiBlockref="<<TscanPtr.p->scanApiBlockref
+ << " scanNodeId="<<TscanPtr.p->scanNodeId
+ << " scanCompletedStatus="<<TscanPtr.p->scanCompletedStatus
+ << endl;
+ ndbout << " scanFlag="<<TscanPtr.p->scanFlag
+ << " scanLockHold="<<TscanPtr.p->scanLockHold
+ << " scanLockMode="<<TscanPtr.p->scanLockMode
+ << " scanNumber="<<TscanPtr.p->scanNumber
+ << endl;
+ ndbout << " scanReleaseCounter="<<TscanPtr.p->scanReleaseCounter
+ << " scanTcWaiting="<<TscanPtr.p->scanTcWaiting
+ << " scanKeyinfoFlag="<<TscanPtr.p->scanKeyinfoFlag
+ << endl;
+ } else{
+ ndbout << "No connected scan record found" << endl;
+ }
+ break;
+ default:
+ break;
+ }
+ ndbrequire(arg != 2308);
+ }
+
}//Dblqh::execDUMP_STATE_ORD()
void Dblqh::execSET_VAR_REQ(Signal* signal)
diff --git a/ndb/src/kernel/blocks/dbtc/Dbtc.hpp b/ndb/src/kernel/blocks/dbtc/Dbtc.hpp
index cb4f1c6244b..a0beec732a7 100644
--- a/ndb/src/kernel/blocks/dbtc/Dbtc.hpp
+++ b/ndb/src/kernel/blocks/dbtc/Dbtc.hpp
@@ -213,14 +213,6 @@ public:
LTS_ACTIVE = 1
};
- enum TakeOverState {
- TOS_NOT_DEFINED = 0,
- TOS_IDLE = 1,
- TOS_ACTIVE = 2,
- TOS_COMPLETED = 3,
- TOS_NODE_FAILED = 4
- };
-
enum FailState {
FS_IDLE = 0,
FS_LISTENING = 1,
@@ -638,6 +630,7 @@ public:
ConnectionState apiConnectstate;
UintR transid[2];
UintR firstTcConnect;
+ NdbNodeBitmask m_transaction_nodes;
//---------------------------------------------------
// Second 16 byte cache line. Hot variables.
@@ -934,7 +927,6 @@ public:
struct HostRecord {
HostState hostStatus;
LqhTransState lqhTransStatus;
- TakeOverState takeOverStatus;
bool inPackedList;
UintR noOfPackedWordsLqh;
UintR packedWordsLqh[26];
@@ -943,6 +935,17 @@ public:
UintR noOfWordsTCINDXCONF;
UintR packedWordsTCINDXCONF[30];
BlockReference hostLqhBlockRef;
+
+ enum NodeFailBits
+ {
+ NF_TAKEOVER = 0x1,
+ NF_CHECK_SCAN = 0x2,
+ NF_CHECK_TRANSACTION = 0x4,
+ NF_CHECK_DROP_TAB = 0x8,
+ NF_NODE_FAIL_BITS = 0xF // All bits...
+ };
+ Uint32 m_nf_bits;
+ NdbNodeBitmask m_lqh_trans_conf;
}; /* p2c: size = 128 bytes */
typedef Ptr<HostRecord> HostRecordPtr;
@@ -1589,7 +1592,7 @@ private:
void wrongSchemaVersionErrorLab(Signal* signal);
void noFreeConnectionErrorLab(Signal* signal);
void tckeyreq050Lab(Signal* signal);
- void timeOutFoundLab(Signal* signal, UintR anAdd);
+ void timeOutFoundLab(Signal* signal, UintR anAdd, Uint32 errCode);
void completeTransAtTakeOverLab(Signal* signal, UintR TtakeOverInd);
void completeTransAtTakeOverDoLast(Signal* signal, UintR TtakeOverInd);
void completeTransAtTakeOverDoOne(Signal* signal, UintR TtakeOverInd);
@@ -1611,6 +1614,9 @@ private:
void checkScanFragList(Signal*, Uint32 failedNodeId, ScanRecord * scanP,
LocalDLList<ScanFragRec>::Head&);
+ void nodeFailCheckTransactions(Signal*,Uint32 transPtrI,Uint32 failedNodeId);
+ void checkNodeFailComplete(Signal* signal, Uint32 failedNodeId, Uint32 bit);
+
// Initialisation
void initData();
void initRecords();
@@ -1637,6 +1643,7 @@ private:
HostRecord *hostRecord;
HostRecordPtr hostptr;
UintR chostFilesize;
+ NdbNodeBitmask c_alive_nodes;
GcpRecord *gcpRecord;
GcpRecordPtr gcpPtr;
diff --git a/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp b/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp
index d7232030c41..2788d20b842 100644
--- a/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp
+++ b/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp
@@ -266,6 +266,10 @@ void Dbtc::execCONTINUEB(Signal* signal)
jam();
checkScanActiveInFailedLqh(signal, Tdata0, Tdata1);
return;
+ case TcContinueB::ZNF_CHECK_TRANSACTIONS:
+ jam();
+ nodeFailCheckTransactions(signal, Tdata0, Tdata1);
+ return;
case TcContinueB::CHECK_WAIT_DROP_TAB_FAILED_LQH:
jam();
checkWaitDropTabFailedLqh(signal, Tdata0, Tdata1);
@@ -303,8 +307,8 @@ void Dbtc::execINCL_NODEREQ(Signal* signal)
hostptr.i = signal->theData[1];
ptrCheckGuard(hostptr, chostFilesize, hostRecord);
hostptr.p->hostStatus = HS_ALIVE;
- hostptr.p->takeOverStatus = TOS_IDLE;
signal->theData[0] = cownref;
+ c_alive_nodes.set(hostptr.i);
sendSignal(tblockref, GSN_INCL_NODECONF, signal, 1, JBB);
}
@@ -503,6 +507,7 @@ Dbtc::checkWaitDropTabFailedLqh(Signal* signal, Uint32 nodeId, Uint32 tableId)
* Finished
*/
jam();
+ checkNodeFailComplete(signal, nodeId, HostRecord::NF_CHECK_DROP_TAB);
return;
}
@@ -868,8 +873,6 @@ void Dbtc::execREAD_NODESCONF(Signal* signal)
hostptr.i = i;
ptrCheckGuard(hostptr, chostFilesize, hostRecord);
- hostptr.p->takeOverStatus = TOS_IDLE;
-
if (NodeBitmask::get(readNodes->inactiveNodes, i)) {
jam();
hostptr.p->hostStatus = HS_DEAD;
@@ -877,6 +880,7 @@ void Dbtc::execREAD_NODESCONF(Signal* signal)
jam();
con_lineNodes++;
hostptr.p->hostStatus = HS_ALIVE;
+ c_alive_nodes.set(i);
}//if
}//if
}//for
@@ -2378,6 +2382,7 @@ void Dbtc::initApiConnectRec(Signal* signal,
regApiPtr->commitAckMarker = RNIL;
regApiPtr->buddyPtr = RNIL;
regApiPtr->currSavePointId = 0;
+ regApiPtr->m_transaction_nodes.clear();
// Trigger data
releaseFiredTriggerData(&regApiPtr->theFiredTriggers),
// Index data
@@ -2986,6 +2991,10 @@ void Dbtc::tckeyreq050Lab(Signal* signal)
signal->theData[0] = TdihConnectptr;
signal->theData[1] = Ttableref;
signal->theData[2] = TdistrHashValue;
+ signal->theData[3] = 0;
+ signal->theData[4] = 0;
+ signal->theData[5] = 0;
+ signal->theData[6] = 0;
/*-------------------------------------------------------------*/
/* FOR EFFICIENCY REASONS WE AVOID THE SIGNAL SENDING HERE AND */
@@ -3165,6 +3174,7 @@ void Dbtc::sendlqhkeyreq(Signal* signal,
TcConnectRecord * const regTcPtr = tcConnectptr.p;
ApiConnectRecord * const regApiPtr = apiConnectptr.p;
CacheRecord * const regCachePtr = cachePtr.p;
+ UintR sig0, sig1, sig2, sig3, sig4, sig5, sig6;
#ifdef ERROR_INSERT
if (ERROR_INSERTED(8002)) {
systemErrorLab(signal, __LINE__);
@@ -3202,6 +3212,9 @@ void Dbtc::sendlqhkeyreq(Signal* signal,
LqhKeyReq::setScanTakeOverFlag(tslrAttrLen, regCachePtr->scanTakeOverInd);
Tdata10 = 0;
+ sig0 = regCachePtr->opSimple;
+ sig1 = regTcPtr->operation;
+ bool simpleRead = (sig1 == ZREAD && sig0 == ZTRUE);
LqhKeyReq::setKeyLen(Tdata10, regCachePtr->keylen);
LqhKeyReq::setLastReplicaNo(Tdata10, regTcPtr->lastReplicaNo);
LqhKeyReq::setLockType(Tdata10, regCachePtr->opLock);
@@ -3211,8 +3224,8 @@ void Dbtc::sendlqhkeyreq(Signal* signal,
LqhKeyReq::setApplicationAddressFlag(Tdata10, 1);
LqhKeyReq::setDirtyFlag(Tdata10, regTcPtr->dirtyOp);
LqhKeyReq::setInterpretedFlag(Tdata10, regCachePtr->opExec);
- LqhKeyReq::setSimpleFlag(Tdata10, regCachePtr->opSimple);
- LqhKeyReq::setOperation(Tdata10, regTcPtr->operation);
+ LqhKeyReq::setSimpleFlag(Tdata10, sig0);
+ LqhKeyReq::setOperation(Tdata10, sig1);
/* -----------------------------------------------------------------------
* Sequential Number of first LQH = 0, bit 22-23
* IF ATTRIBUTE INFORMATION IS SENT IN TCKEYREQ,
@@ -3225,18 +3238,16 @@ void Dbtc::sendlqhkeyreq(Signal* signal,
* ----------------------------------------------------------------------- */
//LqhKeyReq::setAPIVersion(Tdata10, regCachePtr->apiVersionNo);
Uint32 commitAckMarker = regTcPtr->commitAckMarker;
+ const Uint32 noOfLqhs = regTcPtr->noOfNodes;
if(commitAckMarker != RNIL){
jam();
-
LqhKeyReq::setMarkerFlag(Tdata10, 1);
- CommitAckMarker * tmp;
- tmp = m_commitAckMarkerHash.getPtr(commitAckMarker);
+ CommitAckMarker * tmp = m_commitAckMarkerHash.getPtr(commitAckMarker);
/**
* Populate LQH array
*/
- const Uint32 noOfLqhs = regTcPtr->noOfNodes;
tmp->noOfLqhs = noOfLqhs;
for(Uint32 i = 0; i<noOfLqhs; i++){
tmp->lqhNodeId[i] = regTcPtr->tcNodedata[i];
@@ -3247,7 +3258,6 @@ void Dbtc::sendlqhkeyreq(Signal* signal,
/* NO READ LENGTH SENT FROM TC. SEQUENTIAL NUMBER IS 1 AND IT */
/* IS SENT TO A PRIMARY NODE. */
/* ************************************************************> */
- UintR sig0, sig1, sig2, sig3, sig4, sig5, sig6;
LqhKeyReq * const lqhKeyReq = (LqhKeyReq *)signal->getDataPtrSend();
@@ -3271,6 +3281,14 @@ void Dbtc::sendlqhkeyreq(Signal* signal,
sig5 = regTcPtr->clientData;
sig6 = regCachePtr->scanInfo;
+ if (! simpleRead)
+ {
+ regApiPtr->m_transaction_nodes.set(regTcPtr->tcNodedata[0]);
+ regApiPtr->m_transaction_nodes.set(regTcPtr->tcNodedata[1]);
+ regApiPtr->m_transaction_nodes.set(regTcPtr->tcNodedata[2]);
+ regApiPtr->m_transaction_nodes.set(regTcPtr->tcNodedata[3]);
+ }
+
lqhKeyReq->tableSchemaVersion = sig0;
lqhKeyReq->fragmentData = sig1;
lqhKeyReq->transId1 = sig2;
@@ -4655,6 +4673,7 @@ void Dbtc::copyApi(Signal* signal)
UintR TgcpPointer = regTmpApiPtr->gcpPointer;
UintR TgcpFilesize = cgcpFilesize;
UintR TcommitAckMarker = regTmpApiPtr->commitAckMarker;
+ NdbNodeBitmask Tnodes = regTmpApiPtr->m_transaction_nodes;
GcpRecord *localGcpRecord = gcpRecord;
regApiPtr->ndbapiBlockref = regTmpApiPtr->ndbapiBlockref;
@@ -4665,6 +4684,7 @@ void Dbtc::copyApi(Signal* signal)
regApiPtr->transid[1] = Ttransid2;
regApiPtr->lqhkeyconfrec = Tlqhkeyconfrec;
regApiPtr->commitAckMarker = TcommitAckMarker;
+ regApiPtr->m_transaction_nodes = Tnodes;
gcpPtr.i = TgcpPointer;
ptrCheckGuard(gcpPtr, TgcpFilesize, localGcpRecord);
@@ -4675,6 +4695,7 @@ void Dbtc::copyApi(Signal* signal)
regTmpApiPtr->commitAckMarker = RNIL;
regTmpApiPtr->firstTcConnect = RNIL;
regTmpApiPtr->lastTcConnect = RNIL;
+ regTmpApiPtr->m_transaction_nodes.clear();
releaseAllSeizedIndexOperations(regTmpApiPtr);
}//Dbtc::copyApi()
@@ -4933,7 +4954,7 @@ void Dbtc::releaseTransResources(Signal* signal)
TcConnectRecordPtr localTcConnectptr;
UintR TtcConnectFilesize = ctcConnectFilesize;
TcConnectRecord *localTcConnectRecord = tcConnectRecord;
-
+ apiConnectptr.p->m_transaction_nodes.clear();
localTcConnectptr.i = apiConnectptr.p->firstTcConnect;
do {
jam();
@@ -5338,7 +5359,8 @@ void Dbtc::execTC_COMMITREQ(Signal* signal)
break;
case CS_ABORTING:
jam();
- errorCode = ZABORTINPROGRESS;
+ errorCode = regApiPtr->returncode ?
+ regApiPtr->returncode : ZABORTINPROGRESS;
break;
case CS_START_SCAN:
jam();
@@ -5877,9 +5899,9 @@ void Dbtc::abort010Lab(Signal* signal)
if (transP->firstTcConnect == RNIL) {
jam();
- /*-----------------------------------------------------------------------*/
- /* WE HAVE NO PARTICIPANTS IN THE TRANSACTION. */
- /*-----------------------------------------------------------------------*/
+ /*--------------------------------------------------------------------*/
+ /* WE HAVE NO PARTICIPANTS IN THE TRANSACTION. */
+ /*--------------------------------------------------------------------*/
releaseAbortResources(signal);
return;
}//if
@@ -6156,10 +6178,12 @@ void Dbtc::timeOutLoopStartLab(Signal* signal, Uint32 api_con_ptr)
if (api_timer != 0) {
time_out_value= time_out_param + (api_con_ptr & mask_value);
time_passed= tc_timer - api_timer;
- if (time_passed > time_out_value) {
+ if (time_passed > time_out_value)
+ {
jam();
- timeOutFoundLab(signal, api_con_ptr);
- return;
+ timeOutFoundLab(signal, api_con_ptr, ZTIME_OUT_ERROR);
+ api_con_ptr++;
+ break;
}
}
}
@@ -6179,10 +6203,8 @@ void Dbtc::timeOutLoopStartLab(Signal* signal, Uint32 api_con_ptr)
return;
}//Dbtc::timeOutLoopStartLab()
-void Dbtc::timeOutFoundLab(Signal* signal, Uint32 TapiConPtr)
+void Dbtc::timeOutFoundLab(Signal* signal, Uint32 TapiConPtr, Uint32 errCode)
{
- sendContinueTimeOutControl(signal, TapiConPtr + 1);
-
apiConnectptr.i = TapiConPtr;
ptrCheckGuard(apiConnectptr, capiConnectFilesize, apiConnectRecord);
/*------------------------------------------------------------------*/
@@ -6195,7 +6217,8 @@ void Dbtc::timeOutFoundLab(Signal* signal, Uint32 TapiConPtr)
<< "Time-out in state = " << apiConnectptr.p->apiConnectstate
<< " apiConnectptr.i = " << apiConnectptr.i
<< " - exec: " << apiConnectptr.p->m_exec_flag
- << " - place: " << c_apiConTimer_line[apiConnectptr.i]);
+ << " - place: " << c_apiConTimer_line[apiConnectptr.i]
+ << " code: " << errCode);
switch (apiConnectptr.p->apiConnectstate) {
case CS_STARTED:
if(apiConnectptr.p->lqhkeyreqrec == apiConnectptr.p->lqhkeyconfrec){
@@ -6212,7 +6235,7 @@ void Dbtc::timeOutFoundLab(Signal* signal, Uint32 TapiConPtr)
}//if
}
apiConnectptr.p->returnsignal = RS_TCROLLBACKREP;
- apiConnectptr.p->returncode = ZTIME_OUT_ERROR;
+ apiConnectptr.p->returncode = errCode;
abort010Lab(signal);
return;
case CS_RECEIVING:
@@ -6225,7 +6248,7 @@ void Dbtc::timeOutFoundLab(Signal* signal, Uint32 TapiConPtr)
/* START ABORTING THE TRANSACTION. ALSO START CHECKING THE */
/* REMAINING TRANSACTIONS. */
/*------------------------------------------------------------------*/
- terrorCode = ZTIME_OUT_ERROR;
+ terrorCode = errCode;
abortErrorLab(signal);
return;
case CS_COMMITTING:
@@ -6432,6 +6455,7 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck)
return;
}
+ bool found = false;
OperationState tmp[16];
Uint32 TloopCount = 0;
@@ -6439,7 +6463,31 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck)
jam();
if (tcConnectptr.i == RNIL) {
jam();
- if (Tcheck == 0) {
+
+#ifdef VM_TRACE
+ ndbout_c("found: %d Tcheck: %d apiConnectptr.p->counter: %d",
+ found, Tcheck, apiConnectptr.p->counter);
+#endif
+ if (found || apiConnectptr.p->counter)
+ {
+ jam();
+ /**
+ * We sent at least one ABORT/ABORTED,
+ * or ZABORT_TIMEOUT_BREAK is in the job buffer;
+ * wait for reception...
+ */
+ return;
+ }
+
+ if (Tcheck == 1)
+ {
+ jam();
+ releaseAbortResources(signal);
+ return;
+ }
+
+ if (Tcheck == 0)
+ {
jam();
/*------------------------------------------------------------------
* All nodes had already reported ABORTED for all tcConnect records.
@@ -6448,9 +6496,11 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck)
*------------------------------------------------------------------*/
char buf[96]; buf[0] = 0;
char buf2[96];
- BaseString::snprintf(buf, sizeof(buf), "TC %d: %d ops:",
- __LINE__, apiConnectptr.i);
- for(Uint32 i = 0; i<TloopCount; i++){
+ BaseString::snprintf(buf, sizeof(buf), "TC %d: %d counter: %d ops:",
+ __LINE__, apiConnectptr.i,
+ apiConnectptr.p->counter);
+ for(Uint32 i = 0; i<TloopCount; i++)
+ {
BaseString::snprintf(buf2, sizeof(buf2), "%s %d", buf, tmp[i]);
BaseString::snprintf(buf, sizeof(buf), buf2);
}
@@ -6458,7 +6508,9 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck)
ndbout_c(buf);
ndbrequire(false);
releaseAbortResources(signal);
+ return;
}
+
return;
}//if
TloopCount++;
@@ -6473,7 +6525,16 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck)
signal->theData[0] = TcContinueB::ZABORT_TIMEOUT_BREAK;
signal->theData[1] = tcConnectptr.i;
signal->theData[2] = apiConnectptr.i;
- sendSignal(cownref, GSN_CONTINUEB, signal, 3, JBB);
+ if (ERROR_INSERTED(8050))
+ {
+ ndbout_c("sending ZABORT_TIMEOUT_BREAK delayed (%d %d)",
+ Tcheck, apiConnectptr.p->counter);
+ sendSignalWithDelay(cownref, GSN_CONTINUEB, signal, 2000, 3);
+ }
+ else
+ {
+ sendSignal(cownref, GSN_CONTINUEB, signal, 3, JBB);
+ }
return;
}//if
ptrCheckGuard(tcConnectptr, ctcConnectFilesize, tcConnectRecord);
@@ -6496,7 +6557,7 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck)
jam();
if (tcConnectptr.p->tcNodedata[Ti] != 0) {
TloopCount += 31;
- Tcheck = 1;
+ found = true;
hostptr.i = tcConnectptr.p->tcNodedata[Ti];
ptrCheckGuard(hostptr, chostFilesize, hostRecord);
if (hostptr.p->hostStatus == HS_ALIVE) {
@@ -6869,58 +6930,44 @@ void Dbtc::execNODE_FAILREP(Signal* signal)
const Uint32 tnewMasterId = nodeFail->masterNodeId;
arrGuard(tnoOfNodes, MAX_NDB_NODES);
+ Uint32 i;
int index = 0;
- for (unsigned i = 1; i< MAX_NDB_NODES; i++) {
- if(NodeBitmask::get(nodeFail->theNodes, i)){
+ for (i = 1; i< MAX_NDB_NODES; i++)
+ {
+ if(NodeBitmask::get(nodeFail->theNodes, i))
+ {
cdata[index] = i;
index++;
}//if
}//for
+ cmasterNodeId = tnewMasterId;
+
tcNodeFailptr.i = 0;
ptrAss(tcNodeFailptr, tcFailRecord);
- Uint32 tindex;
- for (tindex = 0; tindex < tnoOfNodes; tindex++) {
+ for (i = 0; i < tnoOfNodes; i++)
+ {
jam();
- hostptr.i = cdata[tindex];
+ hostptr.i = cdata[i];
ptrCheckGuard(hostptr, chostFilesize, hostRecord);
+
/*------------------------------------------------------------*/
/* SET STATUS OF THE FAILED NODE TO DEAD SINCE IT HAS */
/* FAILED. */
/*------------------------------------------------------------*/
hostptr.p->hostStatus = HS_DEAD;
+ hostptr.p->m_nf_bits = HostRecord::NF_NODE_FAIL_BITS;
+ c_alive_nodes.clear(hostptr.i);
- if (hostptr.p->takeOverStatus == TOS_COMPLETED) {
- jam();
- /*------------------------------------------------------------*/
- /* A VERY UNUSUAL SITUATION. THE TAKE OVER WAS COMPLETED*/
- /* EVEN BEFORE WE HEARD ABOUT THE NODE FAILURE REPORT. */
- /* HOWEVER UNUSUAL THIS SITUATION IS POSSIBLE. */
- /*------------------------------------------------------------*/
- /* RELEASE THE CURRENTLY UNUSED LQH CONNECTIONS. THE */
- /* REMAINING WILL BE RELEASED WHEN THE TRANSACTION THAT */
- /* USED THEM IS COMPLETED. */
- /*------------------------------------------------------------*/
- {
- NFCompleteRep * const nfRep = (NFCompleteRep *)&signal->theData[0];
- nfRep->blockNo = DBTC;
- nfRep->nodeId = cownNodeid;
- nfRep->failedNodeId = hostptr.i;
- }
- sendSignal(cdihblockref, GSN_NF_COMPLETEREP, signal,
- NFCompleteRep::SignalLength, JBB);
- } else {
- ndbrequire(hostptr.p->takeOverStatus == TOS_IDLE);
- hostptr.p->takeOverStatus = TOS_NODE_FAILED;
- }//if
-
- if (tcNodeFailptr.p->failStatus == FS_LISTENING) {
+ if (tcNodeFailptr.p->failStatus == FS_LISTENING)
+ {
jam();
/*------------------------------------------------------------*/
/* THE CURRENT TAKE OVER CAN BE AFFECTED BY THIS NODE */
/* FAILURE. */
/*------------------------------------------------------------*/
- if (hostptr.p->lqhTransStatus == LTS_ACTIVE) {
+ if (hostptr.p->lqhTransStatus == LTS_ACTIVE)
+ {
jam();
/*------------------------------------------------------------*/
/* WE WERE WAITING FOR THE FAILED NODE IN THE TAKE OVER */
@@ -6932,86 +6979,46 @@ void Dbtc::execNODE_FAILREP(Signal* signal)
}//if
}//if
- }//for
-
- const bool masterFailed = (cmasterNodeId != tnewMasterId);
- cmasterNodeId = tnewMasterId;
-
- if(getOwnNodeId() == cmasterNodeId && masterFailed){
- /**
- * Master has failed and I'm the new master
- */
- jam();
-
- for (hostptr.i = 1; hostptr.i < MAX_NDB_NODES; hostptr.i++) {
+ if (getOwnNodeId() != tnewMasterId)
+ {
jam();
- ptrAss(hostptr, hostRecord);
- if (hostptr.p->hostStatus != HS_ALIVE) {
- jam();
- if (hostptr.p->takeOverStatus == TOS_COMPLETED) {
- jam();
- /*------------------------------------------------------------*/
- /* SEND TAKE OVER CONFIRMATION TO ALL ALIVE NODES IF */
- /* TAKE OVER IS COMPLETED. THIS IS PERFORMED TO ENSURE */
- /* THAT ALL NODES AGREE ON THE IDLE STATE OF THE TAKE */
- /* OVER. THIS MIGHT BE MISSED IN AN ERROR SITUATION IF */
- /* MASTER FAILS AFTER SENDING CONFIRMATION TO NEW */
- /* MASTER BUT FAILING BEFORE SENDING TO ANOTHER NODE */
- /* WHICH WAS NOT MASTER. IF THIS NODE LATER BECOMES */
- /* MASTER IT MIGHT START A NEW TAKE OVER EVEN AFTER THE */
- /* CRASHED NODE HAVE ALREADY RECOVERED. */
- /*------------------------------------------------------------*/
- for(tmpHostptr.i = 1; tmpHostptr.i < MAX_NDB_NODES;tmpHostptr.i++) {
- jam();
- ptrAss(tmpHostptr, hostRecord);
- if (tmpHostptr.p->hostStatus == HS_ALIVE) {
- jam();
- tblockref = calcTcBlockRef(tmpHostptr.i);
- signal->theData[0] = hostptr.i;
- sendSignal(tblockref, GSN_TAKE_OVERTCCONF, signal, 1, JBB);
- }//if
- }//for
- }//if
- }//if
- }//for
- }
-
- if(getOwnNodeId() == cmasterNodeId){
- jam();
- for (hostptr.i = 1; hostptr.i < MAX_NDB_NODES; hostptr.i++) {
+ /**
+ * Only master does takeover currently
+ */
+ hostptr.p->m_nf_bits &= ~HostRecord::NF_TAKEOVER;
+ }
+ else
+ {
jam();
- ptrAss(hostptr, hostRecord);
- if (hostptr.p->hostStatus != HS_ALIVE) {
- jam();
- if (hostptr.p->takeOverStatus == TOS_NODE_FAILED) {
- jam();
- /*------------------------------------------------------------*/
- /* CONCLUDE ALL ACTIVITIES THE FAILED TC DID CONTROL */
- /* SINCE WE ARE THE MASTER. THIS COULD HAVE BEEN STARTED*/
- /* BY A PREVIOUS MASTER BUT HAVE NOT BEEN CONCLUDED YET.*/
- /*------------------------------------------------------------*/
- hostptr.p->takeOverStatus = TOS_ACTIVE;
- signal->theData[0] = hostptr.i;
- sendSignal(cownref, GSN_TAKE_OVERTCREQ, signal, 1, JBB);
- }//if
- }//if
- }//for
- }//if
- for (tindex = 0; tindex < tnoOfNodes; tindex++) {
- jam();
- hostptr.i = cdata[tindex];
- ptrCheckGuard(hostptr, chostFilesize, hostRecord);
- /*------------------------------------------------------------*/
- /* LOOP THROUGH AND ABORT ALL SCANS THAT WHERE */
- /* CONTROLLED BY THIS TC AND ACTIVE IN THE FAILED */
- /* NODE'S LQH */
- /*------------------------------------------------------------*/
+ signal->theData[0] = hostptr.i;
+ sendSignal(cownref, GSN_TAKE_OVERTCREQ, signal, 1, JBB);
+ }
+
checkScanActiveInFailedLqh(signal, 0, hostptr.i);
checkWaitDropTabFailedLqh(signal, hostptr.i, 0); // nodeid, tableid
- }//for
-
+ nodeFailCheckTransactions(signal, 0, hostptr.i);
+ }
}//Dbtc::execNODE_FAILREP()
+void // Marks the node-failure handling step(s) in 'bit' as finished for failedNodeId
+Dbtc::checkNodeFailComplete(Signal* signal, // signal buffer, reused to build NF_COMPLETEREP
+ Uint32 failedNodeId, // index of the failed node in hostRecord
+ Uint32 bit) // mask of the m_nf_bits flag(s) to clear
+{
+ hostptr.i = failedNodeId;
+ ptrCheckGuard(hostptr, chostFilesize, hostRecord);
+ hostptr.p->m_nf_bits &= ~bit; // this step is no longer pending
+ if (hostptr.p->m_nf_bits == 0) // nothing pending any more for this node
+ {
+ NFCompleteRep * const nfRep = (NFCompleteRep *)&signal->theData[0];
+ nfRep->blockNo = DBTC;
+ nfRep->nodeId = cownNodeid;
+ nfRep->failedNodeId = hostptr.i;
+ sendSignal(cdihblockref, GSN_NF_COMPLETEREP, signal, // report completion to cdihblockref
+ NFCompleteRep::SignalLength, JBB);
+ }
+}
+
void Dbtc::checkScanActiveInFailedLqh(Signal* signal,
Uint32 scanPtrI,
Uint32 failedNodeId){
@@ -7053,8 +7060,44 @@ void Dbtc::checkScanActiveInFailedLqh(Signal* signal,
sendSignal(cownref, GSN_CONTINUEB, signal, 3, JBB);
return;
}//for
+
+ checkNodeFailComplete(signal, failedNodeId, HostRecord::NF_CHECK_SCAN);
+}
+
+void // Checks ONE api connect record per call for involvement of failedNodeId
+Dbtc::nodeFailCheckTransactions(Signal* signal,
+ Uint32 transPtrI, // index of the api connect record to examine in this call
+ Uint32 failedNodeId) // node whose failure is being handled
+{
+ jam();
+ Ptr<ApiConnectRecord> transPtr;
+ for (transPtr.i = transPtrI; transPtr.i < capiConnectFilesize; transPtr.i++) // body ends in 'return': runs at most once per call
+ {
+ ptrCheckGuard(transPtr, capiConnectFilesize, apiConnectRecord);
+ if (transPtr.p->m_transaction_nodes.get(failedNodeId)) // transaction has the failed node in its node set
+ {
+ jam();
+ // Force timeout regardless of state
+ Uint32 save = c_appl_timeout_value; // restored after the call below
+ c_appl_timeout_value = 1;
+ setApiConTimer(transPtr.i, 0, __LINE__);
+ timeOutFoundLab(signal, transPtr.i, ZNODEFAIL_BEFORE_COMMIT); // reuse the time-out path with a node-failure error code
+ c_appl_timeout_value = save;
+ }
+
+ // Send CONTINUEB to continue later
+ signal->theData[0] = TcContinueB::ZNF_CHECK_TRANSACTIONS;
+ signal->theData[1] = transPtr.i + 1; // Check next
+ signal->theData[2] = failedNodeId;
+ sendSignal(cownref, GSN_CONTINUEB, signal, 3, JBB);
+ return;
+ }
+ // Reached only when transPtrI >= capiConnectFilesize, i.e. every record has been visited
+ checkNodeFailComplete(signal, failedNodeId,
+ HostRecord::NF_CHECK_TRANSACTION);
}
+
void
Dbtc::checkScanFragList(Signal* signal,
Uint32 failedNodeId,
@@ -7070,54 +7113,14 @@ void Dbtc::execTAKE_OVERTCCONF(Signal* signal)
tfailedNodeId = signal->theData[0];
hostptr.i = tfailedNodeId;
ptrCheckGuard(hostptr, chostFilesize, hostRecord);
- switch (hostptr.p->takeOverStatus) {
- case TOS_IDLE:
- jam();
- /*------------------------------------------------------------*/
- /* THIS MESSAGE ARRIVED EVEN BEFORE THE NODE_FAILREP */
- /* MESSAGE. THIS IS POSSIBLE IN EXTREME SITUATIONS. */
- /* WE SET THE STATE TO TAKE_OVER_COMPLETED AND WAIT */
- /* FOR THE NODE_FAILREP MESSAGE. */
- /*------------------------------------------------------------*/
- hostptr.p->takeOverStatus = TOS_COMPLETED;
- break;
- case TOS_NODE_FAILED:
- case TOS_ACTIVE:
- jam();
- /*------------------------------------------------------------*/
- /* WE ARE NOT MASTER AND THE TAKE OVER IS ACTIVE OR WE */
- /* ARE MASTER AND THE TAKE OVER IS ACTIVE. IN BOTH */
- /* WE SET THE STATE TO TAKE_OVER_COMPLETED. */
- /*------------------------------------------------------------*/
- /* RELEASE THE CURRENTLY UNUSED LQH CONNECTIONS. THE */
- /* REMAINING WILL BE RELEASED WHEN THE TRANSACTION THAT */
- /* USED THEM IS COMPLETED. */
- /*------------------------------------------------------------*/
- hostptr.p->takeOverStatus = TOS_COMPLETED;
- {
- NFCompleteRep * const nfRep = (NFCompleteRep *)&signal->theData[0];
- nfRep->blockNo = DBTC;
- nfRep->nodeId = cownNodeid;
- nfRep->failedNodeId = hostptr.i;
- }
- sendSignal(cdihblockref, GSN_NF_COMPLETEREP, signal,
- NFCompleteRep::SignalLength, JBB);
- break;
- case TOS_COMPLETED:
- jam();
- /*------------------------------------------------------------*/
- /* WE HAVE ALREADY RECEIVED THE CONF SIGNAL. IT IS MOST */
- /* LIKELY SENT FROM A NEW MASTER WHICH WASN'T SURE IF */
- /* THIS NODE HEARD THE CONF SIGNAL FROM THE OLD MASTER. */
- /* WE SIMPLY IGNORE THE MESSAGE. */
- /*------------------------------------------------------------*/
- /*empty*/;
- break;
- default:
+
+ if (signal->getSendersBlockRef() != reference())
+ {
jam();
- systemErrorLab(signal, __LINE__);
return;
- }//switch
+ }
+
+ checkNodeFailComplete(signal, hostptr.i, HostRecord::NF_TAKEOVER);
}//Dbtc::execTAKE_OVERTCCONF()
void Dbtc::execTAKE_OVERTCREQ(Signal* signal)
@@ -7357,16 +7360,10 @@ void Dbtc::completeTransAtTakeOverDoLast(Signal* signal, UintR TtakeOverInd)
/* TO REPORT THE COMPLETION OF THE TAKE OVER TO ALL */
/* NODES THAT ARE ALIVE. */
/*------------------------------------------------------------*/
- for (hostptr.i = 1; hostptr.i < MAX_NDB_NODES; hostptr.i++) {
- jam();
- ptrAss(hostptr, hostRecord);
- if (hostptr.p->hostStatus == HS_ALIVE) {
- jam();
- tblockref = calcTcBlockRef(hostptr.i);
- signal->theData[0] = tcNodeFailptr.p->takeOverNode;
- sendSignal(tblockref, GSN_TAKE_OVERTCCONF, signal, 1, JBB);
- }//if
- }//for
+ NodeReceiverGroup rg(DBTC, c_alive_nodes);
+ signal->theData[0] = tcNodeFailptr.p->takeOverNode;
+ sendSignal(rg, GSN_TAKE_OVERTCCONF, signal, 1, JBB);
+
if (tcNodeFailptr.p->queueIndex > 0) {
jam();
/*------------------------------------------------------------*/
@@ -8048,6 +8045,7 @@ void Dbtc::initApiConnectFail(Signal* signal)
apiConnectptr.p->ndbapiBlockref = 0;
apiConnectptr.p->ndbapiConnect = 0;
apiConnectptr.p->buddyPtr = RNIL;
+ apiConnectptr.p->m_transaction_nodes.clear();
setApiConTimer(apiConnectptr.i, 0, __LINE__);
switch(ttransStatus){
case LqhTransConf::Committed:
@@ -9875,6 +9873,7 @@ void Dbtc::initApiConnect(Signal* signal)
apiConnectptr.p->executingIndexOp = RNIL;
apiConnectptr.p->buddyPtr = RNIL;
apiConnectptr.p->currSavePointId = 0;
+ apiConnectptr.p->m_transaction_nodes.clear();
}//for
apiConnectptr.i = tiacTmp - 1;
ptrCheckGuard(apiConnectptr, capiConnectFilesize, apiConnectRecord);
@@ -9902,6 +9901,7 @@ void Dbtc::initApiConnect(Signal* signal)
apiConnectptr.p->executingIndexOp = RNIL;
apiConnectptr.p->buddyPtr = RNIL;
apiConnectptr.p->currSavePointId = 0;
+ apiConnectptr.p->m_transaction_nodes.clear();
}//for
apiConnectptr.i = (2 * tiacTmp) - 1;
ptrCheckGuard(apiConnectptr, capiConnectFilesize, apiConnectRecord);
@@ -9929,6 +9929,7 @@ void Dbtc::initApiConnect(Signal* signal)
apiConnectptr.p->executingIndexOp = RNIL;
apiConnectptr.p->buddyPtr = RNIL;
apiConnectptr.p->currSavePointId = 0;
+ apiConnectptr.p->m_transaction_nodes.clear();
}//for
apiConnectptr.i = (3 * tiacTmp) - 1;
ptrCheckGuard(apiConnectptr, capiConnectFilesize, apiConnectRecord);
@@ -9989,13 +9990,13 @@ void Dbtc::inithost(Signal* signal)
ptrAss(hostptr, hostRecord);
hostptr.p->hostStatus = HS_DEAD;
hostptr.p->inPackedList = false;
- hostptr.p->takeOverStatus = TOS_NOT_DEFINED;
hostptr.p->lqhTransStatus = LTS_IDLE;
hostptr.p->noOfWordsTCKEYCONF = 0;
hostptr.p->noOfWordsTCINDXCONF = 0;
hostptr.p->noOfPackedWordsLqh = 0;
hostptr.p->hostLqhBlockRef = calcLqhBlockRef(hostptr.i);
}//for
+ c_alive_nodes.clear();
}//Dbtc::inithost()
void Dbtc::initialiseRecordsLab(Signal* signal, UintR Tdata0,
@@ -10248,6 +10249,7 @@ void Dbtc::releaseAbortResources(Signal* signal)
}//while
apiConnectptr.p->firstTcConnect = RNIL;
apiConnectptr.p->lastTcConnect = RNIL;
+ apiConnectptr.p->m_transaction_nodes.clear();
// MASV let state be CS_ABORTING until all
// signals in the "air" have been received. Reset to CS_CONNECTED
@@ -10321,6 +10323,7 @@ void Dbtc::releaseApiCon(Signal* signal, UintR TapiConnectPtr)
cfirstfreeApiConnect = TlocalApiConnectptr.i;
setApiConTimer(TlocalApiConnectptr.i, 0, __LINE__);
TlocalApiConnectptr.p->apiConnectstate = CS_DISCONNECTED;
+ ndbassert(TlocalApiConnectptr.p->m_transaction_nodes.isclear());
ndbassert(TlocalApiConnectptr.p->apiScanRec == RNIL);
TlocalApiConnectptr.p->ndbapiBlockref = 0;
}//Dbtc::releaseApiCon()
@@ -10856,6 +10859,34 @@ Dbtc::execDUMP_STATE_ORD(Signal* signal)
c_theIndexOperationPool.getSize(),
c_theIndexOperationPool.getNoOfFree());
}
+
+ if (dumpState->args[0] == 2514)
+ {
+ if (signal->getLength() == 2)
+ {
+ dumpState->args[0] = DumpStateOrd::TcDumpOneApiConnectRec;
+ execDUMP_STATE_ORD(signal);
+ }
+
+ NodeReceiverGroup rg(CMVMI, c_alive_nodes);
+ dumpState->args[0] = 15;
+ sendSignal(rg, GSN_DUMP_STATE_ORD, signal, 1, JBB);
+
+ signal->theData[0] = 2515;
+ sendSignalWithDelay(cownref, GSN_DUMP_STATE_ORD, signal, 1000, 1);
+ return;
+ }
+
+ if (dumpState->args[0] == 2515)
+ {
+ NdbNodeBitmask mask = c_alive_nodes;
+ mask.clear(getOwnNodeId());
+ NodeReceiverGroup rg(NDBCNTR, mask);
+
+ sendSignal(rg, GSN_SYSTEM_ERROR, signal, 1, JBB);
+ sendSignalWithDelay(cownref, GSN_SYSTEM_ERROR, signal, 300, 1);
+ return;
+ }
}//Dbtc::execDUMP_STATE_ORD()
void Dbtc::execSET_VAR_REQ(Signal* signal)
diff --git a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp
index ed18a4ddb8b..1cecf69aaad 100644
--- a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp
+++ b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp
@@ -278,6 +278,7 @@ void Qmgr::setArbitTimeout(UintR aArbitTimeout)
void Qmgr::execCONNECT_REP(Signal* signal)
{
+ jamEntry();
const Uint32 nodeId = signal->theData[0];
c_connectedNodes.set(nodeId);
NodeRecPtr nodePtr;
@@ -285,9 +286,13 @@ void Qmgr::execCONNECT_REP(Signal* signal)
ptrCheckGuard(nodePtr, MAX_NODES, nodeRec);
switch(nodePtr.p->phase){
case ZSTARTING:
+ case ZRUNNING:
jam();
+ if(!c_start.m_nodes.isWaitingFor(nodeId)){
+ jam();
+ return;
+ }
break;
- case ZRUNNING:
case ZPREPARE_FAIL:
case ZFAIL_CLOSING:
jam();
@@ -298,21 +303,28 @@ void Qmgr::execCONNECT_REP(Signal* signal)
case ZAPI_INACTIVE:
return;
}
-
- if(!c_start.m_nodes.isWaitingFor(nodeId)){
- jam();
- return;
- }
-
+
switch(c_start.m_gsn){
case GSN_CM_REGREQ:
jam();
sendCmRegReq(signal, nodeId);
return;
- case GSN_CM_NODEINFOREQ:{
+ case GSN_CM_NODEINFOREQ:
jam();
sendCmNodeInfoReq(signal, nodeId, nodePtr.p);
return;
+ case GSN_CM_ADD:{
+ jam();
+
+ ndbrequire(getOwnNodeId() != cpresident);
+ c_start.m_nodes.clearWaitingFor(nodeId);
+ c_start.m_gsn = RNIL;
+
+ NodeRecPtr addNodePtr;
+ addNodePtr.i = nodeId;
+ ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec);
+ cmAddPrepare(signal, addNodePtr, nodePtr.p);
+ return;
}
default:
return;
@@ -945,15 +957,27 @@ Qmgr::cmAddPrepare(Signal* signal, NodeRecPtr nodePtr, const NodeRec * self){
return;
case ZFAIL_CLOSING:
jam();
-#ifdef VM_TRACE
- ndbout_c("Enabling communication to CM_ADD node state=%d",
- nodePtr.p->phase);
-#endif
+
+#if 1
+ warningEvent("Recieved request to incorperate node %u, "
+ "while error handling has not yet completed",
+ nodePtr.i);
+
+ ndbrequire(getOwnNodeId() != cpresident);
+ ndbrequire(signal->header.theVerId_signalNumber == GSN_CM_ADD);
+ c_start.m_nodes.clearWaitingFor();
+ c_start.m_nodes.setWaitingFor(nodePtr.i);
+ c_start.m_gsn = GSN_CM_ADD;
+#else
+ warningEvent("Enabling communication to CM_ADD node %u state=%d",
+ nodePtr.i,
+ nodePtr.p->phase);
nodePtr.p->phase = ZSTARTING;
nodePtr.p->failState = NORMAL;
signal->theData[0] = 0;
signal->theData[1] = nodePtr.i;
sendSignal(CMVMI_REF, GSN_OPEN_COMREQ, signal, 2, JBA);
+#endif
return;
case ZSTARTING:
break;
@@ -1788,11 +1812,27 @@ void Qmgr::execNDB_FAILCONF(Signal* signal)
jamEntry();
failedNodePtr.i = signal->theData[0];
+
+ if (ERROR_INSERTED(930))
+ {
+ CLEAR_ERROR_INSERT_VALUE;
+ infoEvent("Discarding NDB_FAILCONF for %u", failedNodePtr.i);
+ return;
+ }
+
ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
if (failedNodePtr.p->failState == WAITING_FOR_NDB_FAILCONF){
failedNodePtr.p->failState = NORMAL;
} else {
jam();
+
+ char buf[100];
+ BaseString::snprintf(buf, 100,
+ "Received NDB_FAILCONF for node %u with state: %d %d",
+ failedNodePtr.i,
+ failedNodePtr.p->phase,
+ failedNodePtr.p->failState);
+ progError(__LINE__, 0, buf);
systemErrorLab(signal, __LINE__);
}//if
if (cpresident == getOwnNodeId()) {
@@ -2112,10 +2152,42 @@ void Qmgr::failReportLab(Signal* signal, Uint16 aFailedNode,
ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
if (failedNodePtr.i == getOwnNodeId()) {
jam();
- systemErrorLab(signal, __LINE__);
+
+ const char * msg = 0;
+ switch(aFailCause){
+ case FailRep::ZOWN_FAILURE:
+ msg = "Own failure";
+ break;
+ case FailRep::ZOTHER_NODE_WHEN_WE_START:
+ case FailRep::ZOTHERNODE_FAILED_DURING_START:
+ msg = "Other node died during start";
+ break;
+ case FailRep::ZIN_PREP_FAIL_REQ:
+ msg = "Prep fail";
+ break;
+ case FailRep::ZSTART_IN_REGREQ:
+ msg = "Start timeout";
+ break;
+ case FailRep::ZHEARTBEAT_FAILURE:
+ msg = "Hearbeat failure";
+ break;
+ case FailRep::ZLINK_FAILURE:
+ msg = "Connection failure";
+ break;
+ }
+
+ char buf[100];
+ BaseString::snprintf(buf, 100,
+ "We(%u) have been declared dead by %u reason: %s(%u)",
+ getOwnNodeId(),
+ refToNode(signal->getSendersBlockRef()),
+ aFailCause,
+ msg ? msg : "<Unknown>");
+
+ progError(__LINE__, 0, buf);
return;
}//if
-
+
myNodePtr.i = getOwnNodeId();
ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
if (myNodePtr.p->phase != ZRUNNING) {
@@ -2826,6 +2898,7 @@ void Qmgr::failReport(Signal* signal,
cfailureNr = cprepareFailureNr;
ctoFailureNr = 0;
ctoStatus = Q_ACTIVE;
+ c_start.reset(); // Don't take over nodes being started
if (cnoCommitFailedNodes > 0) {
jam();
/**-----------------------------------------------------------------
diff --git a/ndb/src/ndbapi/NdbTransaction.cpp b/ndb/src/ndbapi/NdbTransaction.cpp
index 294012d780c..7a2d8fc71c7 100644
--- a/ndb/src/ndbapi/NdbTransaction.cpp
+++ b/ndb/src/ndbapi/NdbTransaction.cpp
@@ -434,12 +434,12 @@ NdbTransaction::executeNoBlobs(ExecType aTypeOfExec,
//------------------------------------------------------------------------
Ndb* tNdb = theNdb;
+ Uint32 timeout = TransporterFacade::instance()->m_waitfor_timeout;
m_waitForReply = false;
executeAsynchPrepare(aTypeOfExec, NULL, NULL, abortOption);
if (m_waitForReply){
while (1) {
- int noOfComp = tNdb->sendPollNdb((3 * WAITFOR_RESPONSE_TIMEOUT),
- 1, forceSend);
+ int noOfComp = tNdb->sendPollNdb(3 * timeout, 1, forceSend);
if (noOfComp == 0) {
/**
* This timeout situation can occur if NDB crashes.
diff --git a/ndb/src/ndbapi/Ndbif.cpp b/ndb/src/ndbapi/Ndbif.cpp
index bfbf98d1b3a..d39b21b52f7 100644
--- a/ndb/src/ndbapi/Ndbif.cpp
+++ b/ndb/src/ndbapi/Ndbif.cpp
@@ -953,23 +953,25 @@ Ndb::pollCompleted(NdbTransaction** aCopyArray)
void
Ndb::check_send_timeout()
{
+ Uint32 timeout = TransporterFacade::instance()->m_waitfor_timeout;
NDB_TICKS current_time = NdbTick_CurrentMillisecond();
if (current_time - the_last_check_time > 1000) {
the_last_check_time = current_time;
Uint32 no_of_sent = theNoOfSentTransactions;
for (Uint32 i = 0; i < no_of_sent; i++) {
NdbTransaction* a_con = theSentTransactionsArray[i];
- if ((current_time - a_con->theStartTransTime) >
- WAITFOR_RESPONSE_TIMEOUT) {
+ if ((current_time - a_con->theStartTransTime) > timeout)
+ {
#ifdef VM_TRACE
a_con->printState();
Uint32 t1 = a_con->theTransactionId;
Uint32 t2 = a_con->theTransactionId >> 32;
- ndbout_c("[%.8x %.8x]", t1, t2);
- abort();
+ ndbout_c("4012 [%.8x %.8x]", t1, t2);
+ //abort();
#endif
+ a_con->theReleaseOnClose = true;
a_con->setOperationErrorCodeAbort(4012);
- a_con->theCommitStatus = NdbTransaction::Aborted;
+ a_con->theCommitStatus = NdbTransaction::NeedAbort;
a_con->theCompletionStatus = NdbTransaction::CompletedFailure;
a_con->handleExecuteCompletion();
remove_sent_list(i);
diff --git a/ndb/src/ndbapi/TransporterFacade.cpp b/ndb/src/ndbapi/TransporterFacade.cpp
index 77750a3c3d0..f661d53487c 100644
--- a/ndb/src/ndbapi/TransporterFacade.cpp
+++ b/ndb/src/ndbapi/TransporterFacade.cpp
@@ -563,6 +563,19 @@ TransporterFacade::init(Uint32 nodeId, const ndb_mgm_configuration* props)
m_batch_size= batch_size;
}
+ Uint32 timeout = 120000;
+ iter.first();
+ for (iter.first(); iter.valid(); iter.next())
+ {
+ Uint32 tmp1 = 0, tmp2 = 0;
+ iter.get(CFG_DB_TRANSACTION_CHECK_INTERVAL, &tmp1);
+ iter.get(CFG_DB_TRANSACTION_DEADLOCK_TIMEOUT, &tmp2);
+ tmp1 += tmp2;
+ if (tmp1 > timeout)
+ timeout = tmp1;
+ }
+ m_waitfor_timeout = timeout;
+
if (!theTransporterRegistry->start_service(m_socket_server)){
ndbout_c("Unable to start theTransporterRegistry->start_service");
DBUG_RETURN(false);
diff --git a/ndb/src/ndbapi/TransporterFacade.hpp b/ndb/src/ndbapi/TransporterFacade.hpp
index fa070889dd9..7174ce5206b 100644
--- a/ndb/src/ndbapi/TransporterFacade.hpp
+++ b/ndb/src/ndbapi/TransporterFacade.hpp
@@ -178,6 +178,7 @@ public:
* (Ndb objects should not be shared by different threads.)
*/
STATIC_CONST( MAX_NO_THREADS = 4711 );
+ Uint32 m_waitfor_timeout; // in milli seconds...
private:
struct ThreadData {
diff --git a/ndb/test/include/NdbRestarter.hpp b/ndb/test/include/NdbRestarter.hpp
index 19a88b4f8ad..3ec92ae786e 100644
--- a/ndb/test/include/NdbRestarter.hpp
+++ b/ndb/test/include/NdbRestarter.hpp
@@ -62,6 +62,7 @@ public:
int dumpStateAllNodes(int * _args, int _num_args);
int getMasterNodeId();
+ int getRandomNodeSameNodeGroup(int nodeId, int randomNumber);
int getRandomNodeOtherNodeGroup(int nodeId, int randomNumber);
int getRandomNotMasterNodeId(int randomNumber);
diff --git a/ndb/test/ndbapi/testNodeRestart.cpp b/ndb/test/ndbapi/testNodeRestart.cpp
index 92d6c1830ef..726c575316f 100644
--- a/ndb/test/ndbapi/testNodeRestart.cpp
+++ b/ndb/test/ndbapi/testNodeRestart.cpp
@@ -535,6 +535,119 @@ err:
return NDBT_FAILED;
}
+int // Test step: restart a node while another node discards NDB_FAILCONF for it
+runBug16772(NDBT_Context* ctx, NDBT_Step* step){
+
+ NdbRestarter restarter;
+ if (restarter.getNumDbNodes() < 2) // needs one node to restart and one to keep alive
+ {
+ ctx->stopTest();
+ return NDBT_OK;
+ }
+
+ int aliveNodeId = restarter.getRandomNotMasterNodeId(rand());
+ int deadNodeId = aliveNodeId;
+ while (deadNodeId == aliveNodeId) // pick any db node other than aliveNodeId
+ deadNodeId = restarter.getDbNodeId(rand() % restarter.getNumDbNodes());
+
+ if (restarter.insertErrorInNode(aliveNodeId, 930)) // 930: Qmgr::execNDB_FAILCONF discards the signal
+ return NDBT_FAILED;
+
+ if (restarter.restartOneDbNode(deadNodeId,
+ /** initial */ false,
+ /** nostart */ true,
+ /** abort */ true))
+ return NDBT_FAILED;
+
+ if (restarter.waitNodesNoStart(&deadNodeId, 1))
+ return NDBT_FAILED;
+
+ if (restarter.startNodes(&deadNodeId, 1))
+ return NDBT_FAILED;
+
+ // It should now be hanging since we throw away NDB_FAILCONF
+ int ret = restarter.waitNodesStartPhase(&deadNodeId, 1, 3, 10); // presumably (nodes, count, start phase, timeout) - TODO confirm
+ // So this should fail...i.e it should not reach startphase 3
+
+ // Now send a NDB_FAILCONF for deadNo
+ int dump[] = { 7020, 323, 252, 0 }; // dump-state arguments; last slot is filled in below
+ dump[3] = deadNodeId;
+ if (restarter.dumpStateOneNode(aliveNodeId, dump, 4))
+ return NDBT_FAILED;
+
+ if (restarter.waitNodesStarted(&deadNodeId, 1))
+ return NDBT_FAILED;
+
+ return ret ? NDBT_OK : NDBT_FAILED; // success only if the earlier start-phase wait did NOT succeed
+}
+
+int // Test step: roll back a transaction while a node in the same node group is failing, 5 rounds
+runBug18414(NDBT_Context* ctx, NDBT_Step* step){
+
+ NdbRestarter restarter;
+ if (restarter.getNumDbNodes() < 2) // nothing to test on a single-node cluster
+ {
+ ctx->stopTest();
+ return NDBT_OK;
+ }
+
+ Ndb* pNdb = GETNDB(step);
+ HugoOperations hugoOps(*ctx->getTab());
+ HugoTransactions hugoTrans(*ctx->getTab());
+ int loop = 0;
+ do
+ {
+ if(hugoOps.startTransaction(pNdb) != 0)
+ goto err;
+
+ if(hugoOps.pkUpdateRecord(pNdb, 0, 128, rand()) != 0)
+ goto err;
+
+ if(hugoOps.execute_NoCommit(pNdb) != 0) // leave the transaction open (not committed)
+ goto err;
+
+ int node1 = hugoOps.getTransaction()->getConnectedNodeId(); // node the transaction is connected to
+ int node2 = restarter.getRandomNodeSameNodeGroup(node1, rand());
+
+ if (node1 == -1 || node2 == -1) // NOTE(review): leaves the loop without closeTransaction - confirm this is intended
+ break;
+
+ if (loop & 1) // odd rounds only
+ {
+ if (restarter.insertErrorInNode(node1, 8050)) // 8050: Dbtc delays its ZABORT_TIMEOUT_BREAK CONTINUEB
+ goto err;
+ }
+
+ if (restarter.insertErrorInNode(node2, 5003)) // presumably makes node2 fail (the test waits for NoStart below) - TODO confirm
+ goto err;
+
+ int res= hugoOps.execute_Rollback(pNdb); // NOTE(review): result is never checked
+
+ if (restarter.waitNodesNoStart(&node2, 1) != 0)
+ goto err;
+
+ if (restarter.insertErrorInAllNodes(0)) // presumably clears the inserted errors - TODO confirm
+ goto err;
+
+ if (restarter.startNodes(&node2, 1) != 0)
+ goto err;
+
+ if (restarter.waitClusterStarted() != 0)
+ goto err;
+
+ if (hugoTrans.scanUpdateRecords(pNdb, 128) != 0)
+ goto err;
+
+ hugoOps.closeTransaction(pNdb);
+
+ } while(++loop < 5);
+
+ return NDBT_OK;
+
+err: // common failure exit: close the transaction before reporting failure
+ hugoOps.closeTransaction(pNdb);
+ return NDBT_FAILED;
+}
NDBT_TESTSUITE(testNodeRestart);
TESTCASE("NoLoad",
@@ -820,6 +933,16 @@ TESTCASE("Bug15685",
STEP(runBug15685);
FINALIZER(runClearTable);
}
+// Bug16772: registers runBug16772 as the only step; no table load or
+// cleanup function is registered for this case.
+TESTCASE("Bug16772",
+ "Test bug with restarting before NF handling is complete"){
+ STEP(runBug16772);
+}
+// Bug18414: registers runLoadTable as initializer, runBug18414 as the
+// only step and runClearTable as finalizer.
+TESTCASE("Bug18414",
+ "Test bug with NF during NR"){
+ INITIALIZER(runLoadTable);
+ STEP(runBug18414);
+ FINALIZER(runClearTable);
+}
NDBT_TESTSUITE_END(testNodeRestart);
int main(int argc, const char** argv){
diff --git a/ndb/test/ndbapi/testSystemRestart.cpp b/ndb/test/ndbapi/testSystemRestart.cpp
index 35016896495..30f7aca9b06 100644
--- a/ndb/test/ndbapi/testSystemRestart.cpp
+++ b/ndb/test/ndbapi/testSystemRestart.cpp
@@ -1051,6 +1051,52 @@ int runSystemRestart9(NDBT_Context* ctx, NDBT_Step* step){
return result;
}
+// Regression test for bug #18385 (registered below as "Perform partition
+// system restart with other nodes with higher GCI").
+// Returns NDBT_OK when the sequence completes or when the test cannot be
+// run; `result` is presumably set to NDBT_FAILED by the CHECK macro
+// (defined elsewhere in this file - confirm).
+int runBug18385(NDBT_Context* ctx, NDBT_Step* step){
+ NdbRestarter restarter;
+ const Uint32 nodeCount = restarter.getNumDbNodes();
+ if(nodeCount < 2){
+ g_info << "Bug18385 - Needs atleast 2 nodes to test" << endl;
+ return NDBT_OK;
+ }
+
+ // node1: a randomly chosen data node; node2: another node taken from
+ // node1's node group.
+ int node1 = restarter.getDbNodeId(rand() % nodeCount);
+ int node2 = restarter.getRandomNodeSameNodeGroup(node1, rand());
+
+ // No suitable pair of nodes: nothing to test.
+ if (node1 == -1 || node2 == -1)
+ return NDBT_OK;
+
+ // Dump-state order sent to every node before the restarts.
+ int dump[] = { DumpStateOrd::DihSetTimeBetweenGcp, 300 };
+
+ int result = NDBT_OK;
+ do {
+ CHECK(restarter.dumpStateAllNodes(dump, 2) == 0);
+ // Restart node1 alone, wait three seconds, then restart every node.
+ CHECK(restarter.restartOneDbNode(node1, false, true, false) == 0);
+ NdbSleep_SecSleep(3);
+ CHECK(restarter.restartAll(false, true, false) == 0);
+
+ // Collect the ids of all data nodes except node2. An id equal to node2
+ // is written to nodes[cnt] but then overwritten (or ignored) because
+ // cnt is not advanced.
+ Uint32 cnt = 0;
+ int nodes[128];
+ for(Uint32 i = 0; i<nodeCount; i++)
+ if ((nodes[cnt] = restarter.getDbNodeId(i)) != node2)
+ cnt++;
+
+ // Exactly one node (node2) must have been left out.
+ assert(cnt == nodeCount - 1);
+
+ // Start everything except node2 and wait for those nodes.
+ CHECK(restarter.startNodes(nodes, cnt) == 0);
+ CHECK(restarter.waitNodesStarted(nodes, cnt, 300) == 0);
+
+ // Now handle node2: insert error 7170, wait until it reports NOSTART,
+ // restart it and wait for it to be started.
+ CHECK(restarter.insertErrorInNode(node2, 7170) == 0);
+ CHECK(restarter.waitNodesNoStart(&node2, 1) == 0);
+ CHECK(restarter.restartOneDbNode(node2, true, false, true) == 0);
+ CHECK(restarter.waitNodesStarted(&node2, 1) == 0);
+
+ } while(0);
+
+ g_info << "Bug18385 finished" << endl;
+
+ return result;
+}
+
int runWaitStarted(NDBT_Context* ctx, NDBT_Step* step){
NdbRestarter restarter;
@@ -1234,6 +1280,13 @@ TESTCASE("SR9",
STEP(runSystemRestart9);
FINALIZER(runClearTable);
}
+// Bug18385: registers runWaitStarted and runClearTable as initializers,
+// runBug18385 as the only step and runClearTable as finalizer.
+TESTCASE("Bug18385",
+ "Perform partition system restart with other nodes with higher GCI"){
+ INITIALIZER(runWaitStarted);
+ INITIALIZER(runClearTable);
+ STEP(runBug18385);
+ FINALIZER(runClearTable);
+}
NDBT_TESTSUITE_END(testSystemRestart);
int main(int argc, const char** argv){
diff --git a/ndb/test/ndbapi/testTimeout.cpp b/ndb/test/ndbapi/testTimeout.cpp
index b02751ec819..36fb34a50e2 100644
--- a/ndb/test/ndbapi/testTimeout.cpp
+++ b/ndb/test/ndbapi/testTimeout.cpp
@@ -24,6 +24,7 @@
#define TIMEOUT (Uint32)3000
Uint32 g_org_timeout = 3000;
+// Deadlock timeout read from the configuration by setDeadlockTimeout()
+// and sent back to the nodes by resetDeadlockTimeout().
+Uint32 g_org_deadlock = 3000;
int
setTransactionTimeout(NDBT_Context* ctx, NDBT_Step* step){
@@ -59,6 +60,60 @@ resetTransactionTimeout(NDBT_Context* ctx, NDBT_Step* step){
return NDBT_OK;
}
+/**
+ * Save the configured deadlock timeout in g_org_deadlock and send the
+ * value of the "TransactionDeadlockTimeout" test property (default
+ * TIMEOUT) to all nodes with a TcSetTransactionTimeout dump order.
+ *
+ * @return NDBT_OK on success, NDBT_FAILED if the configuration value
+ *         cannot be read or the dump order fails.
+ */
+int
+setDeadlockTimeout(NDBT_Context* ctx, NDBT_Step* step){
+  NdbRestarter restarter;
+  int timeout = ctx->getProperty("TransactionDeadlockTimeout", TIMEOUT);
+
+  // Remember the configured value so that it can be restored later
+  NdbConfig conf(GETNDB(step)->getNodeId()+1);
+  unsigned int masterId = conf.getMasterNodeId();
+  if (!conf.getProperty(masterId, NODE_TYPE_DB,
+                        CFG_DB_TRANSACTION_DEADLOCK_TIMEOUT,
+                        &g_org_deadlock))
+  {
+    return NDBT_FAILED;
+  }
+
+  g_err << "Setting timeout: " << timeout << endl;
+
+  int dumpArgs[] = { DumpStateOrd::TcSetTransactionTimeout, timeout };
+  return (restarter.dumpStateAllNodes(dumpArgs, 2) != 0) ? NDBT_FAILED
+                                                         : NDBT_OK;
+}
+
+/**
+ * Read the configured deadlock timeout (clamped to at least 120000) and
+ * store four times that value in the "TransactionDeadlockTimeout" test
+ * property.
+ *
+ * @return NDBT_OK on success, NDBT_FAILED if the configuration value
+ *         cannot be read.
+ */
+int
+getDeadlockTimeout(NDBT_Context* ctx, NDBT_Step* step){
+  NdbRestarter restarter;
+
+  NdbConfig conf(GETNDB(step)->getNodeId()+1);
+  unsigned int masterId = conf.getMasterNodeId();
+
+  Uint32 configured = 0;
+  if (!conf.getProperty(masterId, NODE_TYPE_DB,
+                        CFG_DB_TRANSACTION_DEADLOCK_TIMEOUT,
+                        &configured))
+  {
+    return NDBT_FAILED;
+  }
+
+  // Use 120000 as the floor before scaling
+  const Uint32 base = (configured < 120000) ? 120000 : configured;
+  ctx->setProperty("TransactionDeadlockTimeout", 4*base);
+
+  return NDBT_OK;
+}
+
+/**
+ * Send the timeout saved in g_org_deadlock (by setDeadlockTimeout) to all
+ * nodes with a TcSetTransactionTimeout dump order.
+ *
+ * @return NDBT_OK on success, NDBT_FAILED if the dump order fails.
+ */
+int
+resetDeadlockTimeout(NDBT_Context* ctx, NDBT_Step* step){
+  NdbRestarter restarter;
+
+  // g_org_deadlock is a Uint32; convert explicitly because an implicit
+  // unsigned -> int conversion inside a braced initializer is a narrowing
+  // conversion (ill-formed from C++11 on).
+  int val[] = { DumpStateOrd::TcSetTransactionTimeout,
+                static_cast<int>(g_org_deadlock) };
+  if(restarter.dumpStateAllNodes(val, 2) != 0){
+    return NDBT_FAILED;
+  }
+
+  return NDBT_OK;
+}
+
int runLoadTable(NDBT_Context* ctx, NDBT_Step* step){
@@ -333,6 +388,43 @@ int runBuddyTransNoTimeout(NDBT_Context* ctx, NDBT_Step* step){
return result;
}
+// Test step for error 4012 handling with the deadlock timeout taken from
+// the "TransactionDeadlockTimeout" test property (default TIMEOUT).
+// If the NoCommit execute succeeds, the step sleeps for `timeout`
+// milliseconds and the commit must then still succeed; otherwise the
+// execute must have returned 4012.
+// Returns `result`, which starts as NDBT_OK and is presumably set to
+// NDBT_FAILED by the CHECK macro (defined elsewhere in this file - confirm).
+int
+runError4012(NDBT_Context* ctx, NDBT_Step* step){
+ int result = NDBT_OK;
+ // NOTE(review): loops and stepNo are not referenced again in the visible
+ // body; confirm the CHECK macro does not use them before removing.
+ int loops = ctx->getNumLoops();
+ int stepNo = step->getStepNo();
+
+ int timeout = ctx->getProperty("TransactionDeadlockTimeout", TIMEOUT);
+
+ HugoOperations hugoOps(*ctx->getTab());
+ Ndb* pNdb = GETNDB(step);
+
+ do{
+ // Start a transaction and update record 0 without committing
+ CHECK(hugoOps.startTransaction(pNdb) == 0);
+ CHECK(hugoOps.pkUpdateRecord(pNdb, 0) == 0);
+ int ret = hugoOps.execute_NoCommit(pNdb);
+ if (ret == 0)
+ {
+ // Keep the uncommitted transaction open for the whole timeout period
+ int sleep = timeout;
+ ndbout << "Sleeping for " << sleep << " milliseconds" << endl;
+ NdbSleep_MilliSleep(sleep);
+
+ // Expect that transaction has NOT timed-out
+ CHECK(hugoOps.execute_Commit(pNdb) == 0);
+ }
+ else
+ {
+ // The only accepted failure of the execute is error 4012
+ CHECK(ret == 4012);
+ }
+ } while(false);
+
+ hugoOps.closeTransaction(pNdb);
+
+ return result;
+}
+
+
NDBT_TESTSUITE(testTimeout);
TESTCASE("DontTimeoutTransaction",
"Test that the transaction does not timeout "\
@@ -403,6 +495,15 @@ TESTCASE("BuddyTransNoTimeout5",
FINALIZER(resetTransactionTimeout);
FINALIZER(runClearTable);
}
+// Error4012: loads the table, computes and applies the deadlock timeout,
+// runs two runError4012 steps, then restores the saved timeout (so later
+// test cases do not inherit the changed value) and clears the table.
+TESTCASE("Error4012", ""){
+ TC_PROPERTY("TransactionDeadlockTimeout", 120000);
+ INITIALIZER(runLoadTable);
+ INITIALIZER(getDeadlockTimeout);
+ INITIALIZER(setDeadlockTimeout);
+ STEPS(runError4012, 2);
+ FINALIZER(resetDeadlockTimeout);
+ FINALIZER(runClearTable);
+}
+
NDBT_TESTSUITE_END(testTimeout);
int main(int argc, const char** argv){
diff --git a/ndb/test/run-test/Makefile.am b/ndb/test/run-test/Makefile.am
index 60d64a7697f..2c45db50556 100644
--- a/ndb/test/run-test/Makefile.am
+++ b/ndb/test/run-test/Makefile.am
@@ -7,11 +7,10 @@ include $(top_srcdir)/ndb/config/type_mgmapiclient.mk.am
test_PROGRAMS = atrt
test_DATA=daily-basic-tests.txt daily-devel-tests.txt 16node-tests.txt \
- conf-daily-basic-ndbmaster.txt \
- conf-daily-basic-shark.txt \
- conf-daily-devel-ndbmaster.txt \
- conf-daily-sql-ndbmaster.txt \
- conf-daily-basic-dl145a.txt
+ conf-ndbmaster.txt \
+ conf-shark.txt \
+ conf-dl145a.txt
+
test_SCRIPTS=atrt-analyze-result.sh atrt-gather-result.sh atrt-setup.sh \
atrt-clear-result.sh make-config.sh make-index.sh make-html-reports.sh
diff --git a/ndb/test/run-test/conf-daily-devel-ndbmaster.txt b/ndb/test/run-test/conf-daily-devel-ndbmaster.txt
index 8b340e6a39d..51c171a6357 100644
--- a/ndb/test/run-test/conf-daily-devel-ndbmaster.txt
+++ b/ndb/test/run-test/conf-daily-devel-ndbmaster.txt
@@ -17,3 +17,6 @@ FileSystemPath: /space/autotest/run
PortNumber: 16000
ArbitrationRank: 1
DataDir: .
+
+[TCP DEFAULT]
+SendBufferMemory: 2M
diff --git a/ndb/test/run-test/conf-daily-basic-dl145a.txt b/ndb/test/run-test/conf-dl145a.txt
index d8cf8d34d82..d0a240f09d1 100644
--- a/ndb/test/run-test/conf-daily-basic-dl145a.txt
+++ b/ndb/test/run-test/conf-dl145a.txt
@@ -17,3 +17,6 @@ FileSystemPath: /home/ndbdev/autotest/run
PortNumber: 14000
ArbitrationRank: 1
DataDir: .
+
+[TCP DEFAULT]
+SendBufferMemory: 2M
diff --git a/ndb/test/run-test/conf-daily-basic-ndbmaster.txt b/ndb/test/run-test/conf-ndbmaster.txt
index bcd809593f3..89b41850ec0 100644
--- a/ndb/test/run-test/conf-daily-basic-ndbmaster.txt
+++ b/ndb/test/run-test/conf-ndbmaster.txt
@@ -17,3 +17,6 @@ FileSystemPath: /space/autotest/run
PortNumber: 14000
ArbitrationRank: 1
DataDir: .
+
+[TCP DEFAULT]
+SendBufferMemory: 2M
diff --git a/ndb/test/run-test/conf-daily-basic-shark.txt b/ndb/test/run-test/conf-shark.txt
index 6d1f8b64f44..d66d0280d8a 100644
--- a/ndb/test/run-test/conf-daily-basic-shark.txt
+++ b/ndb/test/run-test/conf-shark.txt
@@ -17,3 +17,6 @@ FileSystemPath: /space/autotest/run
PortNumber: 14000
ArbitrationRank: 1
DataDir: .
+
+[TCP DEFAULT]
+SendBufferMemory: 2M
diff --git a/ndb/test/run-test/daily-basic-tests.txt b/ndb/test/run-test/daily-basic-tests.txt
index 59f51044b51..d331a62cc7e 100644
--- a/ndb/test/run-test/daily-basic-tests.txt
+++ b/ndb/test/run-test/daily-basic-tests.txt
@@ -425,14 +425,26 @@ max-time: 500
cmd: testNodeRestart
args: -n Bug15685 T1
+max-time: 500
+cmd: testNodeRestart
+args: -n Bug16772 T1
+
+max-time: 500
+cmd: testSystemRestart
+args: -n Bug18385 T1
+
+max-time: 500
+cmd: testNodeRestart
+args: -n Bug18414 T1
+
# OLD FLEX
max-time: 500
cmd: flexBench
-args: -c 25 -t 10
+args: -c 25 -t 10
max-time: 500
cmd: flexHammer
-args: -r 5 -t 32
+args: -r 5 -t 32
#
# DICT TESTS
diff --git a/ndb/test/run-test/ndb-autotest.sh b/ndb/test/run-test/ndb-autotest.sh
index 4228d2354d3..544897a2aa2 100755
--- a/ndb/test/run-test/ndb-autotest.sh
+++ b/ndb/test/run-test/ndb-autotest.sh
@@ -13,7 +13,7 @@ save_args=$*
VERSION="ndb-autotest.sh version 1.04"
DATE=`date '+%Y-%m-%d'`
-HOST=`hostname`
+HOST=`hostname -s`
export DATE HOST
set -e
@@ -35,6 +35,7 @@ report=yes
clone=5.0-ndb
RUN="daily-basic daily-devel"
conf=autotest.conf
+LOCK=$HOME/.autotest-lock
############################
# Read command line entries#
@@ -66,7 +67,7 @@ done
if [ -f $conf ]
then
- . ./$conf
+ . $conf
else
echo "Can't find config file: $conf"
exit
@@ -105,7 +106,6 @@ fi
# Setup the clone source location #
####################################
-LOCK=$HOME/.autotest-lock
src_clone=$src_clone_base-$clone
#######################################
@@ -299,9 +299,12 @@ choose_conf(){
elif [ -f $test_dir/conf-$1.txt ]
then
echo "$test_dir/conf-$1.txt"
+ elif [ -f $test_dir/conf-$HOST.txt ]
+ then
+ # Fall back to the host-specific configuration file
+ echo "$test_dir/conf-$HOST.txt"
else
echo "Unable to find conf file looked for" 1>&2
echo "$test_dir/conf-$1-$HOST.txt and" 1>&2
+ echo "$test_dir/conf-$HOST.txt" 1>&2
echo "$test_dir/conf-$1.txt" 1>&2
exit
fi
@@ -386,7 +389,8 @@ do
awk '{for(i=1;i<='$count';i++)print $i;}'`
echo $run_hosts >> /tmp/filter_hosts.$$
- choose $conf $run_hosts > d.tmp
+ choose $conf $run_hosts > d.tmp.$$
+ sed -e s,CHOOSE_dir,"$install_dir",g < d.tmp.$$ > d.tmp
$mkconfig d.tmp
fi
diff --git a/ndb/test/src/NdbRestarter.cpp b/ndb/test/src/NdbRestarter.cpp
index 91c0963feae..2c16a05240d 100644
--- a/ndb/test/src/NdbRestarter.cpp
+++ b/ndb/test/src/NdbRestarter.cpp
@@ -174,6 +174,39 @@ NdbRestarter::getRandomNodeOtherNodeGroup(int nodeId, int rand){
return -1;
}
+/**
+ * Pick a node that belongs to the same node group as nodeId but is not
+ * nodeId itself.
+ *
+ * The search starts at index (rand % number of nodes) in ndbNodes and
+ * visits every entry at most once, wrapping around at the end.
+ *
+ * @return the id of the selected node, or -1 if not connected, if
+ *         getStatus() fails, if nodeId is unknown (or has node group -1),
+ *         or if no other node shares its node group.
+ */
+int
+NdbRestarter::getRandomNodeSameNodeGroup(int nodeId, int rand){
+  if (!isConnected() || getStatus() != 0)
+    return -1;
+
+  // Look up the node group of the reference node
+  int wanted_group = -1;
+  for (size_t pos = 0; pos < ndbNodes.size(); pos++){
+    if (ndbNodes[pos].node_id != nodeId)
+      continue;
+    wanted_group = ndbNodes[pos].node_group;
+    break;
+  }
+  if (wanted_group == -1)
+    return -1;
+
+  // Probe each entry once, beginning at the pseudo-random start index
+  rand = rand % ndbNodes.size();
+  for (Uint32 tried = 0; tried < ndbNodes.size(); tried++){
+    const bool same_group = ndbNodes[rand].node_group == wanted_group;
+    const bool other_node = ndbNodes[rand].node_id != nodeId;
+    if (same_group && other_node)
+      return ndbNodes[rand].node_id;
+    rand = (rand + 1) % ndbNodes.size();
+  }
+
+  return -1;
+}
+
int
NdbRestarter::waitClusterStarted(unsigned int _timeout){
return waitClusterState(NDB_MGM_NODE_STATUS_STARTED, _timeout);
diff --git a/ndb/tools/desc.cpp b/ndb/tools/desc.cpp
index be0f6942db5..408227452a7 100644
--- a/ndb/tools/desc.cpp
+++ b/ndb/tools/desc.cpp
@@ -23,6 +23,7 @@ NDB_STD_OPTS_VARS;
static const char* _dbname = "TEST_DB";
static int _unqualified = 0;
+static int _partinfo = 0;
static struct my_option my_long_options[] =
{
NDB_STD_OPTS("ndb_desc"),
@@ -32,6 +33,9 @@ static struct my_option my_long_options[] =
{ "unqualified", 'u', "Use unqualified table names",
(gptr*) &_unqualified, (gptr*) &_unqualified, 0,
GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 },
+ { "extra-partition-info", 'p', "Print more info per partition",
+ (gptr*) &_partinfo, (gptr*) &_partinfo, 0,
+ GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
};
static void usage()
@@ -45,6 +49,8 @@ static void usage()
my_print_variables(my_long_options);
}
+static void print_part_info(Ndb* pNdb, NDBT_Table* pTab);
+
int main(int argc, char** argv){
NDB_INIT(argv[0]);
const char *load_default_groups[]= { "mysql_cluster",0 };
@@ -109,7 +115,11 @@ int main(int argc, char** argv){
ndbout << (*pIdx) << endl;
}
+
ndbout << endl;
+
+ if (_partinfo)
+ print_part_info(pMyNdb, pTab);
}
else
ndbout << argv[i] << ": " << dict->getNdbError() << endl;
@@ -117,3 +127,71 @@ int main(int argc, char** argv){
return NDBT_ProgramExit(NDBT_OK);
}
+
+// One output column of the per-partition report printed by
+// print_part_info().
+struct InfoInfo
+{
+ // Column heading; a null title terminates the table of columns
+ const char * m_title;
+ // Value handle returned by getValue() for m_column; printed once per row
+ NdbRecAttr* m_rec_attr;
+ // Column requested from the scan operation
+ const NdbDictionary::Column* m_column;
+};
+
+
+/**
+ * Print one line per scan result for table pTab with the FRAGMENT,
+ * ROW_COUNT, COMMIT_COUNT and FRAGMENT_MEMORY pseudo columns, preceded by
+ * a heading line. Values are separated by tabs.
+ *
+ * Any failure while setting up or executing the scan silently ends the
+ * report; the transaction is closed in every case once it was started.
+ */
+static
+void print_part_info(Ndb* pNdb, NDBT_Table* pTab)
+{
+  // Null-title terminated list of the columns to report
+  InfoInfo columns[] = {
+    { "Partition", 0, NdbDictionary::Column::FRAGMENT },
+    { "Row count", 0, NdbDictionary::Column::ROW_COUNT },
+    { "Commit count", 0, NdbDictionary::Column::COMMIT_COUNT },
+    { "Frag memory", 0, NdbDictionary::Column::FRAGMENT_MEMORY },
+    { 0, 0, 0 }
+  };
+
+  ndbout << "-- Per partition info -- " << endl;
+
+  NdbConnection* pTrans = pNdb->startTransaction();
+  if (pTrans == 0)
+    return;
+
+  do
+  {
+    NdbScanOperation* pScan = pTrans->getNdbScanOperation(pTab->getName());
+    if (pScan == NULL)
+      break;
+
+    NdbResultSet* pResult = pScan->readTuples(NdbOperation::LM_CommittedRead);
+    if (pResult == 0)
+      break;
+
+    if (pScan->interpret_exit_last_row() != 0)
+      break;
+
+    // Request every column; stop at the first one that cannot be fetched
+    InfoInfo* info = columns;
+    while (info->m_title != 0)
+    {
+      info->m_rec_attr = pScan->getValue(info->m_column);
+      if (info->m_rec_attr == 0)
+        break;
+      info++;
+    }
+
+    // Not at the terminator: a getValue() call failed above
+    if (info->m_title != 0)
+      break;
+
+    if (pTrans->execute(NoCommit) != 0)
+      break;
+
+    // Heading line
+    for (info = columns; info->m_title != 0; info++)
+      ndbout << info->m_title << "\t";
+    ndbout << endl;
+
+    // One line per scan result
+    while (pResult->nextResult() == 0)
+    {
+      for (info = columns; info->m_title != 0; info++)
+        ndbout << *info->m_rec_attr << "\t";
+      ndbout << endl;
+    }
+  } while(0);
+
+  pTrans->close();
+}
diff --git a/sql/ha_innodb.cc b/sql/ha_innodb.cc
index 1b1326920ad..b386439aed5 100644
--- a/sql/ha_innodb.cc
+++ b/sql/ha_innodb.cc
@@ -513,6 +513,13 @@ convert_error_code_to_mysql(
return(HA_ERR_NO_SAVEPOINT);
} else if (error == (int) DB_LOCK_TABLE_FULL) {
+ /* Since we rolled back the whole transaction, we must
+ tell it also to MySQL so that MySQL knows to empty the
+ cached binlog for this transaction */
+
+ if (thd) {
+ ha_rollback(thd);
+ }
return(HA_ERR_LOCK_TABLE_FULL);
} else {