-rw-r--r--  buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml |   2
-rw-r--r--  jstests/sharding/balancing_sessions_collection.js                                  | 159
-rw-r--r--  src/mongo/db/s/balancer/balancer.cpp                                               |   8
-rw-r--r--  src/mongo/db/s/balancer/balancer.h                                                 |  11
-rw-r--r--  src/mongo/db/s/balancer/balancer_chunk_selection_policy_impl.cpp                   | 148
-rw-r--r--  src/mongo/shell/servers.js                                                         |  18
6 files changed, 315 insertions, 31 deletions
diff --git a/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml b/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml
index c47c11a6eec..42affa9b642 100644
--- a/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml
+++ b/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml
@@ -28,6 +28,8 @@ selector:
   - jstests/sharding/index_and_collection_option_propagation.js
   # New feature in v3.6 mongos
   - jstests/sharding/logical_time_metadata.js
+  # New feature in v3.6 mongod.
+  - jstests/sharding/balancing_sessions_collection.js
   # New feature in v3.6 mongos and mongod.
   - jstests/sharding/advance_cluster_time_action_type.js
   - jstests/sharding/advance_logical_time_with_valid_signature.js
diff --git a/jstests/sharding/balancing_sessions_collection.js b/jstests/sharding/balancing_sessions_collection.js
new file mode 100644
index 00000000000..17292d13c9b
--- /dev/null
+++ b/jstests/sharding/balancing_sessions_collection.js
@@ -0,0 +1,159 @@
+/*
+ * Tests that the balancer splits the sessions collection and uniformly distributes the chunks
+ * across shards in the cluster.
+ * @tags: [resource_intensive]
+ */
+(function() {
+    "use strict";
+
+    /*
+     * Returns the number of chunks for the sessions collection.
+     */
+    function getNumTotalChunks() {
+        return configDB.chunks.count({ns: kSessionsNs});
+    }
+
+    /*
+     * Returns the number of chunks for the sessions collection that are on the given shard.
+     */
+    function getNumChunksOnShard(shardName) {
+        return configDB.chunks.count({ns: kSessionsNs, shard: shardName});
+    }
+
+    /*
+     * Returns the number of docs in the sessions collection on the given host.
+     */
+    function getNumDocs(conn) {
+        return conn.getCollection(kSessionsNs).count();
+    }
+
+    /*
+     * Starts a replica-set shard, adds the shard to the cluster, and increments numShards.
+     * Returns the ReplSetTest object for the shard.
+     */
+    function addShardToCluster() {
+        const shardName = clusterName + "-rs" + numShards;
+
+        const replTest = new ReplSetTest({name: shardName, nodes: 1});
+        replTest.startSet({shardsvr: ""});
+        replTest.initiate();
+
+        assert.commandWorked(st.s.adminCommand({addShard: replTest.getURL(), name: shardName}));
+        numShards++;
+        return replTest;
+    }
+
+    /*
+     * Removes the given shard from the cluster, waits until the state is 'completed', and
+     * decrements numShards.
+     */
+    function removeShardFromCluster(shardName) {
+        assert.commandWorked(st.s.adminCommand({removeShard: shardName}));
+        assert.soon(function() {
+            const res = st.s.adminCommand({removeShard: shardName});
+            assert.commandWorked(res);
+            return ("completed" == res.state);
+        }, "failed to remove shard " + shardName, kBalancerTimeoutMS);
+        numShards--;
+    }
+
+    /*
+     * Returns true if the chunks for the sessions collection are evenly distributed across the
+     * given shards. That is, the number of chunks on the most loaded shard and on the least
+     * loaded shard differs by no more than 1, and the least loaded shard has the expected
+     * minimum number of chunks.
+     */
+    function isBalanced(shardNames) {
+        const expectedMinNumChunksPerShard = Math.floor(kExpectedNumChunks / shardNames.length);
+
+        let minNumChunks = Number.MAX_VALUE;
+        let maxNumChunks = 0;
+        for (let shardName of shardNames) {
+            const numChunks = getNumChunksOnShard(shardName);
+            minNumChunks = Math.min(numChunks, minNumChunks);
+            maxNumChunks = Math.max(numChunks, maxNumChunks);
+        }
+
+        return (maxNumChunks - minNumChunks <= 1) &&
+            (minNumChunks == expectedMinNumChunksPerShard);
+    }
+
+    /*
+     * Returns the standard deviation for the given numbers.
+     */
+    function computeStdDev(nums) {
+        const mean = nums.reduce((a, b) => a + b) / nums.length;
+        return Math.sqrt(nums.map(x => Math.pow(x - mean, 2)).reduce((a, b) => a + b) /
+                         nums.length);
+    }
+
+    const kMinNumChunks = 100;
+    const kExpectedNumChunks = 128;  // the balancer rounds kMinNumChunks up to the next power of 2.
+    const kNumSessions = 10000;
+    const kBalancerTimeoutMS = 5 * 60 * 1000;
+
+    let numShards = 2;
+    const clusterName = jsTest.name();
+    const st = new ShardingTest({
+        name: clusterName,
+        shards: numShards,
+        other: {configOptions: {setParameter: {minNumChunksForSessionsCollection: kMinNumChunks}}}
+    });
+    const kSessionsNs = "config.system.sessions";
+    const configDB = st.s.getDB("config");
+
+    // There is only one chunk initially.
+    assert.eq(1, getNumTotalChunks());
+
+    st.startBalancer();
+
+    jsTest.log(
+        "Verify that the balancer splits the initial chunk and distributes chunks evenly across existing shards");
+
+    assert.soon(() => getNumTotalChunks() == kExpectedNumChunks,
+                "balancer did not split the initial chunk for the sessions collection");
+    assert.soon(() => isBalanced([st.shard0.shardName, st.shard1.shardName]),
+                "balancer did not distribute chunks evenly across existing shards",
+                kBalancerTimeoutMS);
+
+    jsTest.log(
+        "Verify that the balancer redistributes chunks when more shards are added to the cluster");
+    const shard2 = addShardToCluster();
+    const shard3 = addShardToCluster();
+    const shard4 = addShardToCluster();
+
+    assert.soon(
+        () => isBalanced(
+            [st.shard0.shardName, st.shard1.shardName, shard2.name, shard3.name, shard4.name]),
+        "balancer did not redistribute chunks evenly after more shards were added",
+        kBalancerTimeoutMS);
+
+    jsTest.log("Verify that the session docs are distributed almost evenly across shards");
+    // Start sessions and trigger a refresh to flush the sessions to the sessions collection.
+    for (let i = 0; i < kNumSessions; i++) {
+        assert.commandWorked(st.s.adminCommand({startSession: 1}));
+    }
+    assert.commandWorked(st.s.adminCommand({refreshLogicalSessionCacheNow: 1}));
+    assert.lte(kNumSessions, getNumDocs(st.s));
+
+    const shards =
+        [st.shard0, st.shard1, shard2.getPrimary(), shard3.getPrimary(), shard4.getPrimary()];
+    const numDocsOnShards = shards.map(shard => getNumDocs(shard));
+    assert.lt(computeStdDev(numDocsOnShards), 0.1 * kNumSessions / shards.length);
+
+    jsTest.log(
+        "Verify that the balancer redistributes chunks when shards are removed from the cluster");
+    removeShardFromCluster(shard2.name);
+
+    assert.soon(
+        () => isBalanced([st.shard0.shardName, st.shard1.shardName, shard3.name, shard4.name]),
+        "balancer did not redistribute chunks evenly after one of the shards was removed",
+        kBalancerTimeoutMS);
+    assert.eq(0, getNumChunksOnShard(shard2.name));
+
+    st.stopBalancer();
+
+    st.stop();
+    shard2.stopSet();
+    shard3.stopSet();
+    shard4.stopSet();
+}());
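The relationship between kMinNumChunks and kExpectedNumChunks in the test above comes from the balancer rounding the configured minimum up to the next power of 2. A minimal sketch of that rounding, in shell JavaScript (nextPowerOfTwo is not a helper in this patch, just an illustration):

    // Round n up to the next power of 2, mirroring the C++ computation
    // `1 << (64 - countLeadingZeros64(n - 1))` used by the balancer below.
    function nextPowerOfTwo(n) {
        let result = 1;
        while (result < n) {
            result *= 2;
        }
        return result;
    }
    assert.eq(128, nextPowerOfTwo(100));    // kMinNumChunks -> kExpectedNumChunks
    assert.eq(1024, nextPowerOfTwo(1024));  // exact powers of 2 are unchanged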
diff --git a/src/mongo/db/s/balancer/balancer.cpp b/src/mongo/db/s/balancer/balancer.cpp
index 6caddbe97f5..41367734c8d 100644
--- a/src/mongo/db/s/balancer/balancer.cpp
+++ b/src/mongo/db/s/balancer/balancer.cpp
@@ -367,9 +367,9 @@ void Balancer::_mainThread() {
                 OCCASIONALLY warnOnMultiVersion(
                     uassertStatusOK(_clusterStats->getStats(opCtx.get())));
 
-                Status status = _enforceTagRanges(opCtx.get());
+                Status status = _splitChunksIfNeeded(opCtx.get());
                 if (!status.isOK()) {
-                    warning() << "Failed to enforce tag ranges" << causedBy(status);
+                    warning() << "Failed to split chunks" << causedBy(status);
                 } else {
                     LOG(1) << "Done enforcing tag range boundaries.";
                 }
@@ -540,7 +540,7 @@ bool Balancer::_checkOIDs(OperationContext* opCtx) {
     return true;
 }
 
-Status Balancer::_enforceTagRanges(OperationContext* opCtx) {
+Status Balancer::_splitChunksIfNeeded(OperationContext* opCtx) {
     auto chunksToSplitStatus = _chunkSelectionPolicy->selectChunksToSplit(opCtx);
     if (!chunksToSplitStatus.isOK()) {
        return chunksToSplitStatus.getStatus();
@@ -565,7 +565,7 @@ Status Balancer::_enforceTagRanges(OperationContext* opCtx) {
                                                ChunkRange(splitInfo.minKey, splitInfo.maxKey),
                                                splitInfo.splitKeys);
         if (!splitStatus.isOK()) {
-            warning() << "Failed to enforce tag range for chunk " << redact(splitInfo.toString())
+            warning() << "Failed to split chunk " << redact(splitInfo.toString())
                       << causedBy(redact(splitStatus.getStatus()));
         }
     }
diff --git a/src/mongo/db/s/balancer/balancer.h b/src/mongo/db/s/balancer/balancer.h
index da54de484ac..6fd8b7fbc4b 100644
--- a/src/mongo/db/s/balancer/balancer.h
+++ b/src/mongo/db/s/balancer/balancer.h
@@ -183,10 +183,15 @@ private:
     bool _checkOIDs(OperationContext* opCtx);
 
     /**
-     * Iterates through all chunks in all collections and ensures that no chunks straddle tag
-     * boundary. If any do, they will be split.
+     * Iterates through all chunks in all collections. If the collection is the sessions
+     * collection, checks if the number of chunks is greater than or equal to the configured
+     * minimum number of chunks for the sessions collection (minNumChunksForSessionsCollection).
+     * If it isn't, calculates split points that evenly partition the key space into N ranges
+     * (where N is minNumChunksForSessionsCollection rounded up to the next power of 2), and
+     * splits any chunks that straddle those split points. If the collection is any other
+     * collection, splits any chunks that straddle tag boundaries.
      */
-    Status _enforceTagRanges(OperationContext* opCtx);
+    Status _splitChunksIfNeeded(OperationContext* opCtx);
 
     /**
      * Schedules migrations for the specified set of chunks and returns how many chunks were
diff --git a/src/mongo/db/s/balancer/balancer_chunk_selection_policy_impl.cpp b/src/mongo/db/s/balancer/balancer_chunk_selection_policy_impl.cpp
index 25b328b3fd4..47610d5bec3 100644
--- a/src/mongo/db/s/balancer/balancer_chunk_selection_policy_impl.cpp
+++ b/src/mongo/db/s/balancer/balancer_chunk_selection_policy_impl.cpp
@@ -39,6 +39,8 @@
 
 #include "mongo/base/status_with.h"
 #include "mongo/bson/bsonobj_comparator_interface.h"
+#include "mongo/db/server_parameters.h"
+#include "mongo/platform/bits.h"
 #include "mongo/s/catalog/sharding_catalog_client.h"
 #include "mongo/s/catalog/type_chunk.h"
 #include "mongo/s/catalog/type_collection.h"
@@ -51,6 +53,20 @@
 
 namespace mongo {
 
+// The minimum number of chunks for the config.system.sessions collection.
+MONGO_EXPORT_SERVER_PARAMETER(minNumChunksForSessionsCollection, int, 1024)
+    ->withValidator([](const int& newVal) {
+        if (newVal < 1) {
+            return Status(ErrorCodes::BadValue,
+                          "minNumChunksForSessionsCollection must not be less than 1");
+        }
+        if (newVal > 1000000) {
+            return Status(ErrorCodes::BadValue,
+                          "minNumChunksForSessionsCollection must not be greater than 1000000");
+        }
+        return Status::OK();
+    });
+
 using MigrateInfoVector = BalancerChunkSelectionPolicy::MigrateInfoVector;
 using SplitInfoVector = BalancerChunkSelectionPolicy::SplitInfoVector;
 using std::shared_ptr;
@@ -177,6 +193,95 @@ private:
     BSONObjIndexedMap<BalancerChunkSelectionPolicy::SplitInfo> _chunkSplitPoints;
 };
 
+/**
+ * Populates splitCandidates with chunk and splitPoint pairs for chunks that violate tag
+ * range boundaries.
+ */
+void getSplitCandidatesToEnforceTagRanges(const ChunkManager* cm,
+                                          const DistributionStatus& distribution,
+                                          SplitCandidatesBuffer* splitCandidates) {
+    const auto& globalMax = cm->getShardKeyPattern().getKeyPattern().globalMax();
+
+    // For each tag range, find chunks that need to be split.
+    for (const auto& tagRangeEntry : distribution.tagRanges()) {
+        const auto& tagRange = tagRangeEntry.second;
+
+        const auto chunkAtZoneMin = cm->findIntersectingChunkWithSimpleCollation(tagRange.min);
+        invariant(chunkAtZoneMin->getMax().woCompare(tagRange.min) > 0);
+
+        if (chunkAtZoneMin->getMin().woCompare(tagRange.min)) {
+            splitCandidates->addSplitPoint(chunkAtZoneMin, tagRange.min);
+        }
+
+        // The global max key can never fall in the middle of a chunk.
+        if (!tagRange.max.woCompare(globalMax))
+            continue;
+
+        const auto chunkAtZoneMax = cm->findIntersectingChunkWithSimpleCollation(tagRange.max);
+
+        // We need to check that both the chunk's minKey does not match the zone's max and also
+        // that the max is not equal, which would only happen in the case of the zone ending in
+        // MaxKey.
+        if (chunkAtZoneMax->getMin().woCompare(tagRange.max) &&
+            chunkAtZoneMax->getMax().woCompare(tagRange.max)) {
+            splitCandidates->addSplitPoint(chunkAtZoneMax, tagRange.max);
+        }
+    }
+}
+
+/**
+ * If the number of chunks as given by the ChunkManager is less than the configured minimum
+ * number of chunks for the sessions collection (minNumChunksForSessionsCollection), calculates
+ * split points that evenly partition the key space into N ranges (where N is
+ * minNumChunksForSessionsCollection rounded up to the next power of 2), and populates
+ * splitCandidates with chunk and splitPoint pairs for the chunks that need to be split.
+ */
+void getSplitCandidatesForSessionsCollection(OperationContext* opCtx,
+                                             const ChunkManager* cm,
+                                             SplitCandidatesBuffer* splitCandidates) {
+    const auto minNumChunks = minNumChunksForSessionsCollection.load();
+
+    if (cm->numChunks() >= minNumChunks) {
+        return;
+    }
+
+    // Use the next power of 2 as the target number of chunks.
+    const size_t numBits = 64 - countLeadingZeros64(minNumChunks - 1);
+    const uint32_t numChunks = 1 << numBits;
+
+    // Compute split points for _id.id that partition the UUID 128-bit data space into numChunks
+    // equal ranges. Since numChunks is a power of 2, the split points are all values of the
+    // numBits-bit prefix, right-padded with 0's.
+    std::vector<BSONObj> splitPoints;
+    for (uint32_t i = 1; i < numChunks; i++) {
+        // Start with a buffer of 0's.
+        std::array<uint8_t, 16> buf{0b0};
+
+        // Left-shift i so that it occupies the top numBits of the 32-bit prefix, with the
+        // remaining bits set to 0.
+        const uint32_t high = i << (CHAR_BIT * 4 - numBits);
+
+        // Fill the first 4 bytes of the buffer with high's bytes, most significant first.
+        buf[0] = static_cast<uint8_t>(high >> CHAR_BIT * 3);
+        buf[1] = static_cast<uint8_t>(high >> CHAR_BIT * 2);
+        buf[2] = static_cast<uint8_t>(high >> CHAR_BIT * 1);
+        buf[3] = static_cast<uint8_t>(high);
+
+        ConstDataRange cdr(reinterpret_cast<const char*>(buf.data()), sizeof(buf));
+        splitPoints.push_back(BSON("_id" << BSON("id" << UUID::fromCDR(cdr))));
+    }
+
+    // For each split point, find the chunk that needs to be split.
+    for (auto& splitPoint : splitPoints) {
+        const auto chunkAtSplitPoint = cm->findIntersectingChunkWithSimpleCollation(splitPoint);
+        invariant(chunkAtSplitPoint->getMax().woCompare(splitPoint) > 0);
+
+        if (chunkAtSplitPoint->getMin().woCompare(splitPoint)) {
+            splitCandidates->addSplitPoint(chunkAtSplitPoint, splitPoint);
+        }
+    }
+
+    return;
+}
+
 }  // namespace
 
 BalancerChunkSelectionPolicyImpl::BalancerChunkSelectionPolicyImpl(ClusterStatistics* clusterStats,
@@ -222,8 +327,13 @@ StatusWith<SplitInfoVector> BalancerChunkSelectionPolicyImpl::selectChunksToSpli
             // Namespace got dropped before we managed to get to it, so just skip it
             continue;
         } else if (!candidatesStatus.isOK()) {
-            warning() << "Unable to enforce tag range policy for collection " << nss.ns()
-                      << causedBy(candidatesStatus.getStatus());
+            if (nss.ns() == "config.system.sessions") {
+                warning() << "Unable to split sessions collection chunks "
+                          << causedBy(candidatesStatus.getStatus());
+            } else {
+                warning() << "Unable to enforce tag range policy for collection " << nss.ns()
+                          << causedBy(candidatesStatus.getStatus());
+            }
             continue;
         }
@@ -377,8 +487,6 @@ StatusWith<SplitInfoVector> BalancerChunkSelectionPolicyImpl::_getSplitCandidate
 
     const auto cm = routingInfoStatus.getValue().cm().get();
 
-    const auto& shardKeyPattern = cm->getShardKeyPattern().getKeyPattern();
-
     const auto collInfoStatus = createCollectionDistributionStatus(opCtx, shardStats, cm);
     if (!collInfoStatus.isOK()) {
         return collInfoStatus.getStatus();
@@ -389,28 +497,20 @@ StatusWith<SplitInfoVector> BalancerChunkSelectionPolicyImpl::_getSplitCandidate
 
     // Accumulate split points for the same chunk together
     SplitCandidatesBuffer splitCandidates(nss, cm->getVersion());
 
-    for (const auto& tagRangeEntry : distribution.tagRanges()) {
-        const auto& tagRange = tagRangeEntry.second;
-
-        const auto chunkAtZoneMin = cm->findIntersectingChunkWithSimpleCollation(tagRange.min);
-        invariant(chunkAtZoneMin->getMax().woCompare(tagRange.min) > 0);
-
-        if (chunkAtZoneMin->getMin().woCompare(tagRange.min)) {
-            splitCandidates.addSplitPoint(chunkAtZoneMin, tagRange.min);
+    if (nss.ns() == "config.system.sessions") {
+        if (!distribution.tags().empty()) {
+            str::stream builder;
+            builder << "Ignoring zones for the sessions collection: ";
+            for (const auto& tag : distribution.tags()) {
+                builder << tag << ", ";
+            }
+            const std::string msg = builder;
+            warning() << msg;
         }
-
-        // The global max key can never fall in the middle of a chunk
-        if (!tagRange.max.woCompare(shardKeyPattern.globalMax()))
-            continue;
-
-        const auto chunkAtZoneMax = cm->findIntersectingChunkWithSimpleCollation(tagRange.max);
-
-        // We need to check that both the chunk's minKey does not match the zone's max and also that
-        // the max is not equal, which would only happen in the case of the zone ending in MaxKey.
-        if (chunkAtZoneMax->getMin().woCompare(tagRange.max) &&
-            chunkAtZoneMax->getMax().woCompare(tagRange.max)) {
-            splitCandidates.addSplitPoint(chunkAtZoneMax, tagRange.max);
-        }
+
+        getSplitCandidatesForSessionsCollection(opCtx, cm, &splitCandidates);
+    } else {
+        getSplitCandidatesToEnforceTagRanges(cm, distribution, &splitCandidates);
     }
 
     return splitCandidates.done();
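Because numChunks is a power of 2, each split point is just the loop counter i left-aligned in the top numBits of the UUID, with the remaining bits zeroed. The same computation can be rendered in shell JavaScript as UUID strings (sessionsSplitPointIds is a hypothetical helper for illustration, not part of the patch; the authoritative implementation is the C++ above):

    // Compute the _id.id split points for the sessions collection as UUID
    // strings, mirroring getSplitCandidatesForSessionsCollection().
    function sessionsSplitPointIds(minNumChunks) {
        let numBits = 0;
        while (Math.pow(2, numBits) < minNumChunks) {
            numBits++;
        }
        const numChunks = Math.pow(2, numBits);
        const splitPoints = [];
        for (let i = 1; i < numChunks; i++) {
            // Left-align i within the first 32 bits, then zero-pad to 128 bits.
            const high = i * Math.pow(2, 32 - numBits);
            const hex = ("00000000" + high.toString(16)).slice(-8) + "0".repeat(24);
            splitPoints.push(hex.slice(0, 8) + "-" + hex.slice(8, 12) + "-" +
                             hex.slice(12, 16) + "-" + hex.slice(16, 20) + "-" + hex.slice(20));
        }
        return splitPoints;
    }
    // sessionsSplitPointIds(4) returns:
    //   ["40000000-0000-0000-0000-000000000000",
    //    "80000000-0000-0000-0000-000000000000",
    //    "c0000000-0000-0000-0000-000000000000"]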
diff --git a/src/mongo/shell/servers.js b/src/mongo/shell/servers.js
index e6fb65ecc12..3f613f90f95 100644
--- a/src/mongo/shell/servers.js
+++ b/src/mongo/shell/servers.js
@@ -999,6 +999,14 @@ var MongoRunner, _startMongod, startMongoProgram, runMongoProgram, startMongoPro
                    .length > 0);
     }
 
+    function argArrayContainsSetParameterValue(value) {
+        assert(value.endsWith("="),
+               "Expected value argument to be of the form <parameterName>=");
+        return argArray.some(function(el) {
+            return typeof el === "string" && el.startsWith(value);
+        });
+    }
+
     // programName includes the version, e.g., mongod-3.2.
     // baseProgramName is the program name without any version information, e.g., mongod.
     let programName = argArray[0];
@@ -1074,6 +1082,16 @@ var MongoRunner, _startMongod, startMongoProgram, runMongoProgram, startMongoPro
         }
     }
 
+    // New mongod-specific options in 3.6.x
+    if (!programMajorMinorVersion || programMajorMinorVersion >= 360) {
+        if ((jsTest.options().setParameters === undefined ||
+             jsTest.options().setParameters['minNumChunksForSessionsCollection'] ===
+                 undefined) &&
+            !argArrayContainsSetParameterValue('minNumChunksForSessionsCollection=')) {
+            argArray.push(...['--setParameter', "minNumChunksForSessionsCollection=1"]);
+        }
+    }
+
     // TODO: Make this unconditional in 3.8.
     if (!programMajorMinorVersion || programMajorMinorVersion > 304) {
         let hasParam = false;
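The servers.js guard only injects the parameter when it is not already set, either through the test options' setParameters or directly on the argument array. A small standalone sketch of the argument-array check (containsSetParameterValue and the argv values are made up for illustration):

    // Mirrors argArrayContainsSetParameterValue() above, over an explicit argv.
    function containsSetParameterValue(argArray, value) {
        assert(value.endsWith("="), "Expected value argument to be of the form <parameterName>=");
        return argArray.some(function(el) {
            return typeof el === "string" && el.startsWith(value);
        });
    }
    const argv = ["mongod", "--setParameter", "minNumChunksForSessionsCollection=1024"];
    assert(containsSetParameterValue(argv, "minNumChunksForSessionsCollection="));
    // For this argv, no extra "--setParameter minNumChunksForSessionsCollection=1" is
    // appended; a 3.6 mongod started without the parameter gets the test default of 1.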