diff options
author | Jess Balint <jbalint@gmail.com> | 2022-02-24 15:55:13 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-02-24 16:13:36 +0000 |
commit | df25c71b8674a78e17468f48bcda5285decb9246 (patch) | |
tree | 77839dd4f48e39a4cf3dcb9e4a71e4f6ad4698c7 | |
parent | ac73e3cd3465814b4bccd086792a377dce55a161 (diff) | |
download | mongo-df25c71b8674a78e17468f48bcda5285decb9246.tar.gz |
SERVER-40691 $nin:[[],...] queries are not indexedr4.4.13-rc0r4.4.13
(cherry picked from commit 6889d8d80d0e69cc5c3de68eb05fe89e9b93cff4)
-rw-r--r-- | jstests/core/null_query_semantics.js | 39 | ||||
-rw-r--r-- | src/mongo/db/query/index_bounds_builder.cpp | 26 | ||||
-rw-r--r-- | src/mongo/db/query/index_bounds_builder.h | 10 | ||||
-rw-r--r-- | src/mongo/db/query/planner_ixselect.cpp | 29 | ||||
-rw-r--r-- | src/mongo/db/query/planner_ixselect.h | 14 | ||||
-rw-r--r-- | src/mongo/db/query/query_planner_index_test.cpp | 49 | ||||
-rw-r--r-- | src/mongo/db/query/query_planner_test_lib.cpp | 7 | ||||
-rw-r--r-- | src/mongo/s/query/results_merger_test_fixture.h | 2 |
8 files changed, 164 insertions, 12 deletions
diff --git a/jstests/core/null_query_semantics.js b/jstests/core/null_query_semantics.js index f858b59831f..b4531880bf3 100644 --- a/jstests/core/null_query_semantics.js +++ b/jstests/core/null_query_semantics.js @@ -314,6 +314,45 @@ function testNotEqualsNullSemantics() { projectResults); }()); + // Test the semantics of the query {a: {$nin: [null, []]}}. + (function testNotInNullAndEmptyArrayQuery() { + const query = {a: {$nin: [null, []]}}; + const noProjectResults = coll.find(query).toArray(); + const expected = [ + // Documents excluded by the query predicate are commented + // out below. + {_id: "a_empty_subobject", a: {}}, + //{_id: "a_null", a: null}, + {_id: "a_number", a: 4}, + {_id: "a_subobject_b_not_null", a: {b: "hi"}}, + {_id: "a_subobject_b_null", a: {b: null}}, + {_id: "a_subobject_b_undefined", a: {b: undefined}}, + // Semantic difference during backport: this document doesn't match "null" in this + // version (c.f. SERVER-21929). + {_id: "a_undefined", a: undefined}, + //{_id: "no_a"}, + + //{_id: "a_double_array", a: [[]]}, + //{_id: "a_empty_array", a: []}, + {_id: "a_object_array_all_b_nulls", a: [{b: null}, {b: undefined}, {b: null}, {}]}, + {_id: "a_object_array_no_b_nulls", a: [{b: 1}, {b: 3}, {b: "string"}]}, + {_id: "a_object_array_some_b_nulls", a: [{b: null}, {b: 3}, {b: null}]}, + {_id: "a_object_array_some_b_undefined", a: [{b: undefined}, {b: 3}]}, + {_id: "a_object_array_some_b_missing", a: [{b: 3}, {}]}, + //{_id: "a_value_array_all_nulls", a: [null, null]}, + {_id: "a_value_array_no_nulls", a: [1, "string", 4]}, + //{_id: "a_value_array_with_null", a: [1, "string", null, 4]}, + // Semantic difference during backport: this document doesn't match "null" in this + // version (c.f. SERVER-21929). + {_id: "a_value_array_with_undefined", a: [1, "string", undefined, 4]}, + ]; + + assert(resultsEq(noProjectResults, expected), noProjectResults); + + const projectResults = coll.find(query, projectToOnlyA).toArray(); + assert(resultsEq(projectResults, extractAValues(expected)), projectResults); + }()); + (function testNotInNullAndRegexQuery() { const query = {a: {$nin: [null, /^str.*/]}}; const noProjectResults = coll.find(query).toArray(); diff --git a/src/mongo/db/query/index_bounds_builder.cpp b/src/mongo/db/query/index_bounds_builder.cpp index 1f8dcae97e9..742faf4f938 100644 --- a/src/mongo/db/query/index_bounds_builder.cpp +++ b/src/mongo/db/query/index_bounds_builder.cpp @@ -121,6 +121,7 @@ bool stringMayHaveUnescapedPipe(StringData str) { const BSONObj kUndefinedElementObj = BSON("" << BSONUndefined); const BSONObj kNullElementObj = BSON("" << BSONNULL); +const BSONObj kEmptyArrayElementObj = BSON("" << BSONArray()); const Interval kHashedUndefinedInterval = IndexBoundsBuilder::makePointInterval( ExpressionMapping::hash(kUndefinedElementObj.firstElement())); @@ -558,6 +559,31 @@ void IndexBoundsBuilder::_translatePredicate(const MatchExpression* expr, return; } + if (MatchExpression::MATCH_IN == child->matchType()) { + auto ime = static_cast<const InMatchExpression*>(child); + if (QueryPlannerIXSelect::canUseIndexForNin(ime)) { + // Permit documents with an undefined value to be returned from $nin index scan. The + // empty array is represented as undefined in the index but the empty array values + // are filtered out. This is required to match the collection scan (no index) + // behavior. This was later changed by SERVER-21929. + BSONObjBuilder bob; + bob.appendMinKey(""); + bob.appendUndefined(""); + oilOut->intervals.push_back(makeRangeInterval( + bob.asTempObj().getOwned(), BoundInclusion::kIncludeBothStartAndEndKeys)); + bob.resetToEmpty(); + bob.appendNull(""); + bob.appendMaxKey(""); + oilOut->intervals.push_back( + makeRangeInterval(bob.done().getOwned(), BoundInclusion::kIncludeEndKeyOnly)); + unionize(oilOut); + if (index.pathHasMultikeyComponent(elt.fieldNameStringData())) { + *tightnessOut = IndexBoundsBuilder::INEXACT_FETCH; + } + return; + } + } + _translatePredicate(child, elt, index, oilOut, tightnessOut); oilOut->complement(); diff --git a/src/mongo/db/query/index_bounds_builder.h b/src/mongo/db/query/index_bounds_builder.h index 0453df842cb..69d72549e0f 100644 --- a/src/mongo/db/query/index_bounds_builder.h +++ b/src/mongo/db/query/index_bounds_builder.h @@ -48,6 +48,16 @@ public: * Describes various degrees of precision with which predicates can be evaluated based * on the index bounds. * + * Exact vs. inexact is about whether or not you will need to apply a predicate after scanning, + * and fetch vs. not fetch is whether you need data which is not in the index to answer the + * query. Often if you have inexact bounds it causes you to need more than the index data, but + * not always. For example, to correctly match null predicates like {a: {$eq: null}} you may + * need to fetch the data to distinguish between a null and missing 'a' value (the index stores + * both with a literal null), so would need INEXACT_FETCH bounds. And as an INEXACT_COVERED + * example you could think of something like $mod where you can apply the predicate to the data + * directly in the index, but $mod won't generate tight bounds so you still need to apply the + * predicate. + * * The integer values of the enum are significant, and are assigned in order of * increasing tightness. These values are used when we need to do comparison between two * BoundsTightness values. Such comparisons can answer questions such as "Does predicate diff --git a/src/mongo/db/query/planner_ixselect.cpp b/src/mongo/db/query/planner_ixselect.cpp index 2457f15b828..f1a3efa85fc 100644 --- a/src/mongo/db/query/planner_ixselect.cpp +++ b/src/mongo/db/query/planner_ixselect.cpp @@ -174,12 +174,15 @@ bool QueryPlannerIXSelect::notEqualsNullCanUseIndex(const IndexEntry& index, } } -static double fieldWithDefault(const BSONObj& infoObj, const string& name, double def) { - BSONElement e = infoObj[name]; - if (e.isNumber()) { - return e.numberDouble(); - } - return def; +bool QueryPlannerIXSelect::canUseIndexForNin(const InMatchExpression* ime) { + const std::vector<BSONElement>& inList = ime->getEqualities(); + auto containsNull = [](const BSONElement& elt) { return elt.type() == jstNULL; }; + auto containsEmptyArray = [](const BSONElement& elt) { + return elt.type() == Array && elt.embeddedObject().isEmpty(); + }; + return ime->getRegexes().empty() && inList.size() == 2 && + std::any_of(inList.begin(), inList.end(), containsNull) && + std::any_of(inList.begin(), inList.end(), containsEmptyArray); } /** @@ -435,10 +438,6 @@ bool QueryPlannerIXSelect::_compatible(const BSONElement& keyPatternElt, return false; } - if (isEqualsArrayOrNotInArray(child)) { - return false; - } - // Most of the time we can't use a multikey index for a $ne: null query, however there // are a few exceptions around $elemMatch. const bool isNotEqualsNull = isQueryNegatingEqualToNull(node); @@ -451,6 +450,12 @@ bool QueryPlannerIXSelect::_compatible(const BSONElement& keyPatternElt, // If it's a negated $in, it can't have any REGEX's inside. if (MatchExpression::MATCH_IN == childtype) { InMatchExpression* ime = static_cast<InMatchExpression*>(node->getChild(0)); + + if (canUseIndexForNin(ime)) { + // This is a case that we know is supported. + return true; + } + if (!ime->getRegexes().empty()) { return false; } @@ -461,6 +466,10 @@ bool QueryPlannerIXSelect::_compatible(const BSONElement& keyPatternElt, return false; } } + + if (isEqualsArrayOrNotInArray(child)) { + return false; + } } // If this is an $elemMatch value, make sure _all_ of the children can use the index. diff --git a/src/mongo/db/query/planner_ixselect.h b/src/mongo/db/query/planner_ixselect.h index 0bd1accd5bc..84f00d656da 100644 --- a/src/mongo/db/query/planner_ixselect.h +++ b/src/mongo/db/query/planner_ixselect.h @@ -158,6 +158,20 @@ public: */ static bool logicalNodeMayBeSupportedByAnIndex(const MatchExpression* queryExpr); + /** + * We can use an index for this special case: {$not:{$in:[null, []]}}. Return true if this is + * the expression (modulo in-list ordering) and it doesn't contain any regexes. + * + * Why is this case special? An equality expression for "null" will match both documents with a + * literal null for the specified key, and those where the key is not present. An equality + * expression for "[]" (which is stored as "undefined" in the index) will match documents with + * an empty array or an array with an empty array element. If we negate either of this bounds in + * isolation, we may produce incomplete results wrt the other expression. If we negate the + * composition of the two, we can properly return complete results excluding both null and empty + * array values. + */ + static bool canUseIndexForNin(const InMatchExpression* ime); + private: /** * Used to keep track of if any $elemMatch predicates were encountered when walking a diff --git a/src/mongo/db/query/query_planner_index_test.cpp b/src/mongo/db/query/query_planner_index_test.cpp index 87bd2d584d1..559afce88b9 100644 --- a/src/mongo/db/query/query_planner_index_test.cpp +++ b/src/mongo/db/query/query_planner_index_test.cpp @@ -157,6 +157,55 @@ TEST_F(QueryPlannerTest, NegationInElemMatchDoesNotUseSparseIndex) { assertHasOnlyCollscan(); } +TEST_F(QueryPlannerTest, NinListWithOnlyNullAndEmptyArrayShouldUseMultikeyIndex) { + params.options = QueryPlannerParams::NO_TABLE_SCAN; + // Use a multikey index. + addIndex(fromjson("{a: 1}"), true); + runQuery(fromjson("{a: { $nin: [[], null] }}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: {a: {$not: {$in: [null, []]}}}, node: {ixscan: " + "{pattern: {a: 1}, bounds: " + "{a: [" + "['MinKey', undefined, true, true]," + "[null, 'MaxKey', false, true]" + "]}}}}}"); +} + +TEST_F(QueryPlannerTest, NinListWithOnlyNullAndEmptyArrayShouldUseIndex) { + params.options = QueryPlannerParams::NO_TABLE_SCAN; + // Use an index which is not multikey. + addIndex(fromjson("{a: 1}")); + runQuery(fromjson("{a: { $nin: [[], null] }}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {node: {ixscan: " + "{pattern: {a: 1}, bounds: " + "{a: [" + "['MinKey', undefined, true, true]," + "[null, 'MaxKey', false, true]" + "]}}}}}"); +} + +TEST_F(QueryPlannerTest, NinListWithNullShouldNotUseIndex) { + addIndex(fromjson("{a: 1}"), true); + runQuery(fromjson("{a: { $nin: [null] }}")); + assertHasOnlyCollscan(); +} + +TEST_F(QueryPlannerTest, NinListWithRegexCannotUseIndex) { + addIndex(fromjson("{a: 1}"), true); + // This matches the [[], null] pattern but also has a regex. + runQuery(fromjson("{a: { $nin: [[], null, /abc/] }}")); + assertHasOnlyCollscan(); +} + +TEST_F(QueryPlannerTest, NinListWithNonEmptyArrayShouldNotUseIndex) { + addIndex(fromjson("{a: 1}"), true); + runQuery(fromjson("{a: { $nin: [[], [1]] }}")); + assertHasOnlyCollscan(); +} + TEST_F(QueryPlannerTest, SparseIndexCannotSupportEqualsNull) { addIndex(BSON("i" << 1), false, // multikey diff --git a/src/mongo/db/query/query_planner_test_lib.cpp b/src/mongo/db/query/query_planner_test_lib.cpp index 39609566056..40beb098874 100644 --- a/src/mongo/db/query/query_planner_test_lib.cpp +++ b/src/mongo/db/query/query_planner_test_lib.cpp @@ -81,8 +81,13 @@ bool filterMatches(const BSONObj& testFilter, if (!statusWithMatcher.isOK()) { return false; } - const std::unique_ptr<MatchExpression> root = std::move(statusWithMatcher.getValue()); + std::unique_ptr<MatchExpression> root = std::move(statusWithMatcher.getValue()); CanonicalQuery::sortTree(root.get()); + if (root->matchType() == mongo::MatchExpression::NOT) { + // Ideally we would optimize() everything, but some of the tests depend on structural + // equivalence of single-arg $or expressions. + root = MatchExpression::optimize(std::move(root)); + } std::unique_ptr<MatchExpression> trueFilter(trueFilterNode->filter->shallowClone()); CanonicalQuery::sortTree(trueFilter.get()); return trueFilter->equivalent(root.get()); diff --git a/src/mongo/s/query/results_merger_test_fixture.h b/src/mongo/s/query/results_merger_test_fixture.h index 8fc20eb2903..51b2d79cb8d 100644 --- a/src/mongo/s/query/results_merger_test_fixture.h +++ b/src/mongo/s/query/results_merger_test_fixture.h @@ -145,7 +145,7 @@ protected: void scheduleNetworkResponses(std::vector<CursorResponse> responses) { std::vector<BSONObj> objs; for (const auto& cursorResponse : responses) { - // For tests of the AsyncResultsMerger, all CursorRepsonses scheduled by the tests are + // For tests of the AsyncResultsMerger, all CursorResponses scheduled by the tests are // subsequent responses, since the AsyncResultsMerger will only ever run getMores. objs.push_back(cursorResponse.toBSON(CursorResponse::ResponseType::SubsequentResponse)); } |