summaryrefslogtreecommitdiff
path: root/src/mongo/db
diff options
context:
space:
mode:
authorDavid Percy <david.percy@mongodb.com>2023-04-14 21:11:17 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2023-04-25 21:20:56 +0000
commit8ecaea0cba2e7b121c189d4d4eedf7e3d725dc21 (patch)
treee55b4704262b6265f3da8c21a31917348b8016e7 /src/mongo/db
parent9f160dea3f2d311efb8f981c239e717b79a09f65 (diff)
downloadmongo-8ecaea0cba2e7b121c189d4d4eedf7e3d725dc21.tar.gz
SERVER-70407 [CQF] Document Memo, MemoIntegrator; remove dead code
Diffstat (limited to 'src/mongo/db')
-rw-r--r--src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp19
-rw-r--r--src/mongo/db/query/optimizer/cascades/memo.cpp217
-rw-r--r--src/mongo/db/query/optimizer/cascades/memo.h118
-rw-r--r--src/mongo/db/query/optimizer/cascades/memo_defs.h10
-rw-r--r--src/mongo/db/query/optimizer/cascades/rewrite_queues.h2
5 files changed, 150 insertions, 216 deletions
diff --git a/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp b/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp
index 158a0a1694a..e68098c1eb1 100644
--- a/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp
+++ b/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp
@@ -124,7 +124,7 @@ std::pair<GroupIdType, NodeIdSet> LogicalRewriter::addNode(const ABT& node,
const GroupIdType targetGroupId,
const LogicalRewriteType rule,
const bool addExistingNodeWithNewChild) {
- NodeIdSet insertNodeIds;
+ NodeIdSet insertedNodeIds;
Memo::NodeTargetGroupMap targetGroupMap;
if (targetGroupId >= 0) {
@@ -135,15 +135,18 @@ std::pair<GroupIdType, NodeIdSet> LogicalRewriter::addNode(const ABT& node,
Memo::Context{&_metadata, &_debugInfo, &_logicalPropsDerivation, &_cardinalityEstimator},
node,
std::move(targetGroupMap),
- insertNodeIds,
- rule,
- addExistingNodeWithNewChild);
+ insertedNodeIds,
+ rule);
uassert(6624046,
"Result group is not the same as target group",
targetGroupId < 0 || targetGroupId == resultGroupId);
- for (const MemoLogicalNodeId& nodeMemoId : insertNodeIds) {
+ // Every memo group that was extended with a new node may have new rewrites that can apply to
+ // it, so enqueue each of these groups to be visited by a rewrite later.
+ for (const MemoLogicalNodeId& nodeMemoId : insertedNodeIds) {
+ // However, if 'addExistingNodeWithNewChild' then don't schedule the 'targetGroupId' for new
+ // rewrites, to avoid applying the same rewrite forever.
if (addExistingNodeWithNewChild && nodeMemoId._groupId == targetGroupId) {
continue;
}
@@ -156,7 +159,7 @@ std::pair<GroupIdType, NodeIdSet> LogicalRewriter::addNode(const ABT& node,
}
}
- return {resultGroupId, std::move(insertNodeIds)};
+ return {resultGroupId, std::move(insertedNodeIds)};
}
void LogicalRewriter::clearGroup(const GroupIdType groupId) {
@@ -705,6 +708,10 @@ static void convertFilterToSargableNode(ABT::reference_type node,
IndexReqTarget::Complete,
filterNode.getChild());
if (conversion->_retainPredicate) {
+ // '_retainPredicate' means the 'sargableNode' is an over-approximation, so we also have to
+ // keep the original Filter node. But this means the Filter-to-Sargable rewrite could apply
+ // again, to avoid rewriting endlessly we need to avoid scheduling this rewrite. So we pass
+ // 'addExistingNodeWithNewChild = true'.
ABT newNode = node;
newNode.cast<FilterNode>()->getChild() = std::move(sargableNode);
ctx.addNode(newNode, true /*substitute*/, true /*addExistingNodeWithNewChild*/);
diff --git a/src/mongo/db/query/optimizer/cascades/memo.cpp b/src/mongo/db/query/optimizer/cascades/memo.cpp
index 64bdc811291..afe235e497f 100644
--- a/src/mongo/db/query/optimizer/cascades/memo.cpp
+++ b/src/mongo/db/query/optimizer/cascades/memo.cpp
@@ -123,7 +123,14 @@ const ExpressionBinder& Group::binder() const {
}
/**
- * TODO SERVER-70407: Improve documentation around the Memo and related classes.
+ * MemoIntegrator adds a logical plan to a Memo, by putting each node (stage) in the appropriate
+ * group, and returning the group ID where the root node can be found.
+ *
+ * It works recursively: to add a node to a Memo, first add each child, and replace each child with
+ * a MemoLogicalDelegator that refers to the group where that child was put.
+ *
+ * After stubbing out the children it relies on 'Memo::addNode' to ensure that if a syntactically
+ * equal node is already in some group, we reuse it.
*/
class MemoIntegrator {
public:
@@ -131,14 +138,12 @@ public:
Memo& memo,
Memo::NodeTargetGroupMap targetGroupMap,
NodeIdSet& insertedNodeIds,
- const LogicalRewriteType rule,
- const bool addExistingNodeWithNewChild)
+ const LogicalRewriteType rule)
: _ctx(std::move(ctx)),
_memo(memo),
_insertedNodeIds(insertedNodeIds),
_targetGroupMap(std::move(targetGroupMap)),
- _rule(rule),
- _addExistingNodeWithNewChild(addExistingNodeWithNewChild) {}
+ _rule(rule) {}
// This is a transient structure. We do not allow copying or moving.
MemoIntegrator(const MemoIntegrator& /*other*/) = delete;
@@ -149,9 +154,6 @@ public:
/**
* Nodes
*/
- void prepare(const ABT& n, const ScanNode& node, const VariableEnvironment& /*env*/) {
- // noop
- }
GroupIdType transport(const ABT& n,
const ScanNode& node,
@@ -160,10 +162,6 @@ public:
return addNodes(n, node, n, env, {});
}
- void prepare(const ABT& n, const ValueScanNode& node, const VariableEnvironment& /*env*/) {
- // noop
- }
-
GroupIdType transport(const ABT& n,
const ValueScanNode& node,
const VariableEnvironment& env,
@@ -171,12 +169,6 @@ public:
return addNodes(n, node, n, env, {});
}
- void prepare(const ABT& n,
- const MemoLogicalDelegatorNode& node,
- const VariableEnvironment& /*env*/) {
- // noop
- }
-
GroupIdType transport(const ABT& n,
const MemoLogicalDelegatorNode& node,
const VariableEnvironment& env) {
@@ -187,10 +179,6 @@ public:
return addNodes(n, node, n, env, {});
}
- void prepare(const ABT& n, const FilterNode& node, const VariableEnvironment& /*env*/) {
- updateTargetGroupMapUnary(n, node);
- }
-
GroupIdType transport(const ABT& n,
const FilterNode& node,
const VariableEnvironment& env,
@@ -199,10 +187,6 @@ public:
return addNode(n, node, env, child);
}
- void prepare(const ABT& n, const EvaluationNode& node, const VariableEnvironment& /*env*/) {
- updateTargetGroupMapUnary(n, node);
- }
-
GroupIdType transport(const ABT& n,
const EvaluationNode& node,
const VariableEnvironment& env,
@@ -211,10 +195,6 @@ public:
return addNode(n, node, env, child);
}
- void prepare(const ABT& n, const SargableNode& node, const VariableEnvironment& /*env*/) {
- updateTargetGroupMapUnary(n, node);
- }
-
GroupIdType transport(const ABT& n,
const SargableNode& node,
const VariableEnvironment& env,
@@ -224,14 +204,6 @@ public:
return addNode(n, node, env, child);
}
- void prepare(const ABT& n, const RIDIntersectNode& node, const VariableEnvironment& /*env*/) {
- // noop.
- }
-
- void prepare(const ABT& n, const RIDUnionNode& node, const VariableEnvironment& /*env*/) {
- // noop.
- }
-
GroupIdType transport(const ABT& n,
const RIDIntersectNode& node,
const VariableEnvironment& env,
@@ -248,10 +220,6 @@ public:
return addNodes(n, node, env, leftChild, rightChild);
}
- void prepare(const ABT& n, const BinaryJoinNode& node, const VariableEnvironment& /*env*/) {
- updateTargetGroupMapBinary(n, node);
- }
-
GroupIdType transport(const ABT& n,
const BinaryJoinNode& node,
const VariableEnvironment& env,
@@ -261,10 +229,6 @@ public:
return addNodes(n, node, env, leftChild, rightChild);
}
- void prepare(const ABT& n, const UnionNode& node, const VariableEnvironment& /*env*/) {
- updateTargetGroupMapNary(n, node);
- }
-
GroupIdType transport(const ABT& n,
const UnionNode& node,
const VariableEnvironment& env,
@@ -274,10 +238,6 @@ public:
return addNodes(n, node, env, std::move(children));
}
- void prepare(const ABT& n, const GroupByNode& node, const VariableEnvironment& /*env*/) {
- updateTargetGroupMapUnary(n, node);
- }
-
GroupIdType transport(const ABT& n,
const GroupByNode& node,
const VariableEnvironment& env,
@@ -289,10 +249,6 @@ public:
return addNode(n, node, env, child);
}
- void prepare(const ABT& n, const UnwindNode& node, const VariableEnvironment& /*env*/) {
- updateTargetGroupMapUnary(n, node);
- }
-
GroupIdType transport(const ABT& n,
const UnwindNode& node,
const VariableEnvironment& env,
@@ -302,10 +258,6 @@ public:
return addNode(n, node, env, child);
}
- void prepare(const ABT& n, const CollationNode& node, const VariableEnvironment& /*env*/) {
- updateTargetGroupMapUnary(n, node);
- }
-
GroupIdType transport(const ABT& n,
const CollationNode& node,
const VariableEnvironment& env,
@@ -314,10 +266,6 @@ public:
return addNode(n, node, env, child);
}
- void prepare(const ABT& n, const LimitSkipNode& node, const VariableEnvironment& /*env*/) {
- updateTargetGroupMapUnary(n, node);
- }
-
GroupIdType transport(const ABT& n,
const LimitSkipNode& node,
const VariableEnvironment& env,
@@ -325,10 +273,6 @@ public:
return addNode(n, node, env, child);
}
- void prepare(const ABT& n, const ExchangeNode& node, const VariableEnvironment& /*env*/) {
- updateTargetGroupMapUnary(n, node);
- }
-
GroupIdType transport(const ABT& n,
const ExchangeNode& node,
const VariableEnvironment& env,
@@ -337,10 +281,6 @@ public:
return addNode(n, node, env, child);
}
- void prepare(const ABT& n, const RootNode& node, const VariableEnvironment& /*env*/) {
- updateTargetGroupMapUnary(n, node);
- }
-
GroupIdType transport(const ABT& n,
const RootNode& node,
const VariableEnvironment& env,
@@ -361,11 +301,6 @@ public:
return -1;
}
- template <typename T, typename... Ts>
- void prepare(const ABT& n, const T& /*node*/, const VariableEnvironment& /*env*/) {
- static_assert(!canBeLogicalNode<T>(), "Logical node must implement its prepare.");
- }
-
GroupIdType integrate(const ABT& n) {
return algebra::transport<true>(n, *this, VariableEnvironment::build(n, &_memo));
}
@@ -430,105 +365,20 @@ private:
return addNodes(n, node, std::move(forMemo), env, {leftGroupId, rightGroupId});
}
- template <class T>
- ABT::reference_type findExistingNodeFromTargetGroupMap(const ABT& n, const T& node) {
- auto it = _targetGroupMap.find(n.ref());
- if (it == _targetGroupMap.cend()) {
- return nullptr;
- }
- if (const auto index = _memo.findNodeInGroup(it->second, n.ref())) {
- ABT::reference_type result = _memo.getNode({it->second, *index});
- uassert(6624049, "Node type in memo does not match target type", result.is<T>());
- return result;
- }
- return nullptr;
- }
-
- void updateTargetGroupRefs(
- const std::vector<std::pair<ABT::reference_type, GroupIdType>>& childGroups) {
- for (auto [childRef, targetGroupId] : childGroups) {
- auto it = _targetGroupMap.find(childRef);
- if (it == _targetGroupMap.cend()) {
- _targetGroupMap.emplace(childRef, targetGroupId);
- } else if (it->second != targetGroupId) {
- uasserted(6624050, "Incompatible target groups for parent and child");
- }
- }
- }
-
- template <class T>
- void updateTargetGroupMapUnary(const ABT& n, const T& node) {
- if (_addExistingNodeWithNewChild) {
- return;
- }
-
- ABT::reference_type existing = findExistingNodeFromTargetGroupMap(n, node);
- if (!existing.empty()) {
- const GroupIdType targetGroupId = existing.cast<T>()
- ->getChild()
- .template cast<MemoLogicalDelegatorNode>()
- ->getGroupId();
- updateTargetGroupRefs({{node.getChild().ref(), targetGroupId}});
- }
- }
-
- template <class T>
- void updateTargetGroupMapNary(const ABT& n, const T& node) {
- ABT::reference_type existing = findExistingNodeFromTargetGroupMap(n, node);
- if (!existing.empty()) {
- const ABTVector& existingChildren = existing.cast<T>()->nodes();
- const ABTVector& targetChildren = node.nodes();
- uassert(6624051,
- "Different number of children between existing and target node",
- existingChildren.size() == targetChildren.size());
-
- std::vector<std::pair<ABT::reference_type, GroupIdType>> childGroups;
- for (size_t i = 0; i < existingChildren.size(); i++) {
- const ABT& existingChild = existingChildren.at(i);
- const ABT& targetChild = targetChildren.at(i);
- childGroups.emplace_back(
- targetChild.ref(),
- existingChild.cast<MemoLogicalDelegatorNode>()->getGroupId());
- }
- updateTargetGroupRefs(childGroups);
- }
- }
-
- template <class T>
- void updateTargetGroupMapBinary(const ABT& n, const T& node) {
- ABT::reference_type existing = findExistingNodeFromTargetGroupMap(n, node);
- if (existing.empty()) {
- return;
- }
-
- const T& existingNode = *existing.cast<T>();
- const GroupIdType leftGroupId =
- existingNode.getLeftChild().template cast<MemoLogicalDelegatorNode>()->getGroupId();
- const GroupIdType rightGroupId =
- existingNode.getRightChild().template cast<MemoLogicalDelegatorNode>()->getGroupId();
- updateTargetGroupRefs(
- {{node.getLeftChild().ref(), leftGroupId}, {node.getRightChild().ref(), rightGroupId}});
- }
-
- /**
- * We do not own any of these.
- */
+ // Contains several resources required for calling Memo::addNode.
Memo::Context _ctx;
+ // The memo to be updated.
Memo& _memo;
+ // An out param, for reporting nodes we insert to the memo.
NodeIdSet& _insertedNodeIds;
- /**
- * We own this.
- */
+ // Each [node, groupId] entry means force the given node to go into the given group.
+ // This is only used for the root node passed in to Memo::integrate(), so there should be
+ // at most 1 entry.
Memo::NodeTargetGroupMap _targetGroupMap;
// Rewrite rule that triggered this node to be created.
const LogicalRewriteType _rule;
-
- // If set we enable modification of target group based on existing nodes. In practical terms, we
- // would not assume that if F(x) = F(y) then x = y. This is currently used in conjunction with
- // $elemMatch rewrite (PathTraverse over PathCompose).
- bool _addExistingNodeWithNewChild;
};
Memo::Context::Context(const Metadata* metadata,
@@ -565,30 +415,11 @@ Group& Memo::getGroup(const GroupIdType groupId) {
return *_groups.at(groupId);
}
-boost::optional<size_t> Memo::findNodeInGroup(GroupIdType groupId, ABT::reference_type node) const {
- return getGroup(groupId)._logicalNodes.find(node);
-}
-
GroupIdType Memo::addGroup(ProjectionNameSet projections) {
_groups.emplace_back(std::make_unique<Group>(std::move(projections)));
return _groups.size() - 1;
}
-std::pair<MemoLogicalNodeId, bool> Memo::addNode(GroupIdType groupId,
- ABT n,
- LogicalRewriteType rule) {
- uassert(6624052, "Attempting to insert a physical node", !n.is<ExclusivelyPhysicalNode>());
-
- Group& group = *_groups.at(groupId);
- OrderPreservingABTSet& nodes = group._logicalNodes;
-
- const auto [index, inserted] = nodes.emplace_back(std::move(n));
- if (inserted) {
- group._rules.push_back(rule);
- }
- return {{groupId, index}, inserted};
-}
-
ABT::reference_type Memo::getNode(const MemoLogicalNodeId nodeMemoId) const {
return getGroup(nodeMemoId._groupId)._logicalNodes.at(nodeMemoId._index);
}
@@ -682,6 +513,7 @@ MemoLogicalNodeId Memo::addNode(const Context& ctx,
NodeIdSet& insertedNodeIds,
ABT n,
const LogicalRewriteType rule) {
+ uassert(6624052, "Attempting to insert a physical node", !n.is<ExclusivelyPhysicalNode>());
for (const GroupIdType groupId : groupVector) {
// Invalid tree: node is its own child.
uassert(6624127, "Target group appears inside group vector", groupId != targetGroupId);
@@ -703,7 +535,14 @@ MemoLogicalNodeId Memo::addNode(const Context& ctx,
// Current node is not in the memo. Insert unchanged.
const GroupIdType groupId = noTargetGroup ? addGroup(std::move(projections)) : targetGroupId;
- auto [newId, inserted] = addNode(groupId, std::move(n), rule);
+ Group& group = *_groups.at(groupId);
+ OrderPreservingABTSet& nodes = group._logicalNodes;
+ const auto [index, inserted] = nodes.emplace_back(std::move(n));
+ if (inserted) {
+ group._rules.push_back(rule);
+ }
+ auto newId = MemoLogicalNodeId{groupId, index};
+
if (inserted || noTargetGroup) {
insertedNodeIds.insert(newId);
_inputGroupsToNodeIdMap[groupVector].insert(newId);
@@ -731,11 +570,9 @@ GroupIdType Memo::integrate(const Memo::Context& ctx,
const ABT& node,
NodeTargetGroupMap targetGroupMap,
NodeIdSet& insertedNodeIds,
- const LogicalRewriteType rule,
- const bool addExistingNodeWithNewChild) {
+ const LogicalRewriteType rule) {
_stats._numIntegrations++;
- MemoIntegrator integrator(
- ctx, *this, std::move(targetGroupMap), insertedNodeIds, rule, addExistingNodeWithNewChild);
+ MemoIntegrator integrator(ctx, *this, std::move(targetGroupMap), insertedNodeIds, rule);
return integrator.integrate(node);
}
diff --git a/src/mongo/db/query/optimizer/cascades/memo.h b/src/mongo/db/query/optimizer/cascades/memo.h
index 26c6148f1ed..7f1c1d22cc0 100644
--- a/src/mongo/db/query/optimizer/cascades/memo.h
+++ b/src/mongo/db/query/optimizer/cascades/memo.h
@@ -87,30 +87,87 @@ private:
opt::unordered_map<properties::PhysProps, size_t, PhysPropsHasher> _physPropsToPhysNodeMap;
};
+/**
+ * Represents a set of equivalent query plans. See 'class Memo' for more detail.
+ */
struct Group {
explicit Group(ProjectionNameSet projections);
Group(const Group&) = delete;
Group(Group&&) = default;
+ // Returns the set of bindings that all plans in this group are expected to produce.
+ // (Since all plans in a Group are equivalent, they all must produce the same bindings.)
const ExpressionBinder& binder() const;
- // Associated logical nodes.
+ // Contains a set of equivalent logical plans. Each element is a LogicalNode, and its immediate
+ // immediate children are MemoLogicalDelegatorNode. This ensures every logical node has an
+ // associated group. For example we would never have (Filter B (Filter A (Delegator _))) here
+ // because 'Filter A' would have no associated group.
OrderPreservingABTSet _logicalNodes;
- // Rule that triggered each logical node.
+ // Stores, for each logical node, the rewrite rule that first caused that node to be created.
+ // '_rules[i]' corresponds to '_logicalNodes[i]'.
+ // Used only for explain / debugging.
std::vector<LogicalRewriteType> _rules;
- // Group logical properties.
+ // Contains logical properties that are derived bottom-up from the first logical plan in the
+ // group. Since all plans in the group are expected to be equivalent, the logical properties are
+ // expected to be true for all plans in the group.
properties::LogicalProps _logicalProperties;
+
+ // Same as 'binder()'.
ABT _binder;
+ // A collection of 'LogicalRewriteEntry', indicating which rewrites we will attempt next, and at
+ // which node.
+ //
+ // Each entry represents a specific rewrite rule, and a specific node. Typically there are many
+ // entries pointing to the same node, but each for a different rewrite rule. In
+ // 'LogicalRewriter::addNode', for every newly added node we schedule all possible rewrites
+ // which transform it or reorder it against other nodes. The goal is to try all possible ways to
+ // generate new plans using this new node.
LogicalRewriteQueue _logicalRewriteQueue;
// Best physical plan for given physical properties: aka "Winner's circle".
+ //
+ // Unlike '_logicalNodes', the immediate children of physical nodes are not required to be
+ // delegator nodes. Each entry in '_physicalNode' can be a complex tree of nodes, which may or
+ // may not end in 'MemoPhysicalDelegatorNode' at the leaves.
PhysNodes _physicalNodes;
};
/**
- * TODO SERVER-70407: Improve documentation around the Memo and related classes.
+ * A Memo holds all the alternative plans for a query, and for all of its subqueries.
+ *
+ * A Memo is made of 'groups': a group is a set of alternative plans that produce the same result
+ * (the same bag of rows). You can think of a group as representing a question: "what is the best
+ * plan for this query?". During optimization a group holds several possible answers, and at the end
+ * we will choose the best answer based on cost estimates.
+ *
+ * The logical plans in a group are all interchangeable, since they compute the same bag. Anywhere
+ * one logical plan can appear, so can an equivalent one: it doesn't change the overall result.
+ * So, the logical plans in a group are all stored together in one ABTVector.
+ *
+ * By contrast, not all physical plans are interchangeable. For example, the MergeJoin algorithm
+ * requires sorted input. So the physical plans in a group are stored separately, to answer separate
+ * questions:
+ * - "What is the best physical plan whose results are sorted by <x>?"
+ * - "What is the best physical plan that uses an index?"
+ * - "What is the best physical plan whose results are sorted by (<x>, <y>), and uses an index?"
+ * - "What is the best physical plan (with no constraints)?"
+ * etc. Each set of physical properties is a different optimization question. So a group has a
+ * mapping from set of physical properties, to the best physical plan discovered so far that
+ * produces the same logical result and satisfies those properties. For optimization we only need
+ * the best plan for each set of properties, but if 'keepRejectedPlans' is enabled then we keep the
+ * non-best plans for debugging.
+ *
+ * Typically a Memo is populated by calling 'integrate()' to add the initial logical plan, and then
+ * letting rewrite rules add more plans.
+ * - In the substitution phase, 'RewriteContext' uses 'Memo::clearLogicalNodes()' and
+ * 'Memo::integrate()' to replace a group with a single logical node.
+ * - In the exploration phase, 'RewriteContext' uses 'Memo::integrate()' to add alternative logical
+ * plans to a group.
+ * - In the implementation phase, 'PhysicalRewriter' uses 'PhysNodes::addOptimizationResult()' to
+ * update the set of physical plans.
*/
class Memo : public MemoExplainInterface, public MemoGroupBinderInterface {
// To be able to access _stats field.
@@ -178,8 +235,6 @@ public:
LogicalRewriteQueue& getLogicalRewriteQueue(GroupIdType groupId);
- boost::optional<size_t> findNodeInGroup(GroupIdType groupId, ABT::reference_type node) const;
-
ABT::reference_type getNode(MemoLogicalNodeId nodeMemoId) const;
/**
@@ -189,20 +244,20 @@ public:
*/
void estimateCE(const Context& ctx, GroupIdType groupId);
- MemoLogicalNodeId addNode(const Context& ctx,
- GroupIdVector groupVector,
- ProjectionNameSet projections,
- GroupIdType targetGroupId,
- NodeIdSet& insertedNodeIds,
- ABT n,
- LogicalRewriteType rule);
-
+ /**
+ * Takes a logical plan, and adds each Node to the appropriate group.
+ *
+ * Caller can use 'targetGroupMap' to force a node to go into a desired group.
+ * The out-param 'insertedNodeIds' tells the caller which nodes were newly inserted.
+ * Optional 'rule' is used to annotate any newly inserted nodes, for debugging.
+ *
+ * See 'class MemoIntegrator' for more details.
+ */
GroupIdType integrate(const Context& ctx,
const ABT& node,
NodeTargetGroupMap targetGroupMap,
NodeIdSet& insertedNodeIds,
- LogicalRewriteType rule = LogicalRewriteType::Root,
- bool addExistingNodeWithNewChild = false);
+ LogicalRewriteType rule = LogicalRewriteType::Root);
void clearLogicalNodes(GroupIdType groupId);
@@ -215,13 +270,40 @@ public:
size_t getPhysicalNodeCount() const;
private:
+ // MemoIntegrator is a helper / transport for 'Memo::integrate()'.
+ friend class MemoIntegrator;
+
+ /**
+ * Ensures the logical node 'n' is present in some Group.
+ *
+ * 'groupVector' should be the set of group IDs that contain the immediate children of 'n'. This
+ * is used to maintain '_inputGroupsToNodeIdMap' and '_nodeIdToInputGroupsMap'.
+ *
+ * 'projections' should be the set of output bindings of 'n'. It's used to initialize the
+ * ProjectionAvailability property in the case where a new Group is created.
+ *
+ * Optional 'targetGroupId' means force the node to be added to the given group,
+ * and raise an error if it's already present in some other group. '-1' means use an existing
+ * group if possible or create a new one otherwise.
+ *
+ * 'rule' is for explain/debugging only: it identifies the rewrite that introduced the node 'n'.
+ *
+ * The out-param 'insertedNodeIds' is appended to if a new logical node was added to any group
+ * (existing or new).
+ */
+ MemoLogicalNodeId addNode(const Context& ctx,
+ GroupIdVector groupVector,
+ ProjectionNameSet projections,
+ GroupIdType targetGroupId,
+ NodeIdSet& insertedNodeIds,
+ ABT n,
+ LogicalRewriteType rule);
+
const Group& getGroup(GroupIdType groupId) const;
Group& getGroup(GroupIdType groupId);
GroupIdType addGroup(ProjectionNameSet projections);
- std::pair<MemoLogicalNodeId, bool> addNode(GroupIdType groupId, ABT n, LogicalRewriteType rule);
-
boost::optional<MemoLogicalNodeId> findNode(const GroupIdVector& groups, const ABT& node);
std::vector<std::unique_ptr<Group>> _groups;
diff --git a/src/mongo/db/query/optimizer/cascades/memo_defs.h b/src/mongo/db/query/optimizer/cascades/memo_defs.h
index 456116d05f9..85ad0d2948a 100644
--- a/src/mongo/db/query/optimizer/cascades/memo_defs.h
+++ b/src/mongo/db/query/optimizer/cascades/memo_defs.h
@@ -42,16 +42,24 @@ namespace mongo::optimizer::cascades {
* the memo itself.
*/
+/**
+ * Deep hashing, compatible with deep equality comparison.
+ */
struct MemoNodeRefHash {
size_t operator()(const ABT::reference_type& nodeRef) const;
};
+/**
+ * Deep equality comparison.
+ */
struct MemoNodeRefCompare {
bool operator()(const ABT::reference_type& left, const ABT::reference_type& right) const;
};
/**
- * A set of ABT nodes which keeps track of the order in which we inserted them.
+ * A set of ABT which keeps track of the order in which we inserted them.
+ *
+ * Compares ABTs using deep equality.
*/
class OrderPreservingABTSet {
public:
diff --git a/src/mongo/db/query/optimizer/cascades/rewrite_queues.h b/src/mongo/db/query/optimizer/cascades/rewrite_queues.h
index 732d338a153..d96f7cc7d79 100644
--- a/src/mongo/db/query/optimizer/cascades/rewrite_queues.h
+++ b/src/mongo/db/query/optimizer/cascades/rewrite_queues.h
@@ -37,7 +37,7 @@
namespace mongo::optimizer::cascades {
/**
- * Keeps track of candidate physical rewrites.
+ * Represents a logical rewrite that will be attempted at a particular node, in a particular group.
*/
struct LogicalRewriteEntry {
LogicalRewriteEntry(double priority, LogicalRewriteType type, MemoLogicalNodeId nodeId);