/**
 *    Copyright (C) 2022-present MongoDB, Inc.
 *
 *    This program is free software: you can redistribute it and/or modify
 *    it under the terms of the Server Side Public License, version 1,
 *    as published by MongoDB, Inc.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *    Server Side Public License for more details.
 *
 *    You should have received a copy of the Server Side Public License
 *    along with this program. If not, see
 *    <http://www.mongodb.com/licensing/server-side-public-license>.
 *
 *    As a special exception, the copyright holders give permission to link the
 *    code of portions of this program with the OpenSSL library under certain
 *    conditions as described in each individual source file and distribute
 *    linked combinations including the program with the OpenSSL library. You
 *    must comply with the Server Side Public License in all respects for
 *    all of the code used other than as permitted herein. If you modify file(s)
 *    with this exception, you may extend this exception to your version of the
 *    file(s), but you are not obligated to do so. If you do not wish to do so,
 *    delete this exception statement from your version. If you delete this
 *    exception statement from all source files in the program, then also delete
 *    it in the license file.
 */

#pragma once

#include <cstddef>
#include <cstdint>
#include <functional>
#include <map>
#include <string>
#include <type_traits>
#include <utility>
#include <variant>
#include <vector>

#include <boost/optional/optional.hpp>

#include "mongo/db/query/optimizer/defs.h"
#include "mongo/db/query/optimizer/node.h"
#include "mongo/db/query/optimizer/node_defs.h"
#include "mongo/db/query/optimizer/props.h"
#include "mongo/db/query/optimizer/utils/physical_plan_builder.h"
#include "mongo/util/assert_util.h"
#include "mongo/util/id_generator.h"
#include "mongo/util/str.h"

namespace mongo::optimizer {

template <class Builder>
ABT makeBalancedTreeImpl(Builder builder, std::vector<ABT>& leaves, size_t from, size_t until) {
    invariant(from < until);
    if (from + 1 == until) {
        return std::move(leaves[from]);
    } else {
        size_t mid = from + (until - from) / 2;
        auto lhs = makeBalancedTreeImpl(builder, leaves, from, mid);
        auto rhs = makeBalancedTreeImpl(builder, leaves, mid, until);
        return builder(std::move(lhs), std::move(rhs));
    }
}

template <class Builder>
ABT makeBalancedTree(Builder builder, std::vector<ABT> leaves) {
    return makeBalancedTreeImpl(builder, leaves, 0, leaves.size());
}

ABT makeBalancedBooleanOpTree(Operations logicOp, std::vector<ABT> leaves);

inline void updateHash(size_t& result, const size_t hash) {
    result = 31 * result + hash;
}

inline void updateHashUnordered(size_t& result, const size_t hash) {
    result ^= hash;
}

template <class T,
          class Hasher = std::hash<T>,
          class T1 = std::conditional_t<std::is_arithmetic_v<T>, const T, const T&>>
inline size_t computeVectorHash(const std::vector<T>& v) {
    size_t result = 17;
    for (T1 e : v) {
        updateHash(result, Hasher()(e));
    }
    return result;
}

template <size_t typeCode, typename... Args>
inline size_t computeHashSeq(const Args&... seq) {
    size_t result = 17 + typeCode;
    (updateHash(result, seq), ...);
    return result;
}

/**
 * Used to access and manipulate the child of a unary node.
 */
template <class NodeType>
struct DefaultChildAccessor {
    const ABT& operator()(const ABT& node) const {
        return node.cast<NodeType>()->getChild();
    }

    ABT& operator()(ABT& node) const {
        return node.cast<NodeType>()->getChild();
    }
};

/**
 * Used to access and manipulate the left child of a binary node.
 */
template <class NodeType>
struct LeftChildAccessor {
    const ABT& operator()(const ABT& node) const {
        return node.cast<NodeType>()->getLeftChild();
    }

    ABT& operator()(ABT& node) const {
        return node.cast<NodeType>()->getLeftChild();
    }
};

/**
 * Used to access and manipulate the right child of a binary node.
 */
template <class NodeType>
struct RightChildAccessor {
    const ABT& operator()(const ABT& node) const {
        return node.cast<NodeType>()->getRightChild();
    }

    ABT& operator()(ABT& node) const {
        return node.cast<NodeType>()->getRightChild();
    }
};

/**
 * Used to access children of an n-ary node. By default, it accesses the first child.
 */
template <class NodeType>
struct IndexedChildAccessor {
    const ABT& operator()(const ABT& node) const {
        return node.cast<NodeType>()->nodes().at(index);
    }

    ABT& operator()(ABT& node) {
        return node.cast<NodeType>()->nodes().at(index);
    }

    size_t index = 0;
};
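/*
 * Illustrative usage sketch (not part of the optimizer API; assumes 'node' holds a FilterNode,
 * which is a unary node). The accessors above share a call signature, so generic rewrite helpers
 * can be parameterized on an accessor type to reach the child, left/right child, or i-th child of
 * a node uniformly:
 *
 *   const ABT& child = DefaultChildAccessor<FilterNode>{}(node);
 *
 * The non-const overload returns a mutable reference, which allows swapping a child in place.
 */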
/**
 * Used to vend out fresh projection names. The method getNextId receives an optional prefix. If
 * we are generating descriptive names, the variable name we return starts with the prefix and
 * includes a prefix-specific counter. If we are not generating descriptive variable names, the
 * prefix is ignored and a single global counter is used instead.
 */
class PrefixId {
    using IdType = uint64_t;
    using PrefixMapType = opt::unordered_map<std::string, IdType>;

public:
    static PrefixId create(const bool useDescriptiveVarNames) {
        return {useDescriptiveVarNames};
    }
    static PrefixId createForTests() {
        return {true /*useDescriptiveVarNames*/};
    }

    template <size_t N>
    ProjectionName getNextId(const char (&prefix)[N]) {
        if (std::holds_alternative<IdType>(_ids)) {
            return ProjectionName{str::stream() << "p" << std::get<IdType>(_ids)++};
        } else {
            return ProjectionName{str::stream()
                                  << prefix << "_" << std::get<PrefixMapType>(_ids)[prefix]++};
        }
    }

    PrefixId(const PrefixId& other) = delete;
    PrefixId(PrefixId&& other) = default;

    PrefixId& operator=(const PrefixId& other) = delete;
    PrefixId& operator=(PrefixId&& other) = default;

private:
    PrefixId(const bool useDescriptiveVarNames) {
        if (useDescriptiveVarNames) {
            _ids = {PrefixMapType{}};
        } else {
            _ids = {uint64_t{}};
        }
    }

    std::variant<uint64_t, PrefixMapType> _ids;
};

using SpoolIdGenerator = IdGenerator<int64_t>;

void combineLimitSkipProperties(properties::LimitSkipRequirement& aboveProp,
                                const properties::LimitSkipRequirement& belowProp);

properties::LogicalProps createInitialScanProps(const ProjectionName& projectionName,
                                                const std::string& scanDefName,
                                                GroupIdType groupId = -1,
                                                properties::DistributionSet distributions = {});

/**
 * Used to track references originating from a set of physical properties.
 */
ProjectionNameSet extractReferencedColumns(const properties::PhysProps& properties);

// Use a union node to restrict the set of projections we expose up the tree. The union node is
// optimized away during lowering.
void restrictProjections(ProjectionNameVector projNames, CEType ce, PhysPlanBuilder& input);

struct CollationSplitResult {
    bool _validSplit = false;
    ProjectionCollationSpec _leftCollation;
    ProjectionCollationSpec _rightCollation;
};

/**
 * Split a collation requirement between an outer (left) and inner (right) side. The outer side
 * must be a prefix of the collation spec, and the inner (right) side a suffix.
 */
CollationSplitResult splitCollationSpec(const boost::optional<ProjectionName>& ridProjName,
                                        const ProjectionCollationSpec& collationSpec,
                                        const ProjectionNameSet& leftProjections,
                                        const ProjectionNameSet& rightProjections);

struct PartialSchemaReqConversion {
    PartialSchemaReqConversion(PartialSchemaRequirements reqMap);
    PartialSchemaReqConversion(ABT bound);

    // If set, contains a Constant or Variable bound of a (yet unknown) interval.
    boost::optional<ABT> _bound;

    // Requirements we have built so far. May be trivially true.
    PartialSchemaRequirements _reqMap;

    // Have we added a PathComposeM.
    bool _hasIntersected;

    // Have we added a PathTraverse.
    bool _hasTraversed;

    // If true, retain the original predicate after the conversion. In this case, the requirement
    // map might capture only a part of the predicate.
    // TODO: consider generalizing to retain only a part of the predicate.
    bool _retainPredicate;
};

using PathToIntervalFn = std::function<boost::optional<IntervalReqExpr::Node>(const ABT&)>;
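/*
 * Illustrative sketch (hypothetical callback, not part of the optimizer API): a trivial
 * PathToIntervalFn which never produces an interval, effectively disabling the optional
 * path-to-interval expansion. A real callback would inspect the ABT (e.g. recognize PathArr)
 * and return an equivalent interval representation when possible.
 *
 *   const PathToIntervalFn noIntervalConversion =
 *       [](const ABT&) -> boost::optional<IntervalReqExpr::Node> { return boost::none; };
 */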
/**
 * Takes an expression that comes from a Filter or Evaluation node and attempts to convert it to
 * a PartialSchemaReqConversion. This is done independently of the availability of indexes.
 * Essentially this means extracting intervals over paths whenever possible. If the conversion is
 * not possible, returns an empty result.
 *
 * A direct node-to-intervals converter may be specified, which is used to selectively expand, for
 * example, PathArr into an equivalent interval representation.
 */
boost::optional<PartialSchemaReqConversion> convertExprToPartialSchemaReq(
    const ABT& expr, bool isFilterContext, const PathToIntervalFn& pathToInterval);

/**
 * Given a set of non-multikey paths, remove redundant Traverse elements from paths in a
 * PartialSchemaRequirements structure. After that simplification, the intervals of any remaining
 * non-multikey paths on the same key are intersected. Intervals of multikey paths are checked for
 * subsumption, and if one subsumes the other, the subsuming one is retained. Returns true if we
 * have an always-false predicate after simplification. Each redundant binding gets an entry in
 * 'projectionRenames', which maps the redundant name to the de-duplicated name.
 */
[[nodiscard]] bool simplifyPartialSchemaReqPaths(
    const boost::optional<ProjectionName>& scanProjName,
    const MultikeynessTrie& multikeynessTrie,
    PartialSchemaRequirements& reqMap,
    ProjectionRenames& projectionRenames,
    const ConstFoldFn& constFold);

/**
 * Try to check whether the predicate 'lhs' is a subset of 'rhs'.
 *
 * True means 'lhs' is contained in 'rhs': every document that matches 'lhs' also matches 'rhs'.
 *
 * False means either:
 * - Not a subset: there is a counterexample.
 * - Not sure: this function was unable to determine one way or the other.
 */
bool isSubsetOfPartialSchemaReq(const PartialSchemaRequirements& lhs,
                                const PartialSchemaRequirements& rhs);

/**
 * Computes the intersection of two PartialSchemaRequirements objects.
 * On success, returns true and stores the result in 'target'.
 * On failure, returns false and leaves 'target' in an unspecified state.
 *
 * Assumes 'target' comes before 'source', so 'source' may refer to bindings produced by 'target'.
 *
 * The intersection:
 * - is a predicate that matches iff both original predicates match.
 * - has all the bindings from 'target' and 'source', excluding bindings that would be redundant
 *   (have the same key).
 *
 * "Failure" means we are unable to represent the result as a PartialSchemaRequirements.
 * This can happen when:
 * - The resulting predicate is always false.
 * - 'source' reads from a projection bound by 'target'.
 */
bool intersectPartialSchemaReq(PartialSchemaRequirements& target,
                               const PartialSchemaRequirements& source);

PartialSchemaRequirements unionPartialSchemaReq(PartialSchemaRequirements&& left,
                                                PartialSchemaRequirements&& right);

/**
 * Encode the position of an index field as a field name, for use with a FieldProjectionMap.
 */
std::string encodeIndexKeyName(size_t indexField);

/**
 * Decode a field name back into an index field position.
 */
size_t decodeIndexKeyName(const std::string& fieldName);
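/*
 * Illustrative sketch (the exact string encoding is an implementation detail): the two functions
 * above are expected to round-trip, which lets an index field position be carried through a
 * FieldProjectionMap, which is keyed by field name.
 *
 *   const std::string fieldName = encodeIndexKeyName(2);
 *   invariant(decodeIndexKeyName(fieldName) == 2);
 */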
/**
 * Compute a list of candidate indexes. A CandidateIndexEntry describes intervals that could be
 * used for accessing each of the indexes in the map. The intervals themselves are derived from
 * 'reqMap'.
 */
CandidateIndexes computeCandidateIndexes(PrefixId& prefixId,
                                         const ProjectionName& scanProjectionName,
                                         const PartialSchemaRequirements& reqMap,
                                         const ScanDefinition& scanDef,
                                         const QueryHints& hints,
                                         const ConstFoldFn& constFold);

/**
 * Computes a set of residual predicates which will be applied on top of a Scan.
 */
boost::optional<ScanParams> computeScanParams(PrefixId& prefixId,
                                              const PartialSchemaRequirements& reqMap,
                                              const ProjectionName& rootProj);

/**
 * Checks if we have an interval tree with at least one atomic interval which may include Null as
 * an endpoint.
 */
bool checkMaybeHasNull(const IntervalReqExpr::Node& intervals, const ConstFoldFn& constFold);

/**
 * Used to lower a Sargable node to a subtree consisting of functionally equivalent Filter and
 * Eval nodes.
 */
void lowerPartialSchemaRequirement(const PartialSchemaKey& key,
                                   const PartialSchemaRequirement& req,
                                   const PathToIntervalFn& pathToInterval,
                                   boost::optional<CEType> residualCE,
                                   PhysPlanBuilder& builder);

/**
 * Lower ResidualRequirementsWithCE to a subtree consisting of functionally equivalent Filter and
 * Eval nodes. Note that we take 'indexPredSels' by value here because the implementation needs
 * its own copy. 'scanGroupCE' is the estimated cardinality of the underlying collection scan (the
 * Sargable node's child group), while 'baseCE' is the initial cardinality on top of which the
 * residual predicates act. For a Sargable node with a "Seek" target it is "1" to reflect the fact
 * that we fetch one row id at a time, and for "Complete" and "Index" targets it is the same as
 * 'scanGroupCE'.
 */
void lowerPartialSchemaRequirements(boost::optional<CEType> scanGroupCE,
                                    boost::optional<CEType> baseCE,
                                    std::vector<SelectivityType> indexPredSels,
                                    ResidualRequirementsWithOptionalCE::Node requirements,
                                    const PathToIntervalFn& pathToInterval,
                                    PhysPlanBuilder& builder);

/**
 * Build ResidualRequirementsWithOptionalCE by combining 'residReqs' with the corresponding
 * entries in 'partialSchemaKeyCE'.
 */
ResidualRequirementsWithOptionalCE::Node createResidualReqsWithCE(
    const ResidualRequirements::Node& residReqs, const PartialSchemaKeyCE& partialSchemaKeyCE);

/**
 * Build ResidualRequirementsWithOptionalCE where each entry in 'reqs' has boost::none CE.
 */
ResidualRequirementsWithOptionalCE::Node createResidualReqsWithEmptyCE(const PSRExpr::Node& reqs);

/**
 * Sort requirements under a Conjunction by estimated cost.
 */
void sortResidualRequirements(ResidualRequirementsWithOptionalCE::Node& residualReq);

void applyProjectionRenames(ProjectionRenames projectionRenames, ABT& node);

/**
 * Remove unused requirements from 'residualReqs' and remove unused projections from
 * 'fieldProjectionMap'. A boost::none 'residualReqs' indicates that there are no residual
 * requirements to be applied after the IndexScan, PhysicalScan or Seek (it can be thought of as
 * "trivially true" residual requirements).
 */
void removeRedundantResidualPredicates(
    const ProjectionNameOrderPreservingSet& requiredProjections,
    boost::optional<ResidualRequirements::Node>& residualReqs,
    FieldProjectionMap& fieldProjectionMap);

/**
 * Implements an RID Intersect node using Union and GroupBy.
 */
PhysPlanBuilder lowerRIDIntersectGroupBy(PrefixId& prefixId,
                                         const ProjectionName& ridProjName,
                                         CEType intersectedCE,
                                         CEType leftCE,
                                         CEType rightCE,
                                         const properties::PhysProps& physProps,
                                         const properties::PhysProps& leftPhysProps,
                                         const properties::PhysProps& rightPhysProps,
                                         PhysPlanBuilder leftChild,
                                         PhysPlanBuilder rightChild,
                                         ChildPropsType& childProps);
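/*
 * Illustrative sketch (conceptual only; the exact plan is produced by the functions above and
 * below). The Union+GroupBy strategy keeps a row id only if both children produce it, roughly:
 *
 *   Filter [rid was produced by both sides]
 *   GroupBy [ridProjName] {per-side counts}
 *   Union [ridProjName]
 *     <left child plan>
 *     <right child plan>
 *
 * The HashJoin and MergeJoin variants below implement the same semantics by joining the two
 * children on ridProjName.
 */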
/**
 * Implements an RID Intersect node using a HashJoin.
 */
PhysPlanBuilder lowerRIDIntersectHashJoin(PrefixId& prefixId,
                                          const ProjectionName& ridProjName,
                                          CEType intersectedCE,
                                          CEType leftCE,
                                          CEType rightCE,
                                          const properties::PhysProps& leftPhysProps,
                                          const properties::PhysProps& rightPhysProps,
                                          PhysPlanBuilder leftChild,
                                          PhysPlanBuilder rightChild,
                                          ChildPropsType& childProps);

PhysPlanBuilder lowerRIDIntersectMergeJoin(PrefixId& prefixId,
                                           const ProjectionName& ridProjName,
                                           CEType intersectedCE,
                                           CEType leftCE,
                                           CEType rightCE,
                                           const properties::PhysProps& leftPhysProps,
                                           const properties::PhysProps& rightPhysProps,
                                           PhysPlanBuilder leftChild,
                                           PhysPlanBuilder rightChild,
                                           ChildPropsType& childProps);

/**
 * Lowers a plan consisting of one or more equality prefixes. The sub-plans for each equality
 * prefix are connected using correlated joins. The sub-plans for each prefix in turn are
 * implemented as one or more index scans which are unioned or intersected depending on the shape
 * of the interval expression (e.g. conjunction or disjunction).
 */
PhysPlanBuilder lowerEqPrefixes(PrefixId& prefixId,
                                const ProjectionName& ridProjName,
                                FieldProjectionMap indexProjectionMap,
                                const std::string& scanDefName,
                                const std::string& indexDefName,
                                SpoolIdGenerator& spoolId,
                                size_t indexFieldCount,
                                const std::vector<EqualityPrefixEntry>& eqPrefixes,
                                size_t eqPrefixIndex,
                                const std::vector<bool>& reverseOrder,
                                ProjectionNameVector correlatedProjNames,
                                const std::map<size_t, SelectivityType>& indexPredSelMap,
                                CEType indexCE,
                                CEType scanGroupCE,
                                bool useSortedMerge);

bool hasProperIntervals(const PSRExpr::Node& reqs);

}  // namespace mongo::optimizer