/** * Copyright (C) 2022-present MongoDB, Inc. * * This program is free software: you can redistribute it and/or modify * it under the terms of the Server Side Public License, version 1, * as published by MongoDB, Inc. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * Server Side Public License for more details. * * You should have received a copy of the Server Side Public License * along with this program. If not, see * . * * As a special exception, the copyright holders give permission to link the * code of portions of this program with the OpenSSL library under certain * conditions as described in each individual source file and distribute * linked combinations including the program with the OpenSSL library. You * must comply with the Server Side Public License in all respects for * all of the code used other than as permitted herein. If you modify file(s) * with this exception, you may extend this exception to your version of the * file(s), but you are not obligated to do so. If you do not wish to do so, * delete this exception statement from your version. If you delete this * exception statement from all source files in the program, then also delete * it in the license file. */ #include "mongo/db/query/optimizer/utils/abt_hash.h" #include "mongo/db/query/optimizer/node.h" #include "mongo/db/query/optimizer/utils/utils.h" namespace mongo::optimizer { static size_t computeCollationHash(const properties::CollationRequirement& prop) { size_t collationHash = 17; for (const auto& entry : prop.getCollationSpec()) { updateHash(collationHash, ProjectionName::Hasher()(entry.first)); updateHash(collationHash, std::hash()(entry.second)); } return collationHash; } static size_t computeLimitSkipHash(const properties::LimitSkipRequirement& prop) { size_t limitSkipHash = 17; updateHash(limitSkipHash, std::hash()(prop.getLimit())); updateHash(limitSkipHash, std::hash()(prop.getSkip())); return limitSkipHash; } static size_t computePropertyProjectionsHash(const ProjectionNameVector& projections) { size_t resultHash = 17; for (const ProjectionName& projection : projections) { updateHashUnordered(resultHash, ProjectionName::Hasher()(projection)); } return resultHash; } static size_t computeProjectionRequirementHash(const properties::ProjectionRequirement& prop) { return computePropertyProjectionsHash(prop.getProjections().getVector()); } static size_t computeDistributionHash(const properties::DistributionRequirement& prop) { size_t resultHash = 17; const auto& distribAndProjections = prop.getDistributionAndProjections(); updateHash(resultHash, std::hash()(distribAndProjections._type)); updateHash(resultHash, computePropertyProjectionsHash(distribAndProjections._projectionNames)); return resultHash; } static void updateBoundHash(size_t& result, const BoundRequirement& bound) { updateHash(result, std::hash()(bound.isInclusive())); updateHash(result, ABTHashGenerator::generate(bound.getBound())); }; static void updateCompoundBoundHash(size_t& result, const CompoundBoundRequirement& bound) { updateHash(result, std::hash()(bound.isInclusive())); for (const auto& expr : bound.getBound()) { updateHash(result, ABTHashGenerator::generate(expr)); } } template class BoolExprHasher { public: size_t transport(const typename T::Atom& node) { // Dispatch to one of the computeHash functions below. return computeHash(node.getExpr()); } size_t transport(const typename T::Conjunction& node, const std::vector childResults) { size_t result = 31; for (const size_t childResult : childResults) { updateHash(result, childResult); } return result; } size_t transport(const typename T::Disjunction& node, const std::vector childResults) { size_t result = 29; for (const size_t childResult : childResults) { updateHash(result, childResult); } return result; } size_t compute(const typename T::Node& intervals) { return algebra::transport(intervals, *this); } }; static size_t computeHash(const IntervalRequirement& req) { size_t result = 17; updateBoundHash(result, req.getLowBound()); updateBoundHash(result, req.getHighBound()); return 17; } static size_t computeHash(const CompoundIntervalRequirement& req) { size_t result = 19; updateCompoundBoundHash(result, req.getLowBound()); updateCompoundBoundHash(result, req.getHighBound()); return result; } static size_t computeHash(const PartialSchemaEntry& entry) { const auto& [key, req] = entry; size_t result = 17; if (const auto& projName = key._projectionName) { updateHash(result, ProjectionName::Hasher()(*projName)); } updateHash(result, ABTHashGenerator::generate(key._path)); if (const auto& projName = req.getBoundProjectionName()) { updateHash(result, ProjectionName::Hasher()(*projName)); } BoolExprHasher intervalHasher; updateHash(result, intervalHasher.compute(req.getIntervals())); updateHash(result, std::hash()(req.getIsPerfOnly())); return result; } static size_t computePartialSchemaReqHash(const PartialSchemaRequirements& reqMap) { BoolExprHasher psrHasher; return psrHasher.compute(reqMap.getRoot()); } /** * Hasher for ABT nodes. Used in conjunction with memo. */ class ABTHashTransporter { public: /** * Nodes */ template size_t transport(const T& /*node*/, Ts&&...) { // Physical nodes do not currently need to implement hash. static_assert(!std::is_base_of_v && !std::is_base_of_v && !canBeLogicalNode(), "Logical nodes, paths, and expressions must implement their hash."); uasserted(6624142, "must implement custom hash"); } size_t transport(const References& references, std::vector inResults) { return computeHashSeq<1>(computeVectorHash(inResults)); } size_t transport(const ExpressionBinder& binders, std::vector inResults) { return computeHashSeq<2>( computeVectorHash(binders.names()), computeVectorHash(inResults)); } size_t transport(const ScanNode& node, size_t bindResult) { return computeHashSeq<3>(std::hash()(node.getScanDefName()), bindResult); } size_t transport(const ValueScanNode& node, size_t bindResult) { // Specifically not hashing props here. Those are compared in the equality operator. return computeHashSeq<46>(std::hash()(node.getHasRID()), std::hash()(node.getArraySize()), ABTHashGenerator::generate(node.getValueArray()), bindResult); } size_t transport(const MemoLogicalDelegatorNode& node) { return computeHashSeq<4>(std::hash()(node.getGroupId())); } size_t transport(const FilterNode& node, size_t childResult, size_t filterResult) { return computeHashSeq<5>(filterResult, childResult); } size_t transport(const EvaluationNode& node, size_t childResult, size_t projectionResult) { return computeHashSeq<6>(projectionResult, childResult); } size_t transport(const SargableNode& node, size_t childResult, size_t /*bindResult*/, size_t /*refResult*/) { // Specifically not hashing the candidate indexes and ScanParams. Those are derivative of // the requirements, and can have temp projection names. return computeHashSeq<44>(computePartialSchemaReqHash(node.getReqMap()), std::hash()(node.getTarget()), childResult); } size_t transport(const RIDIntersectNode& node, size_t leftChildResult, size_t rightChildResult) { // Specifically always including children. return computeHashSeq<45>(ProjectionName::Hasher()(node.getScanProjectionName()), leftChildResult, rightChildResult); } size_t transport(const RIDUnionNode& node, size_t leftChildResult, size_t rightChildResult) { return computeHashSeq<47>(ProjectionName::Hasher()(node.getScanProjectionName()), leftChildResult, rightChildResult); } size_t transport(const BinaryJoinNode& node, size_t leftChildResult, size_t rightChildResult, size_t filterResult) { // Specifically always including children. return computeHashSeq<7>(filterResult, leftChildResult, rightChildResult); } size_t transport(const UnionNode& node, std::vector childResults, size_t bindResult, size_t refsResult) { // Specifically always including children. return computeHashSeq<9>(bindResult, refsResult, computeVectorHash(childResults)); } size_t transport(const GroupByNode& node, size_t childResult, size_t bindAggResult, size_t refsAggResult, size_t bindGbResult, size_t refsGbResult) { return computeHashSeq<10>(bindAggResult, refsAggResult, bindGbResult, refsGbResult, std::hash()(node.getType()), childResult); } size_t transport(const UnwindNode& node, size_t childResult, size_t bindResult, size_t refsResult) { return computeHashSeq<11>( std::hash()(node.getRetainNonArrays()), bindResult, refsResult, childResult); } size_t transport(const CollationNode& node, size_t childResult, size_t /*refsResult*/) { return computeHashSeq<13>(computeCollationHash(node.getProperty()), childResult); } size_t transport(const LimitSkipNode& node, size_t childResult) { return computeHashSeq<14>(computeLimitSkipHash(node.getProperty()), childResult); } size_t transport(const ExchangeNode& node, size_t childResult, size_t /*refsResult*/) { return computeHashSeq<43>(computeDistributionHash(node.getProperty()), childResult); } size_t transport(const RootNode& node, size_t childResult, size_t /*refsResult*/) { return computeHashSeq<15>(computeProjectionRequirementHash(node.getProperty()), childResult); } /** * Expressions */ size_t transport(const Blackhole& expr) { return computeHashSeq<16>(); } size_t transport(const Constant& expr) { auto [tag, val] = expr.get(); return computeHashSeq<17>(sbe::value::hashValue(tag, val)); } size_t transport(const Variable& expr) { return computeHashSeq<18>(ProjectionName::Hasher()(expr.name())); } size_t transport(const UnaryOp& expr, size_t inResult) { return computeHashSeq<19>(std::hash()(expr.op()), inResult); } size_t transport(const BinaryOp& expr, size_t leftResult, size_t rightResult) { return computeHashSeq<20>(std::hash()(expr.op()), leftResult, rightResult); } size_t transport(const If& expr, size_t condResult, size_t thenResult, size_t elseResult) { return computeHashSeq<21>(condResult, thenResult, elseResult); } size_t transport(const Let& expr, size_t bindResult, size_t exprResult) { return computeHashSeq<22>(ProjectionName::Hasher()(expr.varName()), bindResult, exprResult); } size_t transport(const LambdaAbstraction& expr, size_t inResult) { return computeHashSeq<23>(ProjectionName::Hasher()(expr.varName()), inResult); } size_t transport(const LambdaApplication& expr, size_t lambdaResult, size_t argumentResult) { return computeHashSeq<24>(lambdaResult, argumentResult); } size_t transport(const FunctionCall& expr, std::vector argResults) { return computeHashSeq<25>(std::hash()(expr.name()), computeVectorHash(argResults)); } size_t transport(const EvalPath& expr, size_t pathResult, size_t inputResult) { return computeHashSeq<26>(pathResult, inputResult); } size_t transport(const EvalFilter& expr, size_t pathResult, size_t inputResult) { return computeHashSeq<27>(pathResult, inputResult); } size_t transport(const Source& expr) { return computeHashSeq<28>(); } /** * Paths */ size_t transport(const PathConstant& path, size_t inResult) { return computeHashSeq<29>(inResult); } size_t transport(const PathLambda& path, size_t inResult) { return computeHashSeq<30>(inResult); } size_t transport(const PathIdentity& path) { return computeHashSeq<31>(); } size_t transport(const PathDefault& path, size_t inResult) { return computeHashSeq<32>(inResult); } size_t transport(const PathCompare& path, size_t valueResult) { return computeHashSeq<33>(std::hash()(path.op()), valueResult); } size_t transport(const PathDrop& path) { size_t namesHash = 17; for (const FieldNameType& name : path.getNames()) { updateHash(namesHash, FieldNameType::Hasher()(name)); } return computeHashSeq<34>(namesHash); } size_t transport(const PathKeep& path) { size_t namesHash = 17; for (const FieldNameType& name : path.getNames()) { updateHash(namesHash, FieldNameType::Hasher()(name)); } return computeHashSeq<35>(namesHash); } size_t transport(const PathObj& path) { return computeHashSeq<36>(); } size_t transport(const PathArr& path) { return computeHashSeq<37>(); } size_t transport(const PathTraverse& path, size_t inResult) { return computeHashSeq<38>(inResult, std::hash()(path.getMaxDepth())); } size_t transport(const PathField& path, size_t inResult) { return computeHashSeq<39>(FieldNameType::Hasher()(path.name()), inResult); } size_t transport(const PathGet& path, size_t inResult) { return computeHashSeq<40>(FieldNameType::Hasher()(path.name()), inResult); } size_t transport(const PathComposeM& path, size_t leftResult, size_t rightResult) { return computeHashSeq<41>(leftResult, rightResult); } size_t transport(const PathComposeA& path, size_t leftResult, size_t rightResult) { return computeHashSeq<42>(leftResult, rightResult); } size_t generate(const ABT& node) { return algebra::transport(node, *this); } size_t generate(const ABT::reference_type& nodeRef) { return algebra::transport(nodeRef, *this); } }; size_t ABTHashGenerator::generate(const ABT& node) { ABTHashTransporter gen; return gen.generate(node); } size_t ABTHashGenerator::generate(const ABT::reference_type& nodeRef) { ABTHashTransporter gen; return gen.generate(nodeRef); } class PhysPropsHasher { public: size_t operator()(const properties::PhysProperty&, const properties::CollationRequirement& prop) { return computeHashSeq<1>(computeCollationHash(prop)); } size_t operator()(const properties::PhysProperty&, const properties::LimitSkipRequirement& prop) { return computeHashSeq<2>(computeLimitSkipHash(prop)); } size_t operator()(const properties::PhysProperty&, const properties::ProjectionRequirement& prop) { return computeHashSeq<3>(computeProjectionRequirementHash(prop)); } size_t operator()(const properties::PhysProperty&, const properties::DistributionRequirement& prop) { return computeHashSeq<4>(computeDistributionHash(prop)); } size_t operator()(const properties::PhysProperty&, const properties::IndexingRequirement& prop) { return computeHashSeq<5>(std::hash()(prop.getIndexReqTarget()), std::hash()(prop.getDedupRID())); } size_t operator()(const properties::PhysProperty&, const properties::RepetitionEstimate& prop) { return computeHashSeq<6>(std::hash()(prop.getEstimate())); } size_t operator()(const properties::PhysProperty&, const properties::LimitEstimate& prop) { return computeHashSeq<7>(CEType::Hasher()(prop.getEstimate())); } static size_t computeHash(const properties::PhysProps& props) { PhysPropsHasher visitor; size_t result = 17; for (const auto& prop : props) { updateHashUnordered(result, prop.second.visit(visitor)); } return result; } }; size_t ABTHashGenerator::generateForPhysProps(const properties::PhysProps& props) { return PhysPropsHasher::computeHash(props); } } // namespace mongo::optimizer