diff options
-rw-r--r-- | src/couch/src/couch_db.erl | 10 | ||||
-rw-r--r-- | src/couch/src/couch_db_updater.erl | 33 | ||||
-rw-r--r-- | src/couch/src/couch_key_tree.erl | 74 |
3 files changed, 95 insertions, 22 deletions
diff --git a/src/couch/src/couch_db.erl b/src/couch/src/couch_db.erl index 93ea07e65..b47cc7ece 100644 --- a/src/couch/src/couch_db.erl +++ b/src/couch/src/couch_db.erl @@ -870,16 +870,10 @@ prep_and_validate_replicated_updates(Db, [Bucket|RestBuckets], [OldInfo|RestOldI {[], AccErrors}, Bucket), prep_and_validate_replicated_updates(Db, RestBuckets, RestOldInfo, [ValidatedBucket | AccPrepped], AccErrors3); #full_doc_info{rev_tree=OldTree} -> - RevsLimit = get_revs_limit(Db), OldLeafs = couch_key_tree:get_all_leafs_full(OldTree), OldLeafsLU = [{Start, RevId} || {Start, [{RevId, _}|_]} <- OldLeafs], - NewRevTree = lists:foldl( - fun(NewDoc, AccTree) -> - {NewTree, _} = couch_key_tree:merge(AccTree, - couch_doc:to_path(NewDoc), RevsLimit), - NewTree - end, - OldTree, Bucket), + NewPaths = lists:map(fun couch_doc:to_path/1, Bucket), + NewRevTree = couch_key_tree:multi_merge(OldTree, NewPaths), Leafs = couch_key_tree:get_all_leafs_full(NewRevTree), LeafRevsFullDict = dict:from_list( [{{Start, RevId}, FullPath} || {Start, [{RevId, _}|_]}=FullPath <- Leafs]), {ValidatedBucket, AccErrors3} = diff --git a/src/couch/src/couch_db_updater.erl b/src/couch/src/couch_db_updater.erl index a2de3bc60..fba99a773 100644 --- a/src/couch/src/couch_db_updater.erl +++ b/src/couch/src/couch_db_updater.erl @@ -504,23 +504,24 @@ merge_rev_trees(Limit, MergeConflicts, [NewDocs|RestDocsList], [OldDocInfo|RestOldInfo], AccNewInfos, AccRemoveSeqs, AccSeq) -> erlang:put(last_id_merged, OldDocInfo#full_doc_info.id), % for debugging NewDocInfo0 = lists:foldl(fun({Client, NewDoc}, OldInfoAcc) -> - merge_rev_tree(OldInfoAcc, NewDoc, Client, Limit, MergeConflicts) + merge_rev_tree(OldInfoAcc, NewDoc, Client, MergeConflicts) end, OldDocInfo, NewDocs), + NewDocInfo1 = stem_full_doc_info(NewDocInfo0, Limit), % When MergeConflicts is false, we updated #full_doc_info.deleted on every % iteration of merge_rev_tree. However, merge_rev_tree does not update % #full_doc_info.deleted when MergeConflicts is true, since we don't need % to know whether the doc is deleted between iterations. Since we still % need to know if the doc is deleted after the merge happens, we have to % set it here. - NewDocInfo1 = case MergeConflicts of + NewDocInfo2 = case MergeConflicts of true -> - NewDocInfo0#full_doc_info{ - deleted = couch_doc:is_deleted(NewDocInfo0) + NewDocInfo1#full_doc_info{ + deleted = couch_doc:is_deleted(NewDocInfo1) }; false -> - NewDocInfo0 + NewDocInfo1 end, - if NewDocInfo1 == OldDocInfo -> + if NewDocInfo2 == OldDocInfo -> % nothing changed merge_rev_trees(Limit, MergeConflicts, RestDocsList, RestOldInfo, AccNewInfos, AccRemoveSeqs, AccSeq); @@ -529,7 +530,7 @@ merge_rev_trees(Limit, MergeConflicts, [NewDocs|RestDocsList], % important to note that the update_seq on OldDocInfo should % be identical to the value on NewDocInfo1. OldSeq = OldDocInfo#full_doc_info.update_seq, - NewDocInfo2 = NewDocInfo1#full_doc_info{ + NewDocInfo3 = NewDocInfo2#full_doc_info{ update_seq = AccSeq + 1 }, RemoveSeqs = case OldSeq of @@ -537,10 +538,10 @@ merge_rev_trees(Limit, MergeConflicts, [NewDocs|RestDocsList], _ -> [OldSeq | AccRemoveSeqs] end, merge_rev_trees(Limit, MergeConflicts, RestDocsList, RestOldInfo, - [NewDocInfo2|AccNewInfos], RemoveSeqs, AccSeq+1) + [NewDocInfo3|AccNewInfos], RemoveSeqs, AccSeq+1) end. -merge_rev_tree(OldInfo, NewDoc, Client, Limit, false) +merge_rev_tree(OldInfo, NewDoc, Client, false) when OldInfo#full_doc_info.deleted -> % We're recreating a document that was previously % deleted. To check that this is a recreation from @@ -574,7 +575,7 @@ merge_rev_tree(OldInfo, NewDoc, Client, Limit, false) % Merge our modified new doc into the tree #full_doc_info{rev_tree=OldTree} = OldInfo, NewTree0 = couch_doc:to_path(NewDoc2), - case couch_key_tree:merge(OldTree, NewTree0, Limit) of + case couch_key_tree:merge(OldTree, NewTree0) of {NewTree1, new_leaf} -> % We changed the revision id so inform the caller send_result(Client, NewDoc, {ok, {OldPos+1, NewRevId}}), @@ -589,7 +590,7 @@ merge_rev_tree(OldInfo, NewDoc, Client, Limit, false) send_result(Client, NewDoc, conflict), OldInfo end; -merge_rev_tree(OldInfo, NewDoc, Client, Limit, false) -> +merge_rev_tree(OldInfo, NewDoc, Client, false) -> % We're attempting to merge a new revision into an % undeleted document. To not be a conflict we require % that the merge results in extending a branch. @@ -597,7 +598,7 @@ merge_rev_tree(OldInfo, NewDoc, Client, Limit, false) -> OldTree = OldInfo#full_doc_info.rev_tree, NewTree0 = couch_doc:to_path(NewDoc), NewDeleted = NewDoc#doc.deleted, - case couch_key_tree:merge(OldTree, NewTree0, Limit) of + case couch_key_tree:merge(OldTree, NewTree0) of {NewTree, new_leaf} when not NewDeleted -> OldInfo#full_doc_info{ rev_tree = NewTree, @@ -615,14 +616,18 @@ merge_rev_tree(OldInfo, NewDoc, Client, Limit, false) -> send_result(Client, NewDoc, conflict), OldInfo end; -merge_rev_tree(OldInfo, NewDoc, _Client, Limit, true) -> +merge_rev_tree(OldInfo, NewDoc, _Client, true) -> % We're merging in revisions without caring about % conflicts. Most likely this is a replication update. OldTree = OldInfo#full_doc_info.rev_tree, NewTree0 = couch_doc:to_path(NewDoc), - {NewTree, _} = couch_key_tree:merge(OldTree, NewTree0, Limit), + {NewTree, _} = couch_key_tree:merge(OldTree, NewTree0), OldInfo#full_doc_info{rev_tree = NewTree}. +stem_full_doc_info(#full_doc_info{rev_tree = Tree} = Info, Limit) -> + Stemmed = couch_key_tree:stem(Tree, Limit), + Info#full_doc_info{rev_tree = Stemmed}. + update_docs_int(Db, DocsList, LocalDocs, MergeConflicts, FullCommit) -> UpdateSeq = couch_db_engine:get_update_seq(Db), RevsLimit = couch_db_engine:get_revs_limit(Db), diff --git a/src/couch/src/couch_key_tree.erl b/src/couch/src/couch_key_tree.erl index cd661e29a..5d5361507 100644 --- a/src/couch/src/couch_key_tree.erl +++ b/src/couch/src/couch_key_tree.erl @@ -59,6 +59,7 @@ get_key_leafs/2, map/2, map_leafs/2, mapfold/3, +multi_merge/2, merge/3, merge/2, remove_leafs/2, @@ -71,6 +72,15 @@ stem/2 -type revtree() :: [tree()]. +%% @doc Merge multiple paths into the given tree. +-spec multi_merge(revtree(), tree()) -> revtree(). +multi_merge(RevTree, Trees) -> + lists:foldl(fun(Tree, RevTreeAcc) -> + {NewRevTree, _} = merge(RevTreeAcc, Tree), + NewRevTree + end, RevTree, lists:sort(Trees)). + + %% @doc Merge a path into the given tree and then stem the result. %% Although Tree is of type tree(), it must not contain any branches. -spec merge(revtree(), tree() | path(), pos_integer()) -> @@ -470,6 +480,70 @@ map_leafs_simple(Fun, Pos, [{Key, Value, SubTree} | RestTree]) -> stem(Trees, Limit) -> + try + {_, Branches} = lists:foldl(fun(Tree, {Seen, TreeAcc}) -> + {NewSeen, NewBranches} = stem_tree(Tree, Limit, Seen), + {NewSeen, NewBranches ++ TreeAcc} + end, {sets:new(), []}, Trees), + lists:sort(Branches) + catch throw:dupe_keys -> + repair_tree(Trees, Limit) + end. + + +stem_tree({Depth, Child}, Limit, Seen) -> + case stem_tree(Depth, Child, Limit, Seen) of + {NewSeen, _, NewChild, NewBranches} -> + {NewSeen, [{Depth, NewChild} | NewBranches]}; + {NewSeen, _, NewBranches} -> + {NewSeen, NewBranches} + end. + + +stem_tree(_Depth, {Key, _Val, []} = Leaf, Limit, Seen) -> + {check_key(Key, Seen), Limit - 1, Leaf, []}; + +stem_tree(Depth, {Key, Val, Children}, Limit, Seen0) -> + Seen1 = check_key(Key, Seen0), + FinalAcc = lists:foldl(fun(Child, Acc) -> + {SeenAcc, LimitPosAcc, ChildAcc, BranchAcc} = Acc, + case stem_tree(Depth + 1, Child, Limit, SeenAcc) of + {NewSeenAcc, LimitPos, NewChild, NewBranches} -> + NewLimitPosAcc = erlang:max(LimitPos, LimitPosAcc), + NewChildAcc = [NewChild | ChildAcc], + NewBranchAcc = NewBranches ++ BranchAcc, + {NewSeenAcc, NewLimitPosAcc, NewChildAcc, NewBranchAcc}; + {NewSeenAcc, LimitPos, NewBranches} -> + NewLimitPosAcc = erlang:max(LimitPos, LimitPosAcc), + NewBranchAcc = NewBranches ++ BranchAcc, + {NewSeenAcc, NewLimitPosAcc, ChildAcc, NewBranchAcc} + end + end, {Seen1, -1, [], []}, Children), + {FinalSeen, FinalLimitPos, FinalChildren, FinalBranches} = FinalAcc, + case FinalLimitPos of + N when N > 0, length(FinalChildren) > 0 -> + FinalNode = {Key, Val, lists:reverse(FinalChildren)}, + {FinalSeen, FinalLimitPos - 1, FinalNode, FinalBranches}; + 0 when length(FinalChildren) > 0 -> + NewBranches = lists:map(fun(Child) -> + {Depth + 1, Child} + end, lists:reverse(FinalChildren)), + {FinalSeen, -1, NewBranches ++ FinalBranches}; + N when N < 0, length(FinalChildren) == 0 -> + {FinalSeen, FinalLimitPos - 1, FinalBranches} + end. + + +check_key(Key, Seen) -> + case sets:is_element(Key, Seen) of + true -> + throw(dupe_keys); + false -> + sets:add_element(Key, Seen) + end. + + +repair_tree(Trees, Limit) -> % flatten each branch in a tree into a tree path, sort by starting rev # Paths = lists:sort(lists:map(fun({Pos, Path}) -> StemmedPath = lists:sublist(Path, Limit), |