From c2a5894f2dbe9982830066ab9347b059e6e7d845 Mon Sep 17 00:00:00 2001 From: John Firebaugh Date: Tue, 25 Apr 2017 18:20:26 -0700 Subject: [core] Immutable Impls --- src/mbgl/util/longest_common_subsequence.hpp | 106 +++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 src/mbgl/util/longest_common_subsequence.hpp (limited to 'src/mbgl/util/longest_common_subsequence.hpp') diff --git a/src/mbgl/util/longest_common_subsequence.hpp b/src/mbgl/util/longest_common_subsequence.hpp new file mode 100644 index 0000000000..ac127c6356 --- /dev/null +++ b/src/mbgl/util/longest_common_subsequence.hpp @@ -0,0 +1,106 @@ +#pragma once + +#include +#include +#include +#include + +namespace mbgl { + +/* + Computes the longest common subsequence (LCS) of sequences A and B, represented + by pairs of random access iterators. The result is output to the provided output + iterator. Equality of elements is determined by the provided comparator, defaulting + to ==. + + The algorithm used is the O(ND) time and space algorithm from: + + Myers, Eugene W. An O(ND) Difference Algorithm and Its Variations. Algorithmica + (1986) 1: 251. http://xmailserver.org/diff2.pdf + + For understanding this algorithm, http://simplygenius.net/Article/DiffTutorial1 is + also helpful. + + TODO: implement the O(N) space refinement presented in the paper. +*/ +template +OutIt longest_common_subsequence(InIt a, InIt endA, + InIt b, InIt endB, + OutIt outIt, + Equal eq) { + const std::ptrdiff_t N = endA - a; + const std::ptrdiff_t M = endB - b; + const std::ptrdiff_t D = N + M; + + if (D == 0) { + return outIt; + } + + std::vector> vs; + + // Self-executing lambda to allow `return` to break from inner loop, and avoid shadowing `v`. + [&] () { + std::vector v; + v.resize(2 * D + 1); + v[1] = 0; + + // Core of the algorithm: greedily find farthest-reaching D-paths for increasing + // values of D. Store the farthest-reaching endpoints found in each iteration for + // later reconstructing the LCS. + for (std::ptrdiff_t d = 0; d <= D; ++d) { + for (std::ptrdiff_t k = -d; k <= d; k += 2) { + std::ptrdiff_t x = (k == -d || (k != d && v.at(k - 1 + D) < v.at(k + 1 + D))) + ? v.at(k + 1 + D) // moving down + : v.at(k - 1 + D) + 1; // moving right + + std::ptrdiff_t y = x - k; + + while (x < N && y < M && eq(a[x], b[y])) { + x++; + y++; + } + + v[k + D] = x; + + if (x >= N && y >= M) { + vs.push_back(v); + return; + } + } + + vs.push_back(v); + } + }(); + + std::ptrdiff_t x = N; + std::ptrdiff_t y = M; + + using E = typename std::iterator_traits::value_type; + std::vector lcsReverse; + + // Reconstruct the LCS using the farthest-reaching endpoints stored above. + for (std::ptrdiff_t d = vs.size() - 1; x > 0 || y > 0; --d) { + const std::vector& v = vs.at(d); + const std::ptrdiff_t k = x - y; + const bool down = (k == -d || (k != d && v.at(k - 1 + D) < v.at(k + 1 + D))); + const std::ptrdiff_t kPrev = down ? k + 1 : k - 1; + + x = v.at(kPrev + D); + y = x - kPrev; + + for (std::ptrdiff_t c = v[k + D]; c != (down ? x : x + 1); --c) { + lcsReverse.push_back(a[c - 1]); + } + } + + return std::copy(lcsReverse.rbegin(), lcsReverse.rend(), outIt); +} + +template < typename InIt, typename OutIt > +OutIt longest_common_subsequence(InIt a, InIt endA, + InIt b, InIt endB, + OutIt outIt) { + return longest_common_subsequence(a, endA, b, endB, outIt, std::equal_to<>()); +} + +} -- cgit v1.2.1