1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
|
#pragma once
#include <cstddef>
#include <functional>
#include <iterator>
#include <vector>
namespace mbgl {
/*
Computes the longest common subsequence (LCS) of sequences A and B, represented
by pairs of random access iterators. The result is output to the provided output
iterator. Equality of elements is determined by the provided comparator, defaulting
to ==.
The algorithm used is the O(ND) time and space algorithm from:
Myers, Eugene W. An O(ND) Difference Algorithm and Its Variations. Algorithmica
(1986) 1: 251. http://xmailserver.org/diff2.pdf
For understanding this algorithm, http://simplygenius.net/Article/DiffTutorial1 is
also helpful.
TODO: implement the O(N) space refinement presented in the paper.
*/
template <class InIt, class OutIt, class Equal>
OutIt longest_common_subsequence(InIt a, InIt endA,
InIt b, InIt endB,
OutIt outIt,
Equal eq) {
const std::ptrdiff_t N = endA - a;
const std::ptrdiff_t M = endB - b;
const std::ptrdiff_t D = N + M;
if (D == 0) {
return outIt;
}
std::vector<std::vector<std::ptrdiff_t>> vs;
// Self-executing lambda to allow `return` to break from inner loop, and avoid shadowing `v`.
[&] () {
std::vector<std::ptrdiff_t> v;
v.resize(2 * D + 1);
v[1] = 0;
// Core of the algorithm: greedily find farthest-reaching D-paths for increasing
// values of D. Store the farthest-reaching endpoints found in each iteration for
// later reconstructing the LCS.
for (std::ptrdiff_t d = 0; d <= D; ++d) {
for (std::ptrdiff_t k = -d; k <= d; k += 2) {
std::ptrdiff_t x = (k == -d || (k != d && v.at(k - 1 + D) < v.at(k + 1 + D)))
? v.at(k + 1 + D) // moving down
: v.at(k - 1 + D) + 1; // moving right
std::ptrdiff_t y = x - k;
while (x < N && y < M && eq(a[x], b[y])) {
x++;
y++;
}
v[k + D] = x;
if (x >= N && y >= M) {
vs.push_back(v);
return;
}
}
vs.push_back(v);
}
}();
std::ptrdiff_t x = N;
std::ptrdiff_t y = M;
using E = typename std::iterator_traits<InIt>::value_type;
std::vector<E> lcsReverse;
// Reconstruct the LCS using the farthest-reaching endpoints stored above.
for (std::ptrdiff_t d = vs.size() - 1; x > 0 || y > 0; --d) {
const std::vector<std::ptrdiff_t>& v = vs.at(d);
const std::ptrdiff_t k = x - y;
const bool down = (k == -d || (k != d && v.at(k - 1 + D) < v.at(k + 1 + D)));
const std::ptrdiff_t kPrev = down ? k + 1 : k - 1;
x = v.at(kPrev + D);
y = x - kPrev;
for (std::ptrdiff_t c = v[k + D]; c != (down ? x : x + 1); --c) {
lcsReverse.push_back(a[c - 1]);
}
}
return std::copy(lcsReverse.rbegin(), lcsReverse.rend(), outIt);
}
template < typename InIt, typename OutIt >
OutIt longest_common_subsequence(InIt a, InIt endA,
InIt b, InIt endB,
OutIt outIt) {
return longest_common_subsequence(a, endA, b, endB, outIt, std::equal_to<>());
}
} // namespace mbgl
|