summaryrefslogtreecommitdiff
path: root/chromium/v8/src/debug/liveedit-diff.cc
blob: ff2f8aa88a9a9c1b9ee4d203a897cd02c9400af8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
// Copyright 2022 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/debug/liveedit-diff.h"

#include <cmath>
#include <map>
#include <vector>

#include "src/base/logging.h"
#include "src/base/optional.h"

namespace v8 {
namespace internal {

namespace {

// Implements Myer's Algorithm from
// "An O(ND) Difference Algorithm and Its Variations", particularly the
// linear space refinement mentioned in section 4b.
//
// The differ is input agnostic.
//
// The algorithm works by finding the shortest edit string (SES) in the edit
// graph. The SES describes how to get from a string A of length N to a string
// B of length M via deleting from A and inserting from B.
//
// Example: A = "abbaa", B = "abab"
//
//                  A
//
//          a   b   b   a    a
//        o---o---o---o---o---o
//      a | \ |   |   | \ | \ |
//        o---o---o---o---o---o
//      b |   | \ | \ |   |   |
//  B     o---o---o---o---o---o
//      a | \ |   |   | \ | \ |
//        o---o---o---o---o---o
//      b |   | \ | \ |   |   |
//        o---o---o---o---o---o
//
// The edit graph is constructed with the characters from string A on the x-axis
// and the characters from string B on the y-axis. Starting from (0, 0) we can:
//
//     - Move right, which is equivalent to deleting from A
//     - Move downwards, which is equivalent to inserting from B
//     - Move diagonally if the characters from string A and B match, which
//       means no insertion or deletion.
//
// Any path from (0, 0) to (N, M) describes a valid edit string, but we try to
// find the path with the most diagonals, conversely that is the path with the
// least insertions or deletions.
// Note that a path with "D" insertions/deletions is called a D-path.
class MyersDiffer {
 private:
  // A point in the edit graph.
  struct Point {
    int x, y;

    // Less-than for a point in the edit graph is defined as less than in both
    // components (i.e. at least one diagonal away).
    bool operator<(const Point& other) const {
      return x < other.x && y < other.y;
    }
  };

  // Describes a rectangle in the edit graph.
  struct EditGraphArea {
    Point top_left, bottom_right;

    int width() const { return bottom_right.x - top_left.x; }
    int height() const { return bottom_right.y - top_left.y; }
    int size() const { return width() + height(); }
    int delta() const { return width() - height(); }
  };

  // A path or path-segment through the edit graph. Not all points along
  // the path are necessarily listed since it is trivial to figure out all
  // the concrete points along a snake.
  struct Path {
    std::vector<Point> points;

    void Add(const Point& p) { points.push_back(p); }
    void Add(const Path& p) {
      points.insert(points.end(), p.points.begin(), p.points.end());
    }
  };

  // A snake is a path between two points that is either:
  //
  //     - A single right or down move followed by a (possibly empty) list of
  //       diagonals (in the normal case).
  //     - A (possibly empty) list of diagonals followed by a single right or
  //       or down move (in the reverse case).
  struct Snake {
    Point from, to;
  };

  // A thin wrapper around std::vector<int> that allows negative indexing.
  //
  // This class stores the x-value of the furthest reaching path
  // for each k-diagonal. k-diagonals are numbered from -M to N and defined
  // by y(x) = x - k.
  //
  // We only store the x-value instead of the full point since we can
  // calculate y via y = x - k.
  class FurthestReaching {
   public:
    explicit FurthestReaching(std::vector<int>::size_type size) : v_(size) {}

    int& operator[](int index) {
      const size_t idx = index >= 0 ? index : v_.size() + index;
      return v_[idx];
    }

    const int& operator[](int index) const {
      const size_t idx = index >= 0 ? index : v_.size() + index;
      return v_[idx];
    }

   private:
    std::vector<int> v_;
  };

  class ResultWriter;

  Comparator::Input* input_;
  Comparator::Output* output_;

  // Stores the x-value of the furthest reaching path for each k-diagonal.
  // k-diagonals are numbered from '-height' to 'width', centered on (0,0) and
  // are defined by y(x) = x - k.
  FurthestReaching fr_forward_;

  // Stores the x-value of the furthest reaching reverse path for each
  // l-diagonal. l-diagonals are numbered from '-width' to 'height' and centered
  // on 'bottom_right' of the edit graph area.
  // k-diagonals and l-diagonals represent the same diagonals. While we refer to
  // the diagonals as k-diagonals when calculating SES from (0,0), we refer to
  // the diagonals as l-diagonals when calculating SES from (M,N).
  // The corresponding k-diagonal name of an l-diagonal is: k = l + delta
  // where delta = width -height.
  FurthestReaching fr_reverse_;

  MyersDiffer(Comparator::Input* input, Comparator::Output* output)
      : input_(input),
        output_(output),
        fr_forward_(input->GetLength1() + input->GetLength2() + 1),
        fr_reverse_(input->GetLength1() + input->GetLength2() + 1) {
    // Length1 + Length2 + 1 is the upper bound for our work arrays.
    // We allocate the work arrays once and re-use them for all invocations of
    // `FindMiddleSnake`.
  }

  base::Optional<Path> FindEditPath() {
    return FindEditPath(Point{0, 0},
                        Point{input_->GetLength1(), input_->GetLength2()});
  }

  // Returns the path of the SES between `from` and `to`.
  base::Optional<Path> FindEditPath(Point from, Point to) {
    // Divide the area described by `from` and `to` by finding the
    // middle snake ...
    base::Optional<Snake> snake = FindMiddleSnake(from, to);

    if (!snake) return base::nullopt;

    // ... and then conquer the two resulting sub-areas.
    base::Optional<Path> head = FindEditPath(from, snake->from);
    base::Optional<Path> tail = FindEditPath(snake->to, to);

    // Combine `head` and `tail` or use the snake start/end points for
    // zero-size areas.
    Path result;
    if (head) {
      result.Add(*head);
    } else {
      result.Add(snake->from);
    }

    if (tail) {
      result.Add(*tail);
    } else {
      result.Add(snake->to);
    }
    return result;
  }

  // Returns the snake in the middle of the area described by `from` and `to`.
  //
  // Incrementally calculates the D-paths (starting from 'from') and the
  // "reverse" D-paths (starting from 'to') until we find a "normal" and a
  // "reverse" path that overlap. That is we first calculate the normal
  // and reverse 0-path, then the normal and reverse 1-path and so on.
  //
  // If a step from a (d-1)-path to a d-path overlaps with a reverse path on
  // the same diagonal (or the other way around), then we consider that step
  // our middle snake and return it immediately.
  base::Optional<Snake> FindMiddleSnake(Point from, Point to) {
    EditGraphArea area{from, to};
    if (area.size() == 0) return base::nullopt;

    // Initialise the furthest reaching vectors with an "artificial" edge
    // from (0, -1) -> (0, 0) and (N, -M) -> (N, M) to serve as the initial
    // snake when d = 0.
    fr_forward_[1] = area.top_left.x;
    fr_reverse_[-1] = area.bottom_right.x;

    for (int d = 0; d <= std::ceil(area.size() / 2.0f); ++d) {
      if (auto snake = ShortestEditForward(area, d)) return snake;
      if (auto snake = ShortestEditReverse(area, d)) return snake;
    }

    return base::nullopt;
  }

  // Greedily calculates the furthest reaching `d`-paths for each k-diagonal
  // where k is in [-d, d].  For each k-diagonal we look at the furthest
  // reaching `d-1`-path on the `k-1` and `k+1` depending on which is further
  // along the x-axis we either add an insertion from the `k+1`-diagonal or
  // a deletion from the `k-1`-diagonal. Then we follow all possible diagonal
  // moves and finally record the result as the furthest reaching path on the
  // k-diagonal.
  base::Optional<Snake> ShortestEditForward(const EditGraphArea& area, int d) {
    Point from, to;
    // We alternate between looking at odd and even k-diagonals. That is
    // because when we extend a `d-path` by a single move we can at most move
    // one diagonal over. That is either move from `k-1` to `k` or from `k+1` to
    // `k`. That is if `d` is even (odd) then we require only the odd (even)
    // k-diagonals calculated in step `d-1`.
    for (int k = -d; k <= d; k += 2) {
      if (k == -d || (k != d && fr_forward_[k - 1] < fr_forward_[k + 1])) {
        // Move downwards, i.e. add an insertion, because either we are at the
        // edge and downwards is the only way we can move, or because the
        // `d-1`-path along the `k+1` diagonal reaches further on the x-axis
        // than the `d-1`-path along the `k-1` diagonal.
        from.x = to.x = fr_forward_[k + 1];
      } else {
        // Move right, i.e. add a deletion.
        from.x = fr_forward_[k - 1];
        to.x = from.x + 1;
      }

      // Calculate y via y = x - k. We need to adjust k though since the k=0
      // diagonal is centered on `area.top_left` and not (0, 0).
      to.y = area.top_left.y + (to.x - area.top_left.x) - k;
      from.y = (d == 0 || from.x != to.x) ? to.y : to.y - 1;

      // Extend the snake diagonally as long as we can.
      while (to < area.bottom_right && input_->Equals(to.x, to.y)) {
        ++to.x;
        ++to.y;
      }

      fr_forward_[k] = to.x;

      // Check whether there is a reverse path on this k-diagonal which we
      // are overlapping with. If yes, that is our snake.
      const bool odd = area.delta() % 2 != 0;
      const int l = k - area.delta();
      if (odd && l >= (-d + 1) && l <= d - 1 && to.x >= fr_reverse_[l]) {
        return Snake{from, to};
      }
    }
    return base::nullopt;
  }

  // Greedily calculates the furthest reaching reverse `d`-paths for each
  // l-diagonal where l is in [-d, d].
  // Works the same as `ShortestEditForward` but we move upwards and left
  // instead.
  base::Optional<Snake> ShortestEditReverse(const EditGraphArea& area, int d) {
    Point from, to;
    // We alternate between looking at odd and even l-diagonals. That is
    // because when we extend a `d-path` by a single move we can at most move
    // one diagonal over. That is either move from `l-1` to `l` or from `l+1` to
    // `l`. That is if `d` is even (odd) then we require only the odd (even)
    // l-diagonals calculated in step `d-1`.
    for (int l = d; l >= -d; l -= 2) {
      if (l == d || (l != -d && fr_reverse_[l - 1] > fr_reverse_[l + 1])) {
        // Move upwards, i.e. add an insertion, because either we are at the
        // edge and upwards is the only way we can move, or because the
        // `d-1`-path along the `l-1` diagonal reaches further on the x-axis
        // than the `d-1`-path along the `l+1` diagonal.
        from.x = to.x = fr_reverse_[l - 1];
      } else {
        // Move left, i.e. add a deletion.
        from.x = fr_reverse_[l + 1];
        to.x = from.x - 1;
      }

      // Calculate y via y = x - k. We need to adjust k though since the k=0
      // diagonal is centered on `area.top_left` and not (0, 0).
      const int k = l + area.delta();
      to.y = area.top_left.y + (to.x - area.top_left.x) - k;
      from.y = (d == 0 || from.x != to.x) ? to.y : to.y + 1;

      // Extend the snake diagonally as long as we can.
      while (area.top_left < to && input_->Equals(to.x - 1, to.y - 1)) {
        --to.x;
        --to.y;
      }

      fr_reverse_[l] = to.x;

      // Check whether there is a path on this k-diagonal which we
      // are overlapping with. If yes, that is our snake.
      const bool even = area.delta() % 2 == 0;
      if (even && k >= -d && k <= d && to.x <= fr_forward_[k]) {
        // Invert the points so the snake goes left to right, top to bottom.
        return Snake{to, from};
      }
    }
    return base::nullopt;
  }

  // Small helper class that converts a "shortest edit script" path into a
  // source mapping. The result is a list of "chunks" where each "chunk"
  // describes a range in the input string and where it can now be found
  // in the output string.
  //
  // The list of chunks can be calculated in a simple pass over all the points
  // of the edit path:
  //
  //     - For any diagonal we close and report the current chunk if there is
  //       one open at the moment.
  //     - For an insertion or deletion we open a new chunk if none is ongoing.
  class ResultWriter {
   public:
    explicit ResultWriter(Comparator::Output* output) : output_(output) {}

    void RecordNoModification(const Point& from) {
      if (!change_is_ongoing_) return;

      // We close the current chunk, going from `change_start_` to `from`.
      CHECK(change_start_);
      output_->AddChunk(change_start_->x, change_start_->y,
                        from.x - change_start_->x, from.y - change_start_->y);
      change_is_ongoing_ = false;
    }

    void RecordInsertionOrDeletion(const Point& from) {
      if (change_is_ongoing_) return;

      // We start a new chunk beginning at `from`.
      change_start_ = from;
      change_is_ongoing_ = true;
    }

   private:
    Comparator::Output* output_;
    bool change_is_ongoing_ = false;
    base::Optional<Point> change_start_;
  };

  // Takes an edit path and "fills in the blanks". That is we notify the
  // `ResultWriter` after each single downwards, left or diagonal move.
  void WriteResult(const Path& path) {
    ResultWriter writer(output_);

    for (size_t i = 1; i < path.points.size(); ++i) {
      Point p1 = path.points[i - 1];
      Point p2 = path.points[i];

      p1 = WalkDiagonal(writer, p1, p2);
      const int cmp = (p2.x - p1.x) - (p2.y - p1.y);
      if (cmp == -1) {
        writer.RecordInsertionOrDeletion(p1);
        p1.y++;
      } else if (cmp == 1) {
        writer.RecordInsertionOrDeletion(p1);
        p1.x++;
      }

      p1 = WalkDiagonal(writer, p1, p2);
      DCHECK(p1.x == p2.x && p1.y == p2.y);
    }

    // Write one diagonal in the end to flush out any open chunk.
    writer.RecordNoModification(path.points.back());
  }

  Point WalkDiagonal(ResultWriter& writer, Point p1, Point p2) {
    while (p1.x < p2.x && p1.y < p2.y && input_->Equals(p1.x, p1.y)) {
      writer.RecordNoModification(p1);
      p1.x++;
      p1.y++;
    }
    return p1;
  }

 public:
  static void MyersDiff(Comparator::Input* input, Comparator::Output* output) {
    MyersDiffer differ(input, output);
    auto result = differ.FindEditPath();
    if (!result) return;  // Empty input doesn't produce a path.

    differ.WriteResult(*result);
  }
};

}  // namespace

void Comparator::CalculateDifference(Comparator::Input* input,
                                     Comparator::Output* result_writer) {
  MyersDiffer::MyersDiff(input, result_writer);
}

}  // namespace internal
}  // namespace v8