diff options
Diffstat (limited to 'diff-lcs/tags/release-1.1.1/lib/diff/lcs.rb')
-rw-r--r-- | diff-lcs/tags/release-1.1.1/lib/diff/lcs.rb | 1105 |
1 files changed, 0 insertions, 1105 deletions
diff --git a/diff-lcs/tags/release-1.1.1/lib/diff/lcs.rb b/diff-lcs/tags/release-1.1.1/lib/diff/lcs.rb deleted file mode 100644 index 78fe1cd..0000000 --- a/diff-lcs/tags/release-1.1.1/lib/diff/lcs.rb +++ /dev/null @@ -1,1105 +0,0 @@ -#! /usr/env/bin ruby -#-- -# Copyright 2004 Austin Ziegler <diff-lcs@halostatue.ca> -# adapted from: -# Algorithm::Diff (Perl) by Ned Konz <perl@bike-nomad.com> -# Smalltalk by Mario I. Wolczko <mario@wolczko.com> -# implements McIlroy-Hunt diff algorithm -# -# This program is free software. It may be redistributed and/or modified -# under the terms of the GPL version 2 (or later), the Perl Artistic -# licence, or the Ruby licence. -# -# $Id$ -#++ - -module Diff - # = Diff::LCS 1.1.1 - # Computes "intelligent" differences between two sequenced Enumerables. - # This is an implementation of the McIlroy-Hunt "diff" algorithm for - # Enumerable objects that include Diffable. - # - # Based on Mario I. Wolczko's <mario@wolczko.com> Smalltalk version - # (1.2, 1993) and Ned Konz's <perl@bike-nomad.com> Perl version - # (Algorithm::Diff). 
- # - # == Synopsis - # require 'diff/lcs' - # - # seq1 = %w(a b c e h j l m n p) - # seq2 = %w(b c d e f j k l m r s t) - # - # lcs = Diff::LCS.LCS(seq1, seq2) - # diffs = Diff::LCS.diff(seq1, seq2) - # sdiff = Diff::LCS.sdiff(seq1, seq2) - # seq = Diff::LCS.traverse_sequences(seq1, seq2, callback_obj) - # bal = Diff::LCS.traverse_balanced(seq1, seq2, callback_obj) - # seq2 == Diff::LCS.patch(seq1, diffs) - # seq2 == Diff::LCS.patch!(seq1, diffs) - # seq1 == Diff::LCS.unpatch(seq2, diffs) - # seq1 == Diff::LCS.unpatch!(seq2, diffs) - # seq2 == Diff::LCS.patch(seq1, sdiff) - # seq2 == Diff::LCS.patch!(seq1, sdiff) - # seq1 == Diff::LCS.unpatch(seq2, sdiff) - # seq1 == Diff::LCS.unpatch!(seq2, sdiff) - # - # Alternatively, objects can be extended with Diff::LCS: - # - # seq1.extend(Diff::LCS) - # lcs = seq1.lcs(seq2) - # diffs = seq1.diff(seq2) - # sdiff = seq1.sdiff(seq2) - # seq = seq1.traverse_sequences(seq2, callback_obj) - # bal = seq1.traverse_balanced(seq2, callback_obj) - # seq2 == seq1.patch(diffs) - # seq2 == seq1.patch!(diffs) - # seq1 == seq2.unpatch(diffs) - # seq1 == seq2.unpatch!(diffs) - # seq2 == seq1.patch(sdiff) - # seq2 == seq1.patch!(sdiff) - # seq1 == seq2.unpatch(sdiff) - # seq1 == seq2.unpatch!(sdiff) - # - # Default extensions are provided for Array and String objects through - # the use of 'diff/lcs/array' and 'diff/lcs/string'. - # - # == Introduction (by Mark-Jason Dominus) - # - # <em>The following text is from the Perl documentation. The only - # changes have been to make the text appear better in Rdoc</em>. - # - # I once read an article written by the authors of +diff+; they said - # that they worked very hard on the algorithm until they found the - # right one. - # - # I think what they ended up using (and I hope someone will correct me, - # because I am not very confident about this) was the `longest common - # subsequence' method.
In the LCS problem, you have two sequences of - # items: - # - # a b c d f g h j q z - # a b c d e f g i j k r x y z - # - # and you want to find the longest sequence of items that is present in - # both original sequences in the same order. That is, you want to find a - # new sequence *S* which can be obtained from the first sequence by - # deleting some items, and from the second sequence by deleting other - # items. You also want *S* to be as long as possible. In this case *S* - # is: - # - # a b c d f g j z - # - # From there it's only a small step to get diff-like output: - # - # e h i k q r x y - # + - + + - + + + - # - # This module solves the LCS problem. It also includes a canned function - # to generate +diff+-like output. - # - # It might seem from the example above that the LCS of two sequences is - # always pretty obvious, but that's not always the case, especially when - # the two sequences have many repeated elements. For example, consider - # - # a x b y c z p d q - # a b c a x b y c z - # - # A naive approach might start by matching up the +a+ and +b+ that - # appear at the beginning of each sequence, like this: - # - # a x b y c z p d q - # a b c a b y c z - # - # This finds the common subsequence +a b c z+. But actually, the LCS is - # +a x b y c z+: - # - # a x b y c z p d q - # a b c a x b y c z - # - # == Author - # This version is by Austin Ziegler <diff-lcs@halostatue.ca>. - # - # It is based on the Perl Algorithm::Diff by Ned Konz - # <perl@bike-nomad.com>, copyright © 2000 - 2002 and the Smalltalk - # diff version by Mario I. Wolczko <mario@wolczko.com>, copyright © - # 1993. Documentation includes work by Mark-Jason Dominus. - # - # == Licence - # Copyright © 2004 Austin Ziegler - # This program is free software; you can redistribute it and/or modify it - # under the same terms as Ruby, or alternatively under the Perl Artistic - # licence. 
- # - # == Credits - # Much of the documentation is taken directly from the Perl - # Algorithm::Diff implementation and was written originally by Mark-Jason - # Dominus <mjd-perl-diff@plover.com> and later by Ned Konz. The basic Ruby - # implementation was re-ported from the Smalltalk implementation, available - # at ftp://st.cs.uiuc.edu/pub/Smalltalk/MANCHESTER/manchester/4.0/diff.st - # - # #sdiff and #traverse_balanced were written for the Perl version by Mike - # Schilli <m@perlmeister.com>. - # - # "The algorithm is described in <em>A Fast Algorithm for Computing Longest - # Common Subsequences</em>, CACM, vol.20, no.5, pp.350-353, May 1977, with - # a few minor improvements to improve the speed." - module LCS - VERSION = '1.1.1' - end -end - -require 'diff/lcs/callbacks' - -module Diff::LCS - # Returns an Array containing the longest common subsequence(s) between - # +self+ and +other+. See Diff::LCS#LCS. - # - # lcs = seq1.lcs(seq2) - def lcs(other, &block) #:yields self[ii] if there are matched subsequences: - Diff::LCS.LCS(self, other, &block) - end - - # Returns the difference set between +self+ and +other+. See - # Diff::LCS#diff. - def diff(other, callbacks = nil, &block) - Diff::LCS::diff(self, other, callbacks, &block) - end - - # Returns the balanced ("side-by-side") difference set between +self+ and - # +other+. See Diff::LCS#sdiff. - def sdiff(other, callbacks = nil, &block) - Diff::LCS::sdiff(self, other, callbacks, &block) - end - - # Traverses the discovered longest common subsequences between +self+ and - # +other+. See Diff::LCS#traverse_sequences. - def traverse_sequences(other, callbacks = nil, &block) - traverse_sequences(self, other, callbacks || Diff::LCS::YieldingCallbacks, - &block) - end - - # Traverses the discovered longest common subsequences between +self+ and - # +other+ using the alternate, balanced algorithm. See - # Diff::LCS#traverse_balanced. 
- def traverse_balanced(other, callbacks = nil, &block) - traverse_balanced(self, other, callbacks || Diff::LCS::YieldingCallbacks, - &block) - end - - # Attempts to patch a copy of +self+ with the provided +patchset+. See - # Diff::LCS#patch. - def patch(patchset) - Diff::LCS::patch(self.dup, patchset) - end - - # Attempts to unpatch a copy of +self+ with the provided +patchset+. - # See Diff::LCS#patch. - def unpatch(patchset) - Diff::LCS::unpatch(self.dup, patchset) - end - - # Attempts to patch +self+ with the provided +patchset+. See - # Diff::LCS#patch!. Does no autodiscovery. - def patch!(patchset) - Diff::LCS::patch!(self, patchset) - end - - # Attempts to unpatch +self+ with the provided +patchset+. See - # Diff::LCS#unpatch. Does no autodiscovery. - def unpatch!(patchset) - Diff::LCS::unpatch!(self, patchset) - end -end - -module Diff::LCS - class << self - # Given two sequenced Enumerables, LCS returns an Array containing their - # longest common subsequences. - # - # lcs = Diff::LCS.LCS(seq1, seq2) - # - # This array whose contents is such that: - # - # lcs.each_with_index do |ee, ii| - # assert(ee.nil? || (seq1[ii] == seq2[ee])) - # end - # - # If a block is provided, the matching subsequences will be yielded from - # +seq1+ in turn and may be modified before they are placed into the - # returned Array of subsequences. - def LCS(seq1, seq2, &block) #:yields seq1[ii] for each matched: - matches = Diff::LCS.__lcs(seq1, seq2) - ret = [] - matches.each_with_index do |ee, ii| - unless matches[ii].nil? - if block_given? - ret << (yield seq1[ii]) - else - ret << seq1[ii] - end - end - end - ret - end - - # Diff::LCS.diff computes the smallest set of additions and deletions - # necessary to turn the first sequence into the second, and returns a - # description of these changes. - # - # See Diff::LCS::DiffCallbacks for the default behaviour. An alternate - # behaviour may be implemented with Diff::LCS::ContextDiffCallbacks. 
- # If a Class argument is provided for +callbacks+, #diff will attempt - # to initialise it. If the +callbacks+ object (possibly initialised) - # responds to #finish, it will be called. - def diff(seq1, seq2, callbacks = nil, &block) # :yields diff changes: - callbacks ||= Diff::LCS::DiffCallbacks - if callbacks.kind_of?(Class) - cb = callbacks.new rescue callbacks - callbacks = cb - end - traverse_sequences(seq1, seq2, callbacks) - callbacks.finish if callbacks.respond_to?(:finish) - - if block_given? - res = callbacks.diffs.map do |hunk| - if hunk.kind_of?(Array) - hunk = hunk.map { |block| yield block } - else - yield hunk - end - end - res - else - callbacks.diffs - end - end - - # Diff::LCS.sdiff computes all necessary components to show two sequences - # and their minimized differences side by side, just like the Unix - # utility <em>sdiff</em> does: - # - # old < - - # same same - # before | after - # - > new - # - # See Diff::LCS::SDiffCallbacks for the default behaviour. An alternate - # behaviour may be implemented with Diff::LCS::ContextDiffCallbacks. If - # a Class argument is provided for +callbacks+, #diff will attempt to - # initialise it. If the +callbacks+ object (possibly initialised) - # responds to #finish, it will be called. - def sdiff(seq1, seq2, callbacks = nil, &block) #:yields diff changes: - callbacks ||= Diff::LCS::SDiffCallbacks - if callbacks.kind_of?(Class) - cb = callbacks.new rescue callbacks - callbacks = cb - end - traverse_balanced(seq1, seq2, callbacks) - callbacks.finish if callbacks.respond_to?(:finish) - - if block_given? - res = callbacks.diffs.map do |hunk| - if hunk.kind_of?(Array) - hunk = hunk.map { |block| yield block } - else - yield hunk - end - end - res - else - callbacks.diffs - end - end - - # Diff::LCS.traverse_sequences is the most general facility provided by this - # module; +diff+ and +LCS+ are implemented as calls to it. 
- # - # The arguments to #traverse_sequences are the two sequences to - # traverse, and a callback object, like this: - # - # traverse_sequences(seq1, seq2, Diff::LCS::ContextDiffCallbacks.new) - # - # #diff is implemented with #traverse_sequences. - # - # == Callback Methods - # Optional callback methods are <em>emphasized</em>. - # - # callbacks#match:: Called when +a+ and +b+ are pointing - # to common elements in +A+ and +B+. - # callbacks#discard_a:: Called when +a+ is pointing to an - # element not in +B+. - # callbacks#discard_b:: Called when +b+ is pointing to an - # element not in +A+. - # <em>callbacks#finished_a</em>:: Called when +a+ has reached the end of - # sequence +A+. - # <em>callbacks#finished_b</em>:: Called when +b+ has reached the end of - # sequence +B+. - # - # == Algorithm - # a---+ - # v - # A = a b c e h j l m n p - # B = b c d e f j k l m r s t - # ^ - # b---+ - # - # If there are two arrows (+a+ and +b+) pointing to elements of - # sequences +A+ and +B+, the arrows will initially point to the first - # elements of their respective sequences. #traverse_sequences will - # advance the arrows through the sequences one element at a time, - # calling a method on the user-specified callback object before each - # advance. It will advance the arrows in such a way that if there are - # elements <tt>A[ii]</tt> and <tt>B[jj]</tt> which are both equal and - # part of the longest common subsequence, there will be some moment - # during the execution of #traverse_sequences when arrow +a+ is pointing - # to <tt>A[ii]</tt> and arrow +b+ is pointing to <tt>B[jj]</tt>. When - # this happens, #traverse_sequences will call <tt>callbacks#match</tt> - # and then it will advance both arrows. - # - # Otherwise, one of the arrows is pointing to an element of its sequence - # that is not part of the longest common subsequence. 
- # #traverse_sequences will advance that arrow and will call - # <tt>callbacks#discard_a</tt> or <tt>callbacks#discard_b</tt>, depending - # on which arrow it advanced. If both arrows point to elements that are - # not part of the longest common subsequence, then #traverse_sequences - # will advance one of them and call the appropriate callback, but it is - # not specified which it will call. - # - # The methods for <tt>callbacks#match</tt>, <tt>callbacks#discard_a</tt>, - # and <tt>callbacks#discard_b</tt> are invoked with an event comprising - # the action ("=", "+", or "-", respectively), the indices +ii+ and - # +jj+, and the elements <tt>A[ii]</tt> and <tt>B[jj]</tt>. Return - # values are discarded by #traverse_sequences. - # - # === End of Sequences - # If arrow +a+ reaches the end of its sequence before arrow +b+ does, - # #traverse_sequences will try to call <tt>callbacks#finished_a</tt> with the - # last index and element of +A+ (<tt>A[-1]</tt>) and the current index - # and element of +B+ (<tt>B[jj]</tt>). If <tt>callbacks#finished_a</tt> - # does not exist, then <tt>callbacks#discard_b</tt> will be called on - # each element of +B+ until the end of the sequence is reached (the call - # will be done with <tt>A[-1]</tt> and <tt>B[jj]</tt> for each element). - # - # If +b+ reaches the end of +B+ before +a+ reaches the end of +A+, - # <tt>callbacks#finished_b</tt> will be called with the current index - # and element of +A+ (<tt>A[ii]</tt>) and the last index and element of - # +B+ (<tt>B[-1]</tt>). Again, if <tt>callbacks#finished_b</tt> does not - # exist on the callback object, then <tt>callbacks#discard_a</tt> will - # be called on each element of +A+ until the end of the sequence is - # reached (<tt>A[ii]</tt> and <tt>B[-1]</tt>).
- # - # There is a chance that one additional <tt>callbacks#discard_a</tt> or - # <tt>callbacks#discard_b</tt> will be called after the end of the - # sequence is reached, if +a+ has not yet reached the end of +A+ or +b+ - # has not yet reached the end of +B+. - def traverse_sequences(seq1, seq2, callbacks = Diff::LCS::SequenceCallbacks, &block) #:yields change events: - matches = Diff::LCS.__lcs(seq1, seq2) - - run_finished_a = run_finished_b = false - string = seq1.kind_of?(String) - - a_size = seq1.size - b_size = seq2.size - ai = bj = 0 - - (0 .. matches.size).each do |ii| - b_line = matches[ii] - - ax = string ? seq1[ii, 1] : seq1[ii] - bx = string ? seq2[bj, 1] : seq2[bj] - - if b_line.nil? - unless ax.nil? - event = Diff::LCS::ContextChange.new('-', ii, ax, bj, bx) - event = yield event if block_given? - callbacks.discard_a(event) - end - else - loop do - break unless bj < b_line - bx = string ? seq2[bj, 1] : seq2[bj] - event = Diff::LCS::ContextChange.new('+', ii, ax, bj, bx) - event = yield event if block_given? - callbacks.discard_b(event) - bj += 1 - end - bx = string ? seq2[bj, 1] : seq2[bj] - event = Diff::LCS::ContextChange.new('=', ii, ax, bj, bx) - event = yield event if block_given? - callbacks.match(event) - bj += 1 - end - ai = ii - end - ai += 1 - - # The last entry (if any) processed was a match. +ai+ and +bj+ point - # just past the last matching lines in their sequences. - while (ai < a_size) or (bj < b_size) - # last A? - if ai == a_size and bj < b_size - if callbacks.respond_to?(:finished_a) and not run_finished_a - ax = string ? seq1[-1, 1] : seq1[-1] - bx = string ? seq2[bj, 1] : seq2[bj] - event = Diff::LCS::ContextChange.new('>', (a_size - 1), ax, bj, bx) - event = yield event if block_given? - callbacks.finished_a(event) - run_finished_a = true - else - ax = string ? seq1[ai, 1] : seq1[ai] - loop do - bx = string ? seq2[bj, 1] : seq2[bj] - event = Diff::LCS::ContextChange.new('+', ai, ax, bj, bx) - event = yield event if block_given? 
- callbacks.discard_b(event) - bj += 1 - break unless bj < b_size - end - end - end - - # last B? - if bj == b_size and ai < a_size - if callbacks.respond_to?(:finished_b) and not run_finished_b - ax = string ? seq1[ai, 1] : seq1[ai] - bx = string ? seq2[-1, 1] : seq2[-1] - event = Diff::LCS::ContextChange.new('<', ai, ax, (b_size - 1), bx) - event = yield event if block_given? - callbacks.finished_b(event) - run_finished_b = true - else - bx = string ? seq2[bj, 1] : seq2[bj] - loop do - ax = string ? seq1[ai, 1] : seq1[ai] - event = Diff::LCS::ContextChange.new('-', ai, ax, bj, bx) - event = yield event if block_given? - callbacks.discard_a(event) - ai += 1 - break unless bj < b_size - end - end - end - - if ai < a_size - ax = string ? seq1[ai, 1] : seq1[ai] - bx = string ? seq2[bj, 1] : seq2[bj] - event = Diff::LCS::ContextChange.new('-', ai, ax, bj, bx) - event = yield event if block_given? - callbacks.discard_a(event) - ai += 1 - end - - if bj < b_size - ax = string ? seq1[ai, 1] : seq1[ai] - bx = string ? seq2[bj, 1] : seq2[bj] - event = Diff::LCS::ContextChange.new('+', ai, ax, bj, bx) - event = yield event if block_given? - callbacks.discard_b(event) - bj += 1 - end - end - end - - # #traverse_balanced is an alternative to #traverse_sequences. It - # uses a different algorithm to iterate through the entries in the - # computed longest common subsequence. Instead of viewing the changes as - # insertions or deletions from one of the sequences, #traverse_balanced - # will report <em>changes</em> between the sequences. To represent a - # - # The arguments to #traverse_balanced are the two sequences to traverse - # and a callback object, like this: - # - # traverse_balanced(seq1, seq2, Diff::LCS::ContextDiffCallbacks.new) - # - # #sdiff is implemented with #traverse_balanced. - # - # == Callback Methods - # Optional callback methods are <em>emphasized</em>. - # - # callbacks#match:: Called when +a+ and +b+ are pointing - # to common elements in +A+ and +B+. 
- # callbacks#discard_a:: Called when +a+ is pointing to an - # element not in +B+. - # callbacks#discard_b:: Called when +b+ is pointing to an - # element not in +A+. - # <em>callbacks#change</em>:: Called when +a+ and +b+ are pointing - # to the same relative position, but - # <tt>A[a]</tt> and <tt>B[b]</tt> are - # not the same; a <em>change</em> has - # occurred. - # - # #traverse_balanced might be a bit slower than #traverse_sequences, - # noticeable only while processing huge amounts of data. - # - # The +sdiff+ function of this module is implemented as a call to - # #traverse_balanced. - # - # == Algorithm - # a---+ - # v - # A = a b c e h j l m n p - # B = b c d e f j k l m r s t - # ^ - # b---+ - # - # === Matches - # If there are two arrows (+a+ and +b+) pointing to elements of - # sequences +A+ and +B+, the arrows will initially point to the first - # elements of their respective sequences. #traverse_balanced will - # advance the arrows through the sequences one element at a time, - # calling a method on the user-specified callback object before each - # advance. It will advance the arrows in such a way that if there are - # elements <tt>A[ii]</tt> and <tt>B[jj]</tt> which are both equal and - # part of the longest common subsequence, there will be some moment - # during the execution of #traverse_balanced when arrow +a+ is pointing - # to <tt>A[ii]</tt> and arrow +b+ is pointing to <tt>B[jj]</tt>. When - # this happens, #traverse_balanced will call <tt>callbacks#match</tt> - # and then it will advance both arrows. - # - # === Discards - # Otherwise, one of the arrows is pointing to an element of its sequence - # that is not part of the longest common subsequence. - # #traverse_balanced will advance that arrow and will call - # <tt>callbacks#discard_a</tt> or <tt>callbacks#discard_b</tt>, - # depending on which arrow it advanced.
- # - # === Changes - # If both +a+ and +b+ point to elements that are not part of the longest - # common subsequence, then #traverse_sequences will try to call - # <tt>callbacks#change</tt> and advance both arrows. If - # <tt>callbacks#change</tt> is not implemented, then - # <tt>callbacks#discard_a</tt> and <tt>callbacks#discard_b</tt> will be - # called in turn. - # - # The methods for <tt>callbacks#match</tt>, <tt>callbacks#discard_a</tt>, - # <tt>callbacks#discard_b</tt>, and <tt>callbacks#change</tt> are - # invoked with an event comprising the action ("=", "+", "-", or "!", - # respectively), the indicies +ii+ and +jj+, and the elements - # <tt>A[ii]</tt> and <tt>B[jj]</tt>. Return values are discarded by - # #traverse_balanced. - # - # === Context - # Note that +ii+ and +jj+ may not be the same index position, even if - # +a+ and +b+ are considered to be pointing to matching or changed - # elements. - def traverse_balanced(seq1, seq2, callbacks = Diff::LCS::BalancedCallbacks) - matches = Diff::LCS.__lcs(seq1, seq2) - a_size = seq1.size - b_size = seq2.size - ai = bj = mb = 0 - ma = -1 - string = seq1.kind_of?(String) - - # Process all the lines in the match vector. - loop do - # Find next match indices +ma+ and +mb+ - loop do - ma += 1 - break unless ma < matches.size and matches[ma].nil? - end - - break if ma >= matches.size # end of matches? - mb = matches[ma] - - # Change(seq2) - while (ai < ma) or (bj < mb) - ax = string ? seq1[ai, 1] : seq1[ai] - bx = string ? seq2[bj, 1] : seq2[bj] - - case [(ai < ma), (bj < mb)] - when [true, true] - if callbacks.respond_to?(:change) - event = Diff::LCS::ContextChange.new('!', ai, ax, bj, bx) - event = yield event if block_given? - callbacks.change(event) - ai += 1 - bj += 1 - else - event = Diff::LCS::ContextChange.new('-', ai, ax, bj, bx) - event = yield event if block_given? - callbacks.discard_a(event) - ai += 1 - ax = string ? 
seq1[ai, 1] : seq1[ai] - event = Diff::LCS::ContextChange.new('+', ai, ax, bj, bx) - event = yield event if block_given? - callbacks.discard_b(event) - bj += 1 - end - when [true, false] - event = Diff::LCS::ContextChange.new('-', ai, ax, bj, bx) - event = yield event if block_given? - callbacks.discard_a(event) - ai += 1 - when [false, true] - event = Diff::LCS::ContextChange.new('+', ai, ax, bj, bx) - event = yield event if block_given? - callbacks.discard_b(event) - bj += 1 - end - end - - # Match - ax = string ? seq1[ai, 1] : seq1[ai] - bx = string ? seq2[bj, 1] : seq2[bj] - event = Diff::LCS::ContextChange.new('=', ai, ax, bj, bx) - event = yield event if block_given? - callbacks.match(event) - ai += 1 - bj += 1 - end - - while (ai < a_size) or (bj < b_size) - ax = string ? seq1[ai, 1] : seq1[ai] - bx = string ? seq2[bj, 1] : seq2[bj] - - case [(ai < a_size), (bj < b_size)] - when [true, true] - if callbacks.respond_to?(:change) - event = Diff::LCS::ContextChange.new('!', ai, ax, bj, bx) - event = yield event if block_given? - callbacks.change(event) - ai += 1 - bj += 1 - else - event = Diff::LCS::ContextChange.new('-', ai, ax, bj, bx) - event = yield event if block_given? - callbacks.discard_a(event) - ai += 1 - ax = string ? seq1[ai, 1] : seq1[ai] - event = Diff::LCS::ContextChange.new('+', ai, ax, bj, bx) - event = yield event if block_given? - callbacks.discard_b(event) - bj += 1 - end - when [true, false] - event = Diff::LCS::ContextChange.new('-', ai, ax, bj, bx) - event = yield event if block_given? - callbacks.discard_a(event) - ai += 1 - when [false, true] - event = Diff::LCS::ContextChange.new('+', ai, ax, bj, bx) - event = yield event if block_given? - callbacks.discard_b(event) - bj += 1 - end - end - end - - PATCH_MAP = { #:nodoc: - :patch => { '+' => '+', '-' => '-', '!' => '!', '=' => '=' }, - :unpatch => { '+' => '-', '-' => '+', '!' => '!', '=' => '=' } - } - - # Given a patchset, convert the current version to the new - # version. 
If +direction+ is not specified (must be - # <tt>:patch</tt> or <tt>:unpatch</tt>), then discovery of the - # direction of the patch will be attempted. - def patch(src, patchset, direction = nil) - string = src.kind_of?(String) - # Start with a new empty type of the source's class - res = src.class.new - - # Normalize the patchset. - patchset = __normalize_patchset(patchset) - - direction ||= Diff::LCS.__diff_direction(src, patchset) - direction ||= :patch - - ai = bj = 0 - - patchset.each do |change| - # Both Change and ContextChange support #action - action = PATCH_MAP[direction][change.action] - - case change - when Diff::LCS::ContextChange - case direction - when :patch - el = change.new_element - op = change.old_position - np = change.new_position - when :unpatch - el = change.old_element - op = change.new_position - np = change.old_position - end - - case action - when '-' # Remove details from the old string - while ai < op - res << (string ? src[ai, 1] : src[ai]) - ai += 1 - bj += 1 - end - ai += 1 - when '+' - while bj < np - res << (string ? src[ai, 1] : src[ai]) - ai += 1 - bj += 1 - end - - res << el - bj += 1 - when '=' - # This only appears in sdiff output with the SDiff callback. - # Therefore, we only need to worry about dealing with a single - # element. - res << el - - ai += 1 - bj += 1 - when '!' - while ai < op - res << (string ? src[ai, 1] : src[ai]) - ai += 1 - bj += 1 - end - - bj += 1 - ai += 1 - - res << el - end - when Diff::LCS::Change - case action - when '-' - while ai < change.position - res << (string ? src[ai, 1] : src[ai]) - ai += 1 - bj += 1 - end - ai += 1 - when '+' - while bj < change.position - res << (string ? src[ai, 1] : src[ai]) - ai += 1 - bj += 1 - end - - bj += 1 - - res << change.element - end - end - end - - while ai < src.size - res << (string ? src[ai, 1] : src[ai]) - ai += 1 - bj += 1 - end - - res - end - - # Given a set of patchset, convert the current version to the prior - # version. Does no auto-discovery. 
- def unpatch!(src, patchset) - Diff::LCS.patch(src, patchset, :unpatch) - end - - # Given a set of patchset, convert the current version to the next - # version. Does no auto-discovery. - def patch!(src, patchset) - Diff::LCS.patch(src, patchset, :patch) - end - -# private - # Compute the longest common subsequence between the sequenced Enumerables - # +a+ and +b+. The result is an array whose contents is such that - # - # result = Diff::LCS.__lcs(a, b) - # result.each_with_index do |e, ii| - # assert_equal(a[ii], b[e]) unless e.nil? - # end - def __lcs(a, b) - a_start = b_start = 0 - a_finish = a.size - 1 - b_finish = b.size - 1 - vector = [] - - # Prune off any common elements at the beginning... - while (a_start <= a_finish) and - (b_start <= b_finish) and - (a[a_start] == b[b_start]) - vector[a_start] = b_start - a_start += 1 - b_start += 1 - end - - # Now the end... - while (a_start <= a_finish) and - (b_start <= b_finish) and - (a[a_finish] == b[b_finish]) - vector[a_finish] = b_finish - a_finish -= 1 - b_finish -= 1 - end - - # Now, compute the equivalence classes of positions of elements. - b_matches = Diff::LCS.__position_hash(b, b_start .. b_finish) - - thresh = [] - links = [] - - (a_start .. a_finish).each do |ii| - ai = a.kind_of?(String) ? a[ii, 1] : a[ii] - bm = b_matches[ai] - kk = nil - bm.reverse_each do |jj| - if kk and (thresh[kk] > jj) and (thresh[kk - 1] < jj) - thresh[kk] = jj - else - kk = Diff::LCS.__replace_next_larger(thresh, jj, kk) - end - links[kk] = [ (kk > 0) ? links[kk - 1] : nil, ii, jj ] unless kk.nil? - end - end - - unless thresh.empty? - link = links[thresh.size - 1] - while not link.nil? - vector[link[1]] = link[2] - link = link[0] - end - end - - vector - end - - # Find the place at which +value+ would normally be inserted into the - # Enumerable. If that place is already occupied by +value+, do nothing - # and return +nil+. If the place does not exist (i.e., it is off the end - # of the Enumerable), add it to the end. 
Otherwise, replace the element - # at that point with +value+. It is assumed that the Enumerable's values - # are numeric. - # - # This operation preserves the sort order. - def __replace_next_larger(enum, value, last_index = nil) - # Off the end? - if enum.empty? or (value > enum[-1]) - enum << value - return enum.size - 1 - end - - # Binary search for the insertion point - last_index ||= enum.size - first_index = 0 - while (first_index <= last_index) - ii = (first_index + last_index) >> 1 - - found = enum[ii] - - if value == found - return nil - elsif value > found - first_index = ii + 1 - else - last_index = ii - 1 - end - end - - # The insertion point is in first_index; overwrite the next larger - # value. - enum[first_index] = value - return first_index - end - - # If +vector+ maps the matching elements of another collection onto this - # Enumerable, compute the inverse +vector+ that maps this Enumerable - # onto the collection. (Currently unused.) - def __inverse_vector(a, vector) - inverse = a.dup - (0 ... vector.size).each do |ii| - inverse[vector[ii]] = ii unless vector[ii].nil? - end - inverse - end - - # Returns a hash mapping each element of an Enumerable to the set of - # positions it occupies in the Enumerable, optionally restricted to the - # elements specified in the range of indexes specified by +interval+. - def __position_hash(enum, interval = 0 .. -1) - hash = Hash.new { |hh, kk| hh[kk] = [] } - interval.each do |ii| - kk = enum.kind_of?(String) ? enum[ii, 1] : enum[ii] - hash[kk] << ii - end - hash - end - - # Examine the patchset and the source to see in which direction the - # patch should be applied. - # - # WARNING: By default, this examines the whole patch, so this could take - # some time. This also works better with Diff::LCS::ContextChange or - # Diff::LCS::Change as its source, as an array will cause the creation - # of one of the above. 
def __diff_direction(src, patchset, limit = nil)
  # Tallies of changes whose recorded element was (or was not) found in
  # +src+ at the recorded position. "left" counts evidence that +src+ is
  # the old sequence, "right" that it is the new one.
  count = left = left_miss = right = right_miss = 0
  string = src.kind_of?(String)

  # Strings are read with the two-argument slice form (+src[pos, 1]+);
  # every other source is indexed with a single subscript.
  element_at = lambda { |pos| string ? src[pos, 1] : src[pos] }

  patchset.each do |change|
    count += 1

    case change
    when Diff::LCS::Change
      # With a simplistic change, we can't tell the difference between
      # the left and right on '!' actions, so we ignore those. On '='
      # actions, if there's a miss, we miss both left and right.
      element = element_at.call(change.position)

      case change.action
      when '-'
        if element == change.element
          left += 1
        else
          left_miss += 1
        end
      when '+'
        if element == change.element
          right += 1
        else
          right_miss += 1
        end
      when '='
        if element != change.element
          left_miss += 1
          right_miss += 1
        end
      end
    when Diff::LCS::ContextChange
      case change.action
      when '-' # Remove details from the old string
        if element_at.call(change.old_position) == change.old_element
          left += 1
        else
          left_miss += 1
        end
      when '+'
        if element_at.call(change.new_position) == change.new_element
          right += 1
        else
          right_miss += 1
        end
      when '='
        le = element_at.call(change.old_position)
        re = element_at.call(change.new_position)

        left_miss += 1 if le != change.old_element
        right_miss += 1 if re != change.new_element
      when '!'
        # Try the old side first; only when that fails is the new side
        # consulted, and only when both fail is it counted as a miss.
        if element_at.call(change.old_position) == change.old_element
          left += 1
        elsif element_at.call(change.new_position) == change.new_element
          right += 1
        else
          left_miss += 1
          right_miss += 1
        end
      end
    end

    # Stop early once more than +limit+ changes have been examined.
    break if !limit.nil? && count > limit
  end

  # && (not +and+) so that the whole condition is what gets assigned;
  # +and+ binds more loosely than +=+ and would assign only the first
  # comparison.
  no_left = (left == 0) && (left_miss >= 0)
  no_right = (right == 0) && (right_miss >= 0)

  case [no_left, no_right]
  when [false, true]
    return :patch
  when [true, false]
    return :unpatch
  else
    raise "The provided patchset does not appear to apply to the provided value as either source or destination value."
  end
end

# Normalize the patchset. A patchset is always a sequence of changes, but
# how those changes are represented may vary, depending on how they were
# generated. In all cases we support, we also support the array
# representation of the changes. The formats are:
#
#   [ # patchset <- Diff::LCS.diff(a, b)
#     [ # one or more hunks
#       Diff::LCS::Change # one or more changes
#     ] ]
#
#   [ # patchset, equivalent to the above
#     [ # one or more hunks
#       [ action, line, value ] # one or more changes
#     ] ]
#
#   [ # patchset <- Diff::LCS.diff(a, b, Diff::LCS::ContextDiffCallbacks)
#     #       OR <- Diff::LCS.sdiff(a, b, Diff::LCS::ContextDiffCallbacks)
#     [ # one or more hunks
#       Diff::LCS::ContextChange # one or more changes
#     ] ]
#
#   [ # patchset, equivalent to the above
#     [ # one or more hunks
#       [ action, [ old line, old value ], [ new line, new value ] ]
#         # one or more changes
#     ] ]
#
#   [ # patchset <- Diff::LCS.sdiff(a, b)
#     #       OR <- Diff::LCS.diff(a, b, Diff::LCS::SDiffCallbacks)
#     Diff::LCS::ContextChange # one or more changes
#   ]
#
#   [ # patchset, equivalent to the above
#     [ action, [ old line, old value ], [ new line, new value ] ]
#       # one or more changes
#   ]
#
# The result of this will be either of the following.
#
#   [ # patchset
#     Diff::LCS::ContextChange # one or more changes
#   ]
#
#   [ # patchset
#     Diff::LCS::Change # one or more changes
#   ]
#
# If either of the above is provided, it will be returned as such.
- # - def __normalize_patchset(patchset) - patchset.map do |hunk| - case hunk - when Diff::LCS::ContextChange, Diff::LCS::Change - hunk - when Array - if (not hunk[0].kind_of?(Array)) and hunk[1].kind_of?(Array) and hunk[2].kind_of?(Array) - Diff::LCS::ContextChange.from_a(hunk) - else - hunk.map do |change| - case change - when Diff::LCS::ContextChange, Diff::LCS::Change - change - when Array - # change[1] will ONLY be an array in a ContextChange#to_a call. - # In Change#to_a, it represents the line (singular). - if change[1].kind_of?(Array) - Diff::LCS::ContextChange.from_a(change) - else - Diff::LCS::Change.from_a(change) - end - end - end - end - else - raise ArgumentError, "Cannot normalise a hunk of class #{hunk.class}." - end - end.flatten - end - end -end |