#!/usr/bin/env ruby
#--
# Copyright 2004 Austin Ziegler
#   adapted from:
#     Algorithm::Diff (Perl) by Ned Konz
#     Smalltalk by Mario I. Wolczko
#   implements McIlroy-Hunt diff algorithm
#
# This program is free software. It may be redistributed and/or modified
# under the terms of the GPL version 2 (or later), the Perl Artistic
# licence, or the Ruby licence.
#
# $Id: lcs.rb,v 1.9 2004/10/17 20:31:10 austin Exp $
#++

module Diff
  # = Diff::LCS 1.1.2
  # Computes "intelligent" differences between two sequenced Enumerables.
  # This is an implementation of the McIlroy-Hunt "diff" algorithm for
  # Enumerable objects that include Diffable.
  #
  # Based on Mario I. Wolczko's Smalltalk version (1.2, 1993) and Ned Konz's
  # Perl version (Algorithm::Diff).
  #
  # == Synopsis
  #   require 'diff/lcs'
  #
  #   seq1 = %w(a b c e h j l m n p)
  #   seq2 = %w(b c d e f j k l m r s t)
  #
  #   lcs = Diff::LCS.LCS(seq1, seq2)
  #   diffs = Diff::LCS.diff(seq1, seq2)
  #   sdiff = Diff::LCS.sdiff(seq1, seq2)
  #   seq = Diff::LCS.traverse_sequences(seq1, seq2, callback_obj)
  #   bal = Diff::LCS.traverse_balanced(seq1, seq2, callback_obj)
  #   seq2 == Diff::LCS.patch(seq1, diffs)
  #   seq2 == Diff::LCS.patch!(seq1, diffs)
  #   seq1 == Diff::LCS.unpatch(seq2, diffs)
  #   seq1 == Diff::LCS.unpatch!(seq2, diffs)
  #   seq2 == Diff::LCS.patch(seq1, sdiff)
  #   seq2 == Diff::LCS.patch!(seq1, sdiff)
  #   seq1 == Diff::LCS.unpatch(seq2, sdiff)
  #   seq1 == Diff::LCS.unpatch!(seq2, sdiff)
  #
  # Alternatively, objects can be extended with Diff::LCS:
  #
  #   seq1.extend(Diff::LCS)
  #   lcs = seq1.lcs(seq2)
  #   diffs = seq1.diff(seq2)
  #   sdiff = seq1.sdiff(seq2)
  #   seq = seq1.traverse_sequences(seq2, callback_obj)
  #   bal = seq1.traverse_balanced(seq2, callback_obj)
  #   seq2 == seq1.patch(diffs)
  #   seq2 == seq1.patch!(diffs)
  #   seq1 == seq2.unpatch(diffs)
  #   seq1 == seq2.unpatch!(diffs)
  #   seq2 == seq1.patch(sdiff)
  #   seq2 == seq1.patch!(sdiff)
  #   seq1 == seq2.unpatch(sdiff)
  #   seq1 == seq2.unpatch!(sdiff)
  #
  # Default extensions are provided for Array and String objects through
  # the use of 'diff/lcs/array' and 'diff/lcs/string'.
  #
  # == Introduction (by Mark-Jason Dominus)
  #
  # <em>The following text is from the Perl documentation. The only
  # changes have been to make the text appear better in Rdoc</em>.
  #
  # I once read an article written by the authors of +diff+; they said
  # that they worked very hard on the algorithm until they found the
  # right one.
  #
  # I think what they ended up using (and I hope someone will correct me,
  # because I am not very confident about this) was the `longest common
  # subsequence' method. In the LCS problem, you have two sequences of
  # items:
  #
  #    a b c d f g h j q z
  #    a b c d e f g i j k r x y z
  #
  # and you want to find the longest sequence of items that is present in
  # both original sequences in the same order. That is, you want to find a
  # new sequence *S* which can be obtained from the first sequence by
  # deleting some items, and from the second sequence by deleting other
  # items. You also want *S* to be as long as possible. In this case *S*
  # is:
  #
  #    a b c d f g j z
  #
  # From there it's only a small step to get diff-like output:
  #
  #    e   h i   k   q r x y
  #    +   - +   +   - + + +
  #
  # This module solves the LCS problem. It also includes a canned function
  # to generate +diff+-like output.
  #
  # It might seem from the example above that the LCS of two sequences is
  # always pretty obvious, but that's not always the case, especially when
  # the two sequences have many repeated elements. For example, consider
  #
  #    a x b y c z p d q
  #    a b c a x b y c z
  #
  # A naive approach might start by matching up the +a+ and +b+ that
  # appear at the beginning of each sequence, like this:
  #
  #    a x b y c         z p d q
  #    a   b   c a b y c z
  #
  # This finds the common subsequence +a b c z+. But actually, the LCS is
  # +a x b y c z+:
  #
  #          a x b y c z p d q
  #    a b c a x b y c z
  #
  # == Author
  # This version is by Austin Ziegler.
  #
  # It is based on the Perl Algorithm::Diff by Ned Konz, copyright
  # © 2000 - 2002 and the Smalltalk diff version by Mario I. Wolczko,
  # copyright © 1993. Documentation includes work by Mark-Jason Dominus.
  #
  # == Licence
  # Copyright © 2004 Austin Ziegler
  # This program is free software; you can redistribute it and/or modify it
  # under the same terms as Ruby, or alternatively under the Perl Artistic
  # licence.
  #
  # == Credits
  # Much of the documentation is taken directly from the Perl
  # Algorithm::Diff implementation and was written originally by Mark-Jason
  # Dominus and later by Ned Konz. The basic Ruby implementation was
  # re-ported from the Smalltalk implementation, available at
  # ftp://st.cs.uiuc.edu/pub/Smalltalk/MANCHESTER/manchester/4.0/diff.st
  #
  # #sdiff and #traverse_balanced were written for the Perl version by Mike
  # Schilli.
  #
  # "The algorithm is described in <em>A Fast Algorithm for Computing Longest
  # Common Subsequences</em>, CACM, vol.20, no.5, pp.350-353, May 1977, with
  # a few minor improvements to improve the speed."
  module LCS
    VERSION = '1.1.2'
  end
end

require 'diff/lcs/callbacks'

# Mixin (instance-level) interface. Every method here simply delegates to
# the module-level method of the same purpose on Diff::LCS, passing +self+
# as the first sequence.
module Diff::LCS
  # Returns an Array containing the longest common subsequence(s) between
  # +self+ and +other+. See Diff::LCS#LCS.
  #
  #   lcs = seq1.lcs(seq2)
  def lcs(other, &block) #:yields self[ii] if there are matched subsequences:
    Diff::LCS.LCS(self, other, &block)
  end

  # Returns the difference set between +self+ and +other+. See
  # Diff::LCS#diff.
  def diff(other, callbacks = nil, &block)
    Diff::LCS.diff(self, other, callbacks, &block)
  end

  # Returns the balanced ("side-by-side") difference set between +self+ and
  # +other+. See Diff::LCS#sdiff.
  def sdiff(other, callbacks = nil, &block)
    Diff::LCS.sdiff(self, other, callbacks, &block)
  end

  # Traverses the discovered longest common subsequences between +self+ and
  # +other+. See Diff::LCS#traverse_sequences.
  def traverse_sequences(other, callbacks = nil, &block)
    # Must be sent to the module explicitly: an unqualified call would
    # re-enter this two-argument instance method with three arguments.
    Diff::LCS.traverse_sequences(self, other,
                                 callbacks || Diff::LCS::SequenceCallbacks,
                                 &block)
  end

  # Traverses the discovered longest common subsequences between +self+ and
  # +other+ using the alternate, balanced algorithm. See
  # Diff::LCS#traverse_balanced.
  def traverse_balanced(other, callbacks = nil, &block)
    # See the note in #traverse_sequences about the explicit receiver.
    Diff::LCS.traverse_balanced(self, other,
                                callbacks || Diff::LCS::BalancedCallbacks,
                                &block)
  end

  # Attempts to patch a copy of +self+ with the provided +patchset+. The
  # direction of the patch is auto-discovered. See Diff::LCS#patch.
  def patch(patchset)
    Diff::LCS.patch(dup, patchset)
  end

  # Attempts to unpatch a copy of +self+ with the provided +patchset+. The
  # direction of the patch is auto-discovered. See Diff::LCS#patch.
  def unpatch(patchset)
    Diff::LCS.unpatch(dup, patchset)
  end

  # Applies +patchset+ to +self+ in the forward (<tt>:patch</tt>) direction
  # and returns the result. See Diff::LCS#patch!. Does no autodiscovery.
  # Note that the result is a newly built sequence; +self+ is not modified.
  def patch!(patchset)
    Diff::LCS.patch!(self, patchset)
  end

  # Applies +patchset+ to +self+ in the reverse (<tt>:unpatch</tt>)
  # direction and returns the result. See Diff::LCS#unpatch!. Does no
  # autodiscovery. Note that the result is a newly built sequence; +self+
  # is not modified.
  def unpatch!(patchset)
    Diff::LCS.unpatch!(self, patchset)
  end
end

module Diff::LCS
  class << self
    # Given two sequenced Enumerables, LCS returns an Array containing their
    # longest common subsequences.
    #
    #   lcs = Diff::LCS.LCS(seq1, seq2)
    #
    # This array whose contents is such that:
    #
    #   lcs.each_with_index do |ee, ii|
    #     assert(ee.nil? || (seq1[ii] == seq2[ee]))
    #   end
    #
    # If a block is provided, the matching subsequences will be yielded from
    # +seq1+ in turn and may be modified before they are placed into the
    # returned Array of subsequences.
    def LCS(seq1, seq2, &block) #:yields seq1[ii] for each matched:
      matches = Diff::LCS.__lcs(seq1, seq2)
      ret = []
      matches.each_with_index do |ee, ii|
        next if ee.nil? # position ii of seq1 has no partner in seq2
        ret << (block_given? ? yield(seq1[ii]) : seq1[ii])
      end
      ret
    end

    # Diff::LCS.diff computes the smallest set of additions and deletions
    # necessary to turn the first sequence into the second, and returns a
    # description of these changes.
    #
    # See Diff::LCS::DiffCallbacks for the default behaviour. An alternate
    # behaviour may be implemented with Diff::LCS::ContextDiffCallbacks.
    # If a Class argument is provided for +callbacks+, #diff will attempt
    # to initialise it. If the +callbacks+ object (possibly initialised)
    # responds to #finish, it will be called.
    #
    # If a block is given, every change of every hunk is yielded and the
    # block's return values are collected in place of the changes.
    def diff(seq1, seq2, callbacks = nil, &block) # :yields diff changes:
      callbacks ||= Diff::LCS::DiffCallbacks
      if callbacks.kind_of?(Class)
        # Some callback classes cannot be instantiated; fall back to using
        # the class object itself as the callback receiver.
        cb = begin
               callbacks.new
             rescue StandardError
               callbacks
             end
        callbacks = cb
      end
      traverse_sequences(seq1, seq2, callbacks)
      callbacks.finish if callbacks.respond_to?(:finish)

      return callbacks.diffs unless block_given?

      callbacks.diffs.map do |hunk|
        if hunk.kind_of?(Array)
          hunk.map { |change| yield change }
        else
          yield hunk
        end
      end
    end

    # Diff::LCS.sdiff computes all necessary components to show two sequences
    # and their minimized differences side by side, just like the Unix
    # utility <em>sdiff</em> does:
    #
    #     old        <     -
    #     same             same
    #     before     |     after
    #     -          >     new
    #
    # See Diff::LCS::SDiffCallbacks for the default behaviour. An alternate
    # behaviour may be implemented with Diff::LCS::ContextDiffCallbacks. If
    # a Class argument is provided for +callbacks+, #diff will attempt to
    # initialise it. If the +callbacks+ object (possibly initialised)
    # responds to #finish, it will be called.
    #
    # If a block is given, every change is yielded and the block's return
    # values are collected in place of the changes.
    def sdiff(seq1, seq2, callbacks = nil, &block) #:yields diff changes:
      callbacks ||= Diff::LCS::SDiffCallbacks
      if callbacks.kind_of?(Class)
        # Some callback classes cannot be instantiated; fall back to using
        # the class object itself as the callback receiver.
        cb = begin
               callbacks.new
             rescue StandardError
               callbacks
             end
        callbacks = cb
      end
      traverse_balanced(seq1, seq2, callbacks)
      callbacks.finish if callbacks.respond_to?(:finish)

      return callbacks.diffs unless block_given?

      callbacks.diffs.map do |hunk|
        if hunk.kind_of?(Array)
          hunk.map { |change| yield change }
        else
          yield hunk
        end
      end
    end

    # Diff::LCS.traverse_sequences is the most general facility provided by this
    # module; +diff+ and +LCS+ are implemented as calls to it.
    #
    # The arguments to #traverse_sequences are the two sequences to
    # traverse, and a callback object, like this:
    #
    #   traverse_sequences(seq1, seq2, Diff::LCS::ContextDiffCallbacks.new)
    #
    # #diff is implemented with #traverse_sequences.
    #
    # == Callback Methods
    # Callback methods marked (optional) are only invoked when the callback
    # object responds to them.
    #
    # callbacks#match::               Called when +a+ and +b+ are pointing
    #                                 to common elements in +A+ and +B+.
    # callbacks#discard_a::           Called when +a+ is pointing to an
    #                                 element not in +B+.
    # callbacks#discard_b::           Called when +b+ is pointing to an
    #                                 element not in +A+.
    # callbacks#finished_a::          (optional) Called when +a+ has reached
    #                                 the end of sequence +A+.
    # callbacks#finished_b::          (optional) Called when +b+ has reached
    #                                 the end of sequence +B+.
    #
    # == Algorithm
    #       a---+
    #           v
    #       A = a b c e h j l m n p
    #       B = b c d e f j k l m r s t
    #           ^
    #       b---+
    #
    # If there are two arrows (+a+ and +b+) pointing to elements of
    # sequences +A+ and +B+, the arrows will initially point to the first
    # elements of their respective sequences. #traverse_sequences will
    # advance the arrows through the sequences one element at a time,
    # calling a method on the user-specified callback object before each
    # advance. It will advance the arrows in such a way that if there are
    # elements <tt>A[ii]</tt> and <tt>B[jj]</tt> which are both equal and
    # part of the longest common subsequence, there will be some moment
    # during the execution of #traverse_sequences when arrow +a+ is pointing
    # to <tt>A[ii]</tt> and arrow +b+ is pointing to <tt>B[jj]</tt>. When
    # this happens, #traverse_sequences will call <tt>callbacks#match</tt>
    # and then it will advance both arrows.
    #
    # Otherwise, one of the arrows is pointing to an element of its sequence
    # that is not part of the longest common subsequence.
    # #traverse_sequences will advance that arrow and will call
    # <tt>callbacks#discard_a</tt> or <tt>callbacks#discard_b</tt>, depending
    # on which arrow it advanced. If both arrows point to elements that are
    # not part of the longest common subsequence, then #traverse_sequences
    # will advance one of them and call the appropriate callback, but it is
    # not specified which it will call.
    #
    # The methods for <tt>callbacks#match</tt>, <tt>callbacks#discard_a</tt>,
    # and <tt>callbacks#discard_b</tt> are invoked with an event comprising
    # the action ("=", "+", or "-", respectively), the indices +ii+ and
    # +jj+, and the elements <tt>A[ii]</tt> and <tt>B[jj]</tt>. Return
    # values are discarded by #traverse_sequences.
    #
    # === End of Sequences
    # If arrow +a+ reaches the end of its sequence before arrow +b+ does,
    # #traverse_sequences will try to call <tt>callbacks#finished_a</tt> with
    # the last index and element of +A+ (<tt>A[-1]</tt>) and the current index
    # and element of +B+ (<tt>B[jj]</tt>). If <tt>callbacks#finished_a</tt>
    # does not exist, then <tt>callbacks#discard_b</tt> will be called on
    # each element of +B+ until the end of the sequence is reached (the call
    # will be done with <tt>A[-1]</tt> and <tt>B[jj]</tt> for each element).
    #
    # If +b+ reaches the end of +B+ before +a+ reaches the end of +A+,
    # <tt>callbacks#finished_b</tt> will be called with the current index
    # and element of +A+ (<tt>A[ii]</tt>) and the last index and element of
    # +B+ (<tt>B[-1]</tt>). Again, if <tt>callbacks#finished_b</tt> does not
    # exist on the callback object, then <tt>callbacks#discard_a</tt> will
    # be called on each element of +A+ until the end of the sequence is
    # reached (<tt>A[ii]</tt> and <tt>B[-1]</tt>).
    #
    # There is a chance that one additional <tt>callbacks#discard_a</tt> or
    # <tt>callbacks#discard_b</tt> will be called after the end of the
    # sequence is reached, if +a+ has not yet reached the end of +A+ or +b+
    # has not yet reached the end of +B+.
    #
    # If a block is given, every event is yielded to it first and the
    # block's return value is what is handed to the callback.
    def traverse_sequences(seq1, seq2, callbacks = Diff::LCS::SequenceCallbacks, &block) #:yields change events:
      matches = Diff::LCS.__lcs(seq1, seq2)

      run_finished_a = run_finished_b = false
      string = seq1.kind_of?(String)

      a_size = seq1.size
      b_size = seq2.size
      ai = bj = 0

      # Deliberately inclusive: index matches.size (the first position of A
      # after the last match, if any) is handled here as well.
      (0 .. matches.size).each do |ii|
        b_line = matches[ii]

        ax = string ? seq1[ii, 1] : seq1[ii]
        bx = string ? seq2[bj, 1] : seq2[bj]

        if b_line.nil?
          # String#[size, 1] is "" rather than nil, so the index must be
          # checked too or a phantom deletion is reported past the end.
          unless ax.nil? || ii >= a_size
            event = Diff::LCS::ContextChange.new('-', ii, ax, bj, bx)
            event = yield event if block_given?
            callbacks.discard_a(event)
          end
        else
          # Everything in B before the matched position is an addition.
          loop do
            break unless bj < b_line
            bx = string ? seq2[bj, 1] : seq2[bj]
            event = Diff::LCS::ContextChange.new('+', ii, ax, bj, bx)
            event = yield event if block_given?
            callbacks.discard_b(event)
            bj += 1
          end
          bx = string ? seq2[bj, 1] : seq2[bj]
          event = Diff::LCS::ContextChange.new('=', ii, ax, bj, bx)
          event = yield event if block_given?
          callbacks.match(event)
          bj += 1
        end
        ai = ii
      end
      ai += 1

      # The last entry (if any) processed was a match. +ai+ and +bj+ point
      # just past the last matching lines in their sequences.
      while (ai < a_size) || (bj < b_size)
        # last A? (+ai+ may be a_size + 1 here because the loop above runs
        # one index past the match vector, hence >= rather than ==.)
        if ai >= a_size && bj < b_size
          if callbacks.respond_to?(:finished_a) && !run_finished_a
            ax = string ? seq1[-1, 1] : seq1[-1]
            bx = string ? seq2[bj, 1] : seq2[bj]
            event = Diff::LCS::ContextChange.new('>', (a_size - 1), ax, bj, bx)
            event = yield event if block_given?
            callbacks.finished_a(event)
            run_finished_a = true
          else
            ax = string ? seq1[ai, 1] : seq1[ai]
            loop do
              bx = string ? seq2[bj, 1] : seq2[bj]
              event = Diff::LCS::ContextChange.new('+', ai, ax, bj, bx)
              event = yield event if block_given?
              callbacks.discard_b(event)
              bj += 1
              break unless bj < b_size
            end
          end
        end

        # last B?
        if bj == b_size && ai < a_size
          if callbacks.respond_to?(:finished_b) && !run_finished_b
            ax = string ? seq1[ai, 1] : seq1[ai]
            bx = string ? seq2[-1, 1] : seq2[-1]
            event = Diff::LCS::ContextChange.new('<', ai, ax, (b_size - 1), bx)
            event = yield event if block_given?
            callbacks.finished_b(event)
            run_finished_b = true
          else
            bx = string ? seq2[bj, 1] : seq2[bj]
            loop do
              ax = string ? seq1[ai, 1] : seq1[ai]
              event = Diff::LCS::ContextChange.new('-', ai, ax, bj, bx)
              event = yield event if block_given?
              callbacks.discard_a(event)
              ai += 1
              break unless ai < a_size
            end
          end
        end

        if ai < a_size
          ax = string ? seq1[ai, 1] : seq1[ai]
          bx = string ? seq2[bj, 1] : seq2[bj]
          event = Diff::LCS::ContextChange.new('-', ai, ax, bj, bx)
          event = yield event if block_given?
          callbacks.discard_a(event)
          ai += 1
        end

        if bj < b_size
          ax = string ? seq1[ai, 1] : seq1[ai]
          bx = string ? seq2[bj, 1] : seq2[bj]
          event = Diff::LCS::ContextChange.new('+', ai, ax, bj, bx)
          event = yield event if block_given?
          callbacks.discard_b(event)
          bj += 1
        end
      end
    end

    # #traverse_balanced is an alternative to #traverse_sequences. It
    # uses a different algorithm to iterate through the entries in the
    # computed longest common subsequence. Instead of viewing the changes as
    # insertions or deletions from one of the sequences, #traverse_balanced
    # will report <em>changes</em> between the sequences.
    #
    # The arguments to #traverse_balanced are the two sequences to traverse
    # and a callback object, like this:
    #
    #   traverse_balanced(seq1, seq2, Diff::LCS::ContextDiffCallbacks.new)
    #
    # #sdiff is implemented with #traverse_balanced.
    #
    # == Callback Methods
    # Callback methods marked (optional) are only invoked when the callback
    # object responds to them.
    #
    # callbacks#match::               Called when +a+ and +b+ are pointing
    #                                 to common elements in +A+ and +B+.
    # callbacks#discard_a::           Called when +a+ is pointing to an
    #                                 element not in +B+.
    # callbacks#discard_b::           Called when +b+ is pointing to an
    #                                 element not in +A+.
    # callbacks#change::              (optional) Called when +a+ and +b+ are
    #                                 pointing to the same relative position,
    #                                 but <tt>A[a]</tt> and <tt>B[b]</tt> are
    #                                 not the same; a <em>change</em> has
    #                                 occurred.
    #
    # #traverse_balanced might be a bit slower than #traverse_sequences,
    # noticeable only while processing huge amounts of data.
    #
    # The +sdiff+ function of this module is implemented as call to
    # #traverse_balanced.
    #
    # == Algorithm
    #       a---+
    #           v
    #       A = a b c e h j l m n p
    #       B = b c d e f j k l m r s t
    #           ^
    #       b---+
    #
    # === Matches
    # If there are two arrows (+a+ and +b+) pointing to elements of
    # sequences +A+ and +B+, the arrows will initially point to the first
    # elements of their respective sequences. #traverse_balanced will
    # advance the arrows through the sequences one element at a time,
    # calling a method on the user-specified callback object before each
    # advance. It will advance the arrows in such a way that if there are
    # elements <tt>A[ii]</tt> and <tt>B[jj]</tt> which are both equal and
    # part of the longest common subsequence, there will be some moment
    # during the execution of #traverse_balanced when arrow +a+ is pointing
    # to <tt>A[ii]</tt> and arrow +b+ is pointing to <tt>B[jj]</tt>. When
    # this happens, #traverse_balanced will call <tt>callbacks#match</tt>
    # and then it will advance both arrows.
    #
    # === Discards
    # Otherwise, one of the arrows is pointing to an element of its sequence
    # that is not part of the longest common subsequence.
    # #traverse_balanced will advance that arrow and will call
    # <tt>callbacks#discard_a</tt> or <tt>callbacks#discard_b</tt>,
    # depending on which arrow it advanced.
    #
    # === Changes
    # If both +a+ and +b+ point to elements that are not part of the longest
    # common subsequence, then #traverse_balanced will try to call
    # <tt>callbacks#change</tt> and advance both arrows. If
    # <tt>callbacks#change</tt> is not implemented, then
    # <tt>callbacks#discard_a</tt> and <tt>callbacks#discard_b</tt> will be
    # called in turn.
    #
    # The methods for <tt>callbacks#match</tt>, <tt>callbacks#discard_a</tt>,
    # <tt>callbacks#discard_b</tt>, and <tt>callbacks#change</tt> are
    # invoked with an event comprising the action ("=", "+", "-", or "!",
    # respectively), the indices +ii+ and +jj+, and the elements
    # <tt>A[ii]</tt> and <tt>B[jj]</tt>. Return values are discarded by
    # #traverse_balanced.
    #
    # === Context
    # Note that +ii+ and +jj+ may not be the same index position, even if
    # +a+ and +b+ are considered to be pointing to matching or changed
    # elements.
    #
    # If a block is given, every event is yielded to it first and the
    # block's return value is what is handed to the callback.
    def traverse_balanced(seq1, seq2, callbacks = Diff::LCS::BalancedCallbacks)
      matches = Diff::LCS.__lcs(seq1, seq2)
      a_size = seq1.size
      b_size = seq2.size
      ai = bj = mb = 0
      ma = -1
      string = seq1.kind_of?(String)

      # Process all the lines in the match vector.
      loop do
        # Find next match indices +ma+ and +mb+
        loop do
          ma += 1
          break unless ma < matches.size && matches[ma].nil?
        end

        break if ma >= matches.size # end of matches?
        mb = matches[ma]

        # Change(seq2)
        while (ai < ma) || (bj < mb)
          ax = string ? seq1[ai, 1] : seq1[ai]
          bx = string ? seq2[bj, 1] : seq2[bj]

          case [(ai < ma), (bj < mb)]
          when [true, true]
            if callbacks.respond_to?(:change)
              event = Diff::LCS::ContextChange.new('!', ai, ax, bj, bx)
              event = yield event if block_given?
              callbacks.change(event)
              ai += 1
              bj += 1
            else
              # No #change callback: report a deletion followed by an
              # addition instead.
              event = Diff::LCS::ContextChange.new('-', ai, ax, bj, bx)
              event = yield event if block_given?
              callbacks.discard_a(event)
              ai += 1
              ax = string ? seq1[ai, 1] : seq1[ai]
              event = Diff::LCS::ContextChange.new('+', ai, ax, bj, bx)
              event = yield event if block_given?
              callbacks.discard_b(event)
              bj += 1
            end
          when [true, false]
            event = Diff::LCS::ContextChange.new('-', ai, ax, bj, bx)
            event = yield event if block_given?
            callbacks.discard_a(event)
            ai += 1
          when [false, true]
            event = Diff::LCS::ContextChange.new('+', ai, ax, bj, bx)
            event = yield event if block_given?
            callbacks.discard_b(event)
            bj += 1
          end
        end

        # Match
        ax = string ? seq1[ai, 1] : seq1[ai]
        bx = string ? seq2[bj, 1] : seq2[bj]
        event = Diff::LCS::ContextChange.new('=', ai, ax, bj, bx)
        event = yield event if block_given?
        callbacks.match(event)
        ai += 1
        bj += 1
      end

      # Everything after the last match.
      while (ai < a_size) || (bj < b_size)
        ax = string ? seq1[ai, 1] : seq1[ai]
        bx = string ? seq2[bj, 1] : seq2[bj]

        case [(ai < a_size), (bj < b_size)]
        when [true, true]
          if callbacks.respond_to?(:change)
            event = Diff::LCS::ContextChange.new('!', ai, ax, bj, bx)
            event = yield event if block_given?
            callbacks.change(event)
            ai += 1
            bj += 1
          else
            event = Diff::LCS::ContextChange.new('-', ai, ax, bj, bx)
            event = yield event if block_given?
            callbacks.discard_a(event)
            ai += 1
            ax = string ? seq1[ai, 1] : seq1[ai]
            event = Diff::LCS::ContextChange.new('+', ai, ax, bj, bx)
            event = yield event if block_given?
            callbacks.discard_b(event)
            bj += 1
          end
        when [true, false]
          event = Diff::LCS::ContextChange.new('-', ai, ax, bj, bx)
          event = yield event if block_given?
          callbacks.discard_a(event)
          ai += 1
        when [false, true]
          event = Diff::LCS::ContextChange.new('+', ai, ax, bj, bx)
          event = yield event if block_given?
          callbacks.discard_b(event)
          bj += 1
        end
      end
    end

    # Maps the action recorded in a change to the action to perform for
    # each patch direction (additions and deletions swap when unpatching).
    PATCH_MAP = { #:nodoc:
      :patch   => { '+' => '+', '-' => '-', '!' => '!', '=' => '=' },
      :unpatch => { '+' => '-', '-' => '+', '!' => '!', '=' => '=' }
    }

    # Given a patchset, convert the current version to the new
    # version. If +direction+ is not specified (must be
    # <tt>:patch</tt> or <tt>:unpatch</tt>), then discovery of the
    # direction of the patch will be attempted.
    #
    # The result is a new object of the same class as +src+ (created with
    # <tt>src.class.new</tt>); +src+ itself is not modified.
    def patch(src, patchset, direction = nil)
      string = src.kind_of?(String)
      # Start with a new empty type of the source's class
      res = src.class.new

      # Normalize the patchset.
      patchset = __normalize_patchset(patchset)

      # An empty or equality-only patchset carries nothing that direction
      # discovery could test against +src+ (it would raise), and the
      # direction makes no difference to the outcome.
      direction ||= :patch if patchset.all? { |change| change.action == '=' }
      direction ||= Diff::LCS.__diff_direction(src, patchset)
      direction ||= :patch

      ai = bj = 0

      patchset.each do |change|
        # Both Change and ContextChange support #action
        action = PATCH_MAP[direction][change.action]

        case change
        when Diff::LCS::ContextChange
          case direction
          when :patch
            el = change.new_element
            op = change.old_position
            np = change.new_position
          when :unpatch
            el = change.old_element
            op = change.new_position
            np = change.old_position
          end

          case action
          when '-' # Remove details from the old string
            while ai < op
              res << (string ? src[ai, 1] : src[ai])
              ai += 1
              bj += 1
            end
            ai += 1
          when '+'
            while bj < np
              res << (string ? src[ai, 1] : src[ai])
              ai += 1
              bj += 1
            end

            res << el
            bj += 1
          when '='
            # This only appears in sdiff output with the SDiff callback.
            # Therefore, we only need to worry about dealing with a single
            # element.
            res << el

            ai += 1
            bj += 1
          when '!'
            while ai < op
              res << (string ? src[ai, 1] : src[ai])
              ai += 1
              bj += 1
            end

            bj += 1
            ai += 1

            res << el
          end
        when Diff::LCS::Change
          case action
          when '-'
            while ai < change.position
              res << (string ? src[ai, 1] : src[ai])
              ai += 1
              bj += 1
            end
            ai += 1
          when '+'
            while bj < change.position
              res << (string ? src[ai, 1] : src[ai])
              ai += 1
              bj += 1
            end

            bj += 1

            res << change.element
          end
        end
      end

      # Copy whatever follows the last change.
      while ai < src.size
        res << (string ? src[ai, 1] : src[ai])
        ai += 1
        bj += 1
      end

      res
    end

    # Given a patchset, convert the current version to the prior version.
    # Because the direction is discovered from +src+ and the patchset when
    # it is not supplied, this is the same operation as #patch.
    alias_method :unpatch, :patch

    # Given a set of patchset, convert the current version to the prior
    # version. Does no auto-discovery.
    def unpatch!(src, patchset)
      Diff::LCS.patch(src, patchset, :unpatch)
    end

    # Given a set of patchset, convert the current version to the next
    # version. Does no auto-discovery.
    def patch!(src, patchset)
      Diff::LCS.patch(src, patchset, :patch)
    end

    # private

    # Compute the longest common subsequence between the sequenced Enumerables
    # +a+ and +b+. The result is an array whose contents is such that
    #
    #     result = Diff::LCS.__lcs(a, b)
    #     result.each_with_index do |e, ii|
    #       assert_equal(a[ii], b[e]) unless e.nil?
    #     end
    def __lcs(a, b)
      a_start = b_start = 0
      a_finish = a.size - 1
      b_finish = b.size - 1
      vector = []
      string = a.kind_of?(String)

      # Prune off any common elements at the beginning...
      while (a_start <= a_finish) &&
            (b_start <= b_finish) &&
            (a[a_start] == b[b_start])
        vector[a_start] = b_start
        a_start += 1
        b_start += 1
      end

      # Now the end...
      while (a_start <= a_finish) &&
            (b_start <= b_finish) &&
            (a[a_finish] == b[b_finish])
        vector[a_finish] = b_finish
        a_finish -= 1
        b_finish -= 1
      end

      # Now, compute the equivalence classes of positions of elements.
      b_matches = Diff::LCS.__position_hash(b, b_start .. b_finish)

      thresh = []
      links = []

      (a_start .. a_finish).each do |ii|
        ai = string ? a[ii, 1] : a[ii]
        bm = b_matches[ai]
        kk = nil
        bm.reverse_each do |jj|
          if kk && (thresh[kk] > jj) && (thresh[kk - 1] < jj)
            thresh[kk] = jj
          else
            kk = Diff::LCS.__replace_next_larger(thresh, jj, kk)
          end
          # Each link is [previous link, index in a, index in b].
          links[kk] = [ (kk > 0) ? links[kk - 1] : nil, ii, jj ] unless kk.nil?
        end
      end

      # Walk the chain of links backwards to fill in the match vector.
      unless thresh.empty?
        link = links[thresh.size - 1]
        until link.nil?
          vector[link[1]] = link[2]
          link = link[0]
        end
      end

      vector
    end

    # Find the place at which +value+ would normally be inserted into the
    # Enumerable. If that place is already occupied by +value+, do nothing
    # and return +nil+. If the place does not exist (i.e., it is off the end
    # of the Enumerable), add it to the end. Otherwise, replace the element
    # at that point with +value+. It is assumed that the Enumerable's values
    # are numeric.
    #
    # This operation preserves the sort order.
    #
    # Returns the index at which +value+ was stored, or +nil+ when it was
    # already present. +last_index+, when given, is the upper bound used
    # for the binary search.
    def __replace_next_larger(enum, value, last_index = nil)
      # Off the end?
      if enum.empty? || (value > enum[-1])
        enum << value
        return enum.size - 1
      end

      # Binary search for the insertion point
      last_index ||= enum.size
      first_index = 0
      while first_index <= last_index
        ii = (first_index + last_index) >> 1

        found = enum[ii]

        if value == found
          return nil
        elsif value > found
          first_index = ii + 1
        else
          last_index = ii - 1
        end
      end

      # The insertion point is in first_index; overwrite the next larger
      # value.
      enum[first_index] = value
      first_index
    end

    # If +vector+ maps the matching elements of another collection onto this
    # Enumerable, compute the inverse +vector+ that maps this Enumerable
    # onto the collection. (Currently unused.)
    def __inverse_vector(a, vector)
      inverse = a.dup
      (0 ... vector.size).each do |ii|
        inverse[vector[ii]] = ii unless vector[ii].nil?
      end
      inverse
    end

    # Returns a hash mapping each element of an Enumerable to the set of
    # positions it occupies in the Enumerable, optionally restricted to the
    # elements specified in the range of indexes specified by +interval+.
    #
    # Note that the default +interval+ (<tt>0 .. -1</tt>) is an empty Range
    # when iterated, so an explicit interval must be passed to obtain any
    # positions. The returned Hash creates (and stores) an empty Array for
    # any key that is looked up but absent.
    def __position_hash(enum, interval = 0 .. -1)
      string = enum.kind_of?(String)
      hash = Hash.new { |hh, kk| hh[kk] = [] }
      interval.each do |ii|
        kk = string ? enum[ii, 1] : enum[ii]
        hash[kk] << ii
      end
      hash
    end

    # Examine the patchset and the source to see in which direction the
    # patch should be applied.
    #
    # WARNING: By default, this examines the whole patch, so this could take
    # some time. This also works better with Diff::LCS::ContextChange or
    # Diff::LCS::Change as its source, as an array will cause the creation
    # of one of the above.
    #
    # Returns <tt>:patch</tt> or <tt>:unpatch</tt>; raises a RuntimeError
    # when neither direction (or both) appears to fit +src+. When +limit+ is
    # given, examination stops once more than +limit+ changes have been
    # looked at.
    def __diff_direction(src, patchset, limit = nil)
      count = left = left_miss = right = right_miss = 0
      string = src.kind_of?(String)

      patchset.each do |change|
        count += 1

        case change
        when Diff::LCS::Change
          # With a simplistic change, we can't tell the difference between
          # the left and right on '!' actions, so we ignore those. On '='
          # actions, if there's a miss, we miss both left and right.
          element = string ? src[change.position, 1] : src[change.position]

          case change.action
          when '-'
            if element == change.element
              left += 1
            else
              left_miss += 1
            end
          when '+'
            if element == change.element
              right += 1
            else
              right_miss += 1
            end
          when '='
            if element != change.element
              left_miss += 1
              right_miss += 1
            end
          end
        when Diff::LCS::ContextChange
          case change.action
          when '-' # Remove details from the old string
            element = string ? src[change.old_position, 1] : src[change.old_position]
            if element == change.old_element
              left += 1
            else
              left_miss += 1
            end
          when '+'
            element = string ? src[change.new_position, 1] : src[change.new_position]
            if element == change.new_element
              right += 1
            else
              right_miss += 1
            end
          when '='
            le = string ? src[change.old_position, 1] : src[change.old_position]
            re = string ? src[change.new_position, 1] : src[change.new_position]

            left_miss += 1 if le != change.old_element
            right_miss += 1 if re != change.new_element
          when '!'
            element = string ? src[change.old_position, 1] : src[change.old_position]
            if element == change.old_element
              left += 1
            else
              element = string ? src[change.new_position, 1] : src[change.new_position]
              if element == change.new_element
                right += 1
              else
                left_miss += 1
                right_miss += 1
              end
            end
          end
        end

        break if !limit.nil? && count > limit
      end

      # The miss counters are never negative, so these reduce to "no hits
      # on that side".
      no_left = (left == 0) && (left_miss >= 0)
      no_right = (right == 0) && (right_miss >= 0)

      case [no_left, no_right]
      when [false, true]
        return :patch
      when [true, false]
        return :unpatch
      else
        raise "The provided patchset does not appear to apply to the provided value as either source or destination value."
      end
    end

    # Normalize the patchset. A patchset is always a sequence of changes, but
    # how those changes are represented may vary, depending on how they were
    # generated. In all cases we support, we also support the array
    # representation of the changes. The formats are:
    #
    #   [ # patchset <- Diff::LCS.diff(a, b)
    #     [ # one or more hunks
    #       Diff::LCS::Change # one or more changes
    #     ] ]
    #
    #   [ # patchset, equivalent to the above
    #     [ # one or more hunks
    #       [ action, line, value ] # one or more changes
    #     ] ]
    #
    #   [ # patchset <- Diff::LCS.diff(a, b, Diff::LCS::ContextDiffCallbacks)
    #     #       OR <- Diff::LCS.sdiff(a, b, Diff::LCS::ContextDiffCallbacks)
    #     [ # one or more hunks
    #       Diff::LCS::ContextChange # one or more changes
    #     ] ]
    #
    #   [ # patchset, equivalent to the above
    #     [ # one or more hunks
    #       [ action, [ old line, old value ], [ new line, new value ] ]
    #         # one or more changes
    #     ] ]
    #
    #   [ # patchset <- Diff::LCS.sdiff(a, b)
    #     #       OR <- Diff::LCS.diff(a, b, Diff::LCS::SDiffCallbacks)
    #     Diff::LCS::ContextChange # one or more changes
    #   ]
    #
    #   [ # patchset, equivalent to the above
    #     [ action, [ old line, old value ], [ new line, new value ] ]
    #       # one or more changes
    #   ]
    #
    # The result of this will be either of the following.
    #
    #   [ # patchset
    #     Diff::LCS::ContextChange # one or more changes
    #   ]
    #
    #   [ # patchset
    #     Diff::LCS::Change # one or more changes
    #   ]
    #
    # If either of the above is provided, it will be returned as such.
    #
    # Raises ArgumentError for a hunk that is neither a change object nor
    # an Array.
    def __normalize_patchset(patchset)
      patchset.map do |hunk|
        case hunk
        when Diff::LCS::ContextChange, Diff::LCS::Change
          hunk
        when Array
          # A bare [ action, [old...], [new...] ] triple is a single
          # ContextChange in array form rather than a hunk of changes.
          if !hunk[0].kind_of?(Array) && hunk[1].kind_of?(Array) && hunk[2].kind_of?(Array)
            Diff::LCS::ContextChange.from_a(hunk)
          else
            hunk.map do |change|
              case change
              when Diff::LCS::ContextChange, Diff::LCS::Change
                change
              when Array
                # change[1] will ONLY be an array in a ContextChange#to_a call.
                # In Change#to_a, it represents the line (singular).
                if change[1].kind_of?(Array)
                  Diff::LCS::ContextChange.from_a(change)
                else
                  Diff::LCS::Change.from_a(change)
                end
              end
            end
          end
        else
          raise ArgumentError, "Cannot normalise a hunk of class #{hunk.class}."
        end
      end.flatten
    end
  end
end