instiki/lib/diff/lcs.rb

#! /usr/bin/env ruby
#--
# Copyright 2004 Austin Ziegler <diff-lcs@halostatue.ca>
#   adapted from:
#     Algorithm::Diff (Perl) by Ned Konz <perl@bike-nomad.com>
#     Smalltalk by Mario I. Wolczko <mario@wolczko.com>
#   implements McIlroy-Hunt diff algorithm
#
# This program is free software. It may be redistributed and/or modified
# under the terms of the GPL version 2 (or later), the Perl Artistic
# licence, or the Ruby licence.
# 
# $Id: lcs.rb,v 1.9 2004/10/17 20:31:10 austin Exp $
#++

module Diff
    # = Diff::LCS 1.1.2
    # Computes "intelligent" differences between two sequenced Enumerables.
    # This is an implementation of the McIlroy-Hunt "diff" algorithm for
    # Enumerable objects that include Diffable.
    #
    # Based on Mario I. Wolczko's <mario@wolczko.com> Smalltalk version
    # (1.2, 1993) and Ned Konz's <perl@bike-nomad.com> Perl version
    # (Algorithm::Diff).
    #
    # == Synopsis
    #   require 'diff/lcs'
    #
    #   seq1 = %w(a b c e h j l m n p)
    #   seq2 = %w(b c d e f j k l m r s t)
    #
    #   lcs = Diff::LCS.LCS(seq1, seq2)
    #   diffs = Diff::LCS.diff(seq1, seq2)
    #   sdiff = Diff::LCS.sdiff(seq1, seq2)
    #   seq = Diff::LCS.traverse_sequences(seq1, seq2, callback_obj)
    #   bal = Diff::LCS.traverse_balanced(seq1, seq2, callback_obj)
    #   seq2 == Diff::LCS.patch(seq1, diffs)
    #   seq2 == Diff::LCS.patch!(seq1, diffs)
    #   seq1 == Diff::LCS.unpatch(seq2, diffs)
    #   seq1 == Diff::LCS.unpatch!(seq2, diffs)
    #   seq2 == Diff::LCS.patch(seq1, sdiff)
    #   seq2 == Diff::LCS.patch!(seq1, sdiff)
    #   seq1 == Diff::LCS.unpatch(seq2, sdiff)
    #   seq1 == Diff::LCS.unpatch!(seq2, sdiff)
    #
    # Alternatively, objects can be extended with Diff::LCS:
    #
    #   seq1.extend(Diff::LCS)
    #   lcs = seq1.lcs(seq2)
    #   diffs = seq1.diff(seq2)
    #   sdiff = seq1.sdiff(seq2)
    #   seq = seq1.traverse_sequences(seq2, callback_obj)
    #   bal = seq1.traverse_balanced(seq2, callback_obj)
    #   seq2 == seq1.patch(diffs)
    #   seq2 == seq1.patch!(diffs)
    #   seq1 == seq2.unpatch(diffs)
    #   seq1 == seq2.unpatch!(diffs)
    #   seq2 == seq1.patch(sdiff)
    #   seq2 == seq1.patch!(sdiff)
    #   seq1 == seq2.unpatch(sdiff)
    #   seq1 == seq2.unpatch!(sdiff)
    # 
    # Default extensions are provided for Array and String objects through
    # the use of 'diff/lcs/array' and 'diff/lcs/string'.
    #
    # == Introduction (by Mark-Jason Dominus)
    # 
    # <em>The following text is from the Perl documentation. The only
    # changes have been to make the text appear better in Rdoc</em>.
    #
    # I once read an article written by the authors of +diff+; they said
    # that they worked very hard on the algorithm until they found the
    # right one.
    #
    # I think what they ended up using (and I hope someone will correct me,
    # because I am not very confident about this) was the `longest common
    # subsequence' method. In the LCS problem, you have two sequences of
    # items:
    #
    #    a b c d f g h j q z
    #    a b c d e f g i j k r x y z
    #
    # and you want to find the longest sequence of items that is present in
    # both original sequences in the same order. That is, you want to find a
    # new sequence *S* which can be obtained from the first sequence by
    # deleting some items, and from the second sequence by deleting other
    # items. You also want *S* to be as long as possible. In this case *S*
    # is:
    # 
    #    a b c d f g j z
    #
    # From there it's only a small step to get diff-like output:
    #
    #    e   h i   k   q r x y
    #    +   - +   +   - + + +
    #
    # This module solves the LCS problem. It also includes a canned function
    # to generate +diff+-like output.
    #
    # It might seem from the example above that the LCS of two sequences is
    # always pretty obvious, but that's not always the case, especially when
    # the two sequences have many repeated elements. For example, consider
    #
    #    a x b y c z p d q
    #    a b c a x b y c z
    #
    # A naive approach might start by matching up the +a+ and +b+ that
    # appear at the beginning of each sequence, like this:
    # 
    #    a x b y c         z p d q
    #    a   b   c a b y c z
    #
    # This finds the common subsequence +a b c z+. But actually, the LCS is
    # +a x b y c z+:
    #
    #          a x b y c z p d q
    #    a b c a x b y c z
    #
    # == Author
    # This version is by Austin Ziegler <diff-lcs@halostatue.ca>.
    #
    # It is based on the Perl Algorithm::Diff by Ned Konz
    # <perl@bike-nomad.com>, copyright &copy; 2000 - 2002 and the Smalltalk
    # diff version by Mario I. Wolczko <mario@wolczko.com>, copyright &copy;
    # 1993. Documentation includes work by Mark-Jason Dominus.
    #
    # == Licence
    # Copyright &copy; 2004 Austin Ziegler
    # This program is free software; you can redistribute it and/or modify it
    # under the same terms as Ruby, or alternatively under the Perl Artistic
    # licence.
    #
    # == Credits
    # Much of the documentation is taken directly from the Perl
    # Algorithm::Diff implementation and was written originally by Mark-Jason
    # Dominus <mjd-perl-diff@plover.com> and later by Ned Konz. The basic Ruby
    # implementation was re-ported from the Smalltalk implementation, available
    # at ftp://st.cs.uiuc.edu/pub/Smalltalk/MANCHESTER/manchester/4.0/diff.st
    #
    # #sdiff and #traverse_balanced were written for the Perl version by Mike
    # Schilli <m@perlmeister.com>.
    #
    # "The algorithm is described in <em>A Fast Algorithm for Computing Longest
    # Common Subsequences</em>, CACM, vol.20, no.5, pp.350-353, May 1977, with
    # a few minor improvements to improve the speed."
  module LCS
      # Version string of this Diff::LCS implementation.
    VERSION = '1.1.2'
  end
end

require 'diff/lcs/callbacks'

module Diff::LCS
    # Returns an Array containing the longest common subsequence(s) between
    # +self+ and +other+. See Diff::LCS#LCS.
    #
    #   lcs = seq1.lcs(seq2)
  def lcs(other, &block) #:yields self[ii] if there are matched subsequences:
    Diff::LCS.LCS(self, other, &block)
  end

    # Returns the difference set between +self+ and +other+. See
    # Diff::LCS#diff.
  def diff(other, callbacks = nil, &block)
    Diff::LCS::diff(self, other, callbacks, &block)
  end

    # Returns the balanced ("side-by-side") difference set between +self+ and
    # +other+. See Diff::LCS#sdiff.
  def sdiff(other, callbacks = nil, &block)
    Diff::LCS::sdiff(self, other, callbacks, &block)
  end

    # Traverses the discovered longest common subsequences between +self+ and
    # +other+. See Diff::LCS#traverse_sequences.
    #
    # The call is explicitly qualified with Diff::LCS: an unqualified call
    # would re-enter this two-argument instance method with three arguments
    # and raise ArgumentError instead of reaching the module-level function.
    #
    # NOTE(review): Diff::LCS::YieldingCallbacks is not defined in this file;
    # presumably it comes from 'diff/lcs/callbacks' -- confirm. It is only
    # looked up when +callbacks+ is nil.
  def traverse_sequences(other, callbacks = nil, &block)
    Diff::LCS.traverse_sequences(self, other,
                                 callbacks || Diff::LCS::YieldingCallbacks,
                                 &block)
  end

    # Traverses the discovered longest common subsequences between +self+ and
    # +other+ using the alternate, balanced algorithm. See
    # Diff::LCS#traverse_balanced.
    #
    # Qualified with Diff::LCS for the same reason as #traverse_sequences:
    # the unqualified form would call this instance method again with the
    # wrong number of arguments.
  def traverse_balanced(other, callbacks = nil, &block)
    Diff::LCS.traverse_balanced(self, other,
                                callbacks || Diff::LCS::YieldingCallbacks,
                                &block)
  end

    # Attempts to patch a copy of +self+ with the provided +patchset+. See
    # Diff::LCS#patch.
  def patch(patchset)
    Diff::LCS::patch(self.dup, patchset)
  end

    # Attempts to unpatch a copy of +self+ with the provided +patchset+.
    # See Diff::LCS#unpatch.
  def unpatch(patchset)
    Diff::LCS::unpatch(self.dup, patchset)
  end

    # Attempts to patch +self+ with the provided +patchset+. See
    # Diff::LCS#patch!. Does no autodiscovery.
  def patch!(patchset)
    Diff::LCS::patch!(self, patchset)
  end

    # Attempts to unpatch +self+ with the provided +patchset+. See
    # Diff::LCS#unpatch!. Does no autodiscovery.
  def unpatch!(patchset)
    Diff::LCS::unpatch!(self, patchset)
  end
end

module Diff::LCS
  class << self
      # Given two sequenced Enumerables, LCS returns an Array containing their
      # longest common subsequences.
      #
      #   lcs = Diff::LCS.LCS(seq1, seq2)
      #
      # This array whose contents is such that:
      #
      #   lcs.each_with_index do |ee, ii|
      #     assert(ee.nil? || (seq1[ii] == seq2[ee]))
      #   end
      #
      # If a block is provided, the matching subsequences will be yielded from
      # +seq1+ in turn and may be modified before they are placed into the
      # returned Array of subsequences.
    def LCS(seq1, seq2, &block) #:yields seq1[ii] for each matched:
        # The match vector holds, for each index of +seq1+, the matching
        # index in +seq2+ or nil. Only the matched positions contribute.
      common = []
      Diff::LCS.__lcs(seq1, seq2).each_with_index do |b_index, a_index|
        next if b_index.nil?

        element = seq1[a_index]
          # A supplied block may transform each matched element before it
          # is collected.
        common << (block_given? ? yield(element) : element)
      end
      common
    end

      # Diff::LCS.diff computes the smallest set of additions and deletions
      # necessary to turn the first sequence into the second, and returns a
      # description of these changes.
      # 
      # See Diff::LCS::DiffCallbacks for the default behaviour. An alternate
      # behaviour may be implemented with Diff::LCS::ContextDiffCallbacks.
      # If a Class argument is provided for +callbacks+, #diff will attempt
      # to initialise it. If the +callbacks+ object (possibly initialised)
      # responds to #finish, it will be called.
    def diff(seq1, seq2, callbacks = nil, &block) # :yields diff changes:
      callbacks ||= Diff::LCS::DiffCallbacks
        # A Class is instantiated when possible; if instantiation raises,
        # the Class itself is used as the callback object.
      callbacks = (callbacks.new rescue callbacks) if callbacks.kind_of?(Class)

      traverse_sequences(seq1, seq2, callbacks)
      callbacks.finish if callbacks.respond_to?(:finish)

      return callbacks.diffs unless block_given?

        # With a block, every change is passed through it; Array hunks are
        # mapped element by element, anything else is yielded whole.
      callbacks.diffs.map do |hunk|
        if hunk.kind_of?(Array)
          hunk.map { |change| yield change }
        else
          yield hunk
        end
      end
    end

      # Diff::LCS.sdiff computes all necessary components to show two sequences
      # and their minimized differences side by side, just like the Unix
      # utility <em>sdiff</em> does:
      #
      #     old        <     -
      #     same             same
      #     before     |     after
      #     -          >     new
      #
      # See Diff::LCS::SDiffCallbacks for the default behaviour. An alternate
      # behaviour may be implemented with Diff::LCS::ContextDiffCallbacks. If
      # a Class argument is provided for +callbacks+, #sdiff will attempt to
      # initialise it. If the +callbacks+ object (possibly initialised)
      # responds to #finish, it will be called.
    def sdiff(seq1, seq2, callbacks = nil, &block) #:yields diff changes:
      callbacks ||= Diff::LCS::SDiffCallbacks
        # A Class is instantiated when possible; if instantiation raises,
        # the Class itself is used as the callback object.
      callbacks = (callbacks.new rescue callbacks) if callbacks.kind_of?(Class)

      traverse_balanced(seq1, seq2, callbacks)
      callbacks.finish if callbacks.respond_to?(:finish)

      return callbacks.diffs unless block_given?

        # With a block, every change is passed through it; Array hunks are
        # mapped element by element, anything else is yielded whole.
      callbacks.diffs.map do |hunk|
        if hunk.kind_of?(Array)
          hunk.map { |change| yield change }
        else
          yield hunk
        end
      end
    end

      # Diff::LCS.traverse_sequences is the most general facility provided by this
      # module; +diff+ is implemented as a call to it.
      #
      # The arguments to #traverse_sequences are the two sequences to
      # traverse, and a callback object, like this:
      #
      #   traverse_sequences(seq1, seq2, Diff::LCS::ContextDiffCallbacks.new)
      #
      # #diff is implemented with #traverse_sequences.
      #
      # == Callback Methods
      # Optional callback methods are <em>emphasized</em>.
      #
      # callbacks#match::               Called when +a+ and +b+ are pointing
      #                                 to common elements in +A+ and +B+.
      # callbacks#discard_a::           Called when +a+ is pointing to an
      #                                 element not in +B+.
      # callbacks#discard_b::           Called when +b+ is pointing to an
      #                                 element not in +A+.
      # <em>callbacks#finished_a</em>:: Called when +a+ has reached the end of
      #                                 sequence +A+.
      # <em>callbacks#finished_b</em>:: Called when +b+ has reached the end of
      #                                 sequence +B+.
      #
      # == Algorithm
      #       a---+
      #           v
      #       A = a b c e h j l m n p
      #       B = b c d e f j k l m r s t
      #           ^
      #       b---+
      #
      # If there are two arrows (+a+ and +b+) pointing to elements of
      # sequences +A+ and +B+, the arrows will initially point to the first
      # elements of their respective sequences. #traverse_sequences will
      # advance the arrows through the sequences one element at a time,
      # calling a method on the user-specified callback object before each
      # advance. It will advance the arrows in such a way that if there are
      # elements <tt>A[ii]</tt> and <tt>B[jj]</tt> which are both equal and
      # part of the longest common subsequence, there will be some moment
      # during the execution of #traverse_sequences when arrow +a+ is pointing
      # to <tt>A[ii]</tt> and arrow +b+ is pointing to <tt>B[jj]</tt>. When
      # this happens, #traverse_sequences will call <tt>callbacks#match</tt>
      # and then it will advance both arrows.
      #
      # Otherwise, one of the arrows is pointing to an element of its sequence
      # that is not part of the longest common subsequence.
      # #traverse_sequences will advance that arrow and will call
      # <tt>callbacks#discard_a</tt> or <tt>callbacks#discard_b</tt>, depending
      # on which arrow it advanced. If both arrows point to elements that are
      # not part of the longest common subsequence, then #traverse_sequences
      # will advance one of them and call the appropriate callback, but it is
      # not specified which it will call.
      #
      # The methods for <tt>callbacks#match</tt>, <tt>callbacks#discard_a</tt>,
      # and <tt>callbacks#discard_b</tt> are invoked with an event comprising
      # the action ("=", "-", or "+", respectively), the indices +ii+ and
      # +jj+, and the elements <tt>A[ii]</tt> and <tt>B[jj]</tt>. Return
      # values are discarded by #traverse_sequences.
      #
      # === End of Sequences
      # If arrow +a+ reaches the end of its sequence before arrow +b+ does,
      # #traverse_sequences will try to call <tt>callbacks#finished_a</tt>
      # with the last index and element of +A+ (<tt>A[-1]</tt>) and the
      # current index and element of +B+ (<tt>B[jj]</tt>). If
      # <tt>callbacks#finished_a</tt> does not exist, then
      # <tt>callbacks#discard_b</tt> will be called on each element of +B+
      # until the end of the sequence is reached (the call will be done with
      # <tt>A[-1]</tt> and <tt>B[jj]</tt> for each element).
      #
      # If +b+ reaches the end of +B+ before +a+ reaches the end of +A+,
      # <tt>callbacks#finished_b</tt> will be called with the current index
      # and element of +A+ (<tt>A[ii]</tt>) and the last index and element of
      # +B+ (<tt>B[-1]</tt>). Again, if <tt>callbacks#finished_b</tt> does not
      # exist on the callback object, then <tt>callbacks#discard_a</tt> will
      # be called on each element of +A+ until the end of the sequence is
      # reached (<tt>A[ii]</tt> and <tt>B[-1]</tt>).
      #
      # There is a chance that one additional <tt>callbacks#discard_a</tt> or
      # <tt>callbacks#discard_b</tt> will be called after the end of the
      # sequence is reached, if +a+ has not yet reached the end of +A+ or +b+
      # has not yet reached the end of +B+.
    def traverse_sequences(seq1, seq2, callbacks = Diff::LCS::SequenceCallbacks, &block) #:yields change events:
        # Walks +seq1+ and +seq2+ guided by the match vector from
        # Diff::LCS.__lcs, reporting every position to +callbacks+ as a
        # match ('='), a discard from +seq1+ ('-') or a discard from +seq2+
        # ('+'). Each event is a Diff::LCS::ContextChange. When a block is
        # given, the event is passed through it first and the block's return
        # value is what the callback receives.
      matches = Diff::LCS.__lcs(seq1, seq2)

      run_finished_a = run_finished_b = false
        # Strings are indexed with [pos, 1] so that elements are one-character
        # strings.
      string = seq1.kind_of?(String)

      a_size = seq1.size
      b_size = seq2.size
      ai = bj = 0

        # The range is inclusive on purpose: the extra iteration looks at the
        # element of +seq1+ just past the match vector.
      (0 .. matches.size).each do |ii|
        b_line = matches[ii]

        ax = string ? seq1[ii, 1] : seq1[ii]
        bx = string ? seq2[bj, 1] : seq2[bj]

        if b_line.nil?
            # For a String, seq1[ii, 1] at ii == seq1.size is "" rather than
            # nil; without the empty check a spurious '-' event for an empty
            # element would be emitted past the end of the string.
          unless ax.nil? or (string and ax.empty?)
            event = Diff::LCS::ContextChange.new('-', ii, ax, bj, bx)
            event = yield event if block_given?
            callbacks.discard_a(event)
          end
        else
            # Everything in +seq2+ before the matched position is unmatched.
          loop do
            break unless bj < b_line
            bx = string ? seq2[bj, 1] : seq2[bj]
            event = Diff::LCS::ContextChange.new('+', ii, ax, bj, bx)
            event = yield event if block_given?
            callbacks.discard_b(event)
            bj += 1
          end
          bx = string ? seq2[bj, 1] : seq2[bj]
          event = Diff::LCS::ContextChange.new('=', ii, ax, bj, bx)
          event = yield event if block_given?
          callbacks.match(event)
          bj += 1
        end
        ai = ii
      end
      ai += 1

        # The last entry (if any) processed was a match. +ai+ and +bj+ point
        # just past the last matching lines in their sequences.
      while (ai < a_size) or (bj < b_size)
          # last A?
        if ai == a_size and bj < b_size
          if callbacks.respond_to?(:finished_a) and not run_finished_a
            ax = string ? seq1[-1, 1] : seq1[-1]
            bx = string ? seq2[bj, 1] : seq2[bj]
            event = Diff::LCS::ContextChange.new('>', (a_size - 1), ax, bj, bx)
            event = yield event if block_given?
            callbacks.finished_a(event)
            run_finished_a = true
          else
            ax = string ? seq1[ai, 1] : seq1[ai]
            loop do
              bx = string ? seq2[bj, 1] : seq2[bj]
              event = Diff::LCS::ContextChange.new('+', ai, ax, bj, bx)
              event = yield event if block_given?
              callbacks.discard_b(event)
              bj += 1
              break unless bj < b_size
            end
          end
        end

          # last B?
        if bj == b_size and ai < a_size
          if callbacks.respond_to?(:finished_b) and not run_finished_b
            ax = string ? seq1[ai, 1] : seq1[ai]
            bx = string ? seq2[-1, 1] : seq2[-1]
            event = Diff::LCS::ContextChange.new('<', ai, ax, (b_size - 1), bx)
            event = yield event if block_given?
            callbacks.finished_b(event)
            run_finished_b = true
          else
            bx = string ? seq2[bj, 1] : seq2[bj]
            loop do
              ax = string ? seq1[ai, 1] : seq1[ai]
              event = Diff::LCS::ContextChange.new('-', ai, ax, bj, bx)
              event = yield event if block_given?
              callbacks.discard_a(event)
              ai += 1
                # This loop advances +ai+, so it is bounded by +a_size+. The
                # sequence of emitted events is the same as with the previous
                # bj-based test, which always broke after one pass and left
                # the rest to the outer loop.
              break unless ai < a_size
            end
          end
        end

        if ai < a_size
          ax = string ? seq1[ai, 1] : seq1[ai]
          bx = string ? seq2[bj, 1] : seq2[bj]
          event = Diff::LCS::ContextChange.new('-', ai, ax, bj, bx)
          event = yield event if block_given?
          callbacks.discard_a(event)
          ai += 1
        end

        if bj < b_size
          ax = string ? seq1[ai, 1] : seq1[ai]
          bx = string ? seq2[bj, 1] : seq2[bj]
          event = Diff::LCS::ContextChange.new('+', ai, ax, bj, bx)
          event = yield event if block_given?
          callbacks.discard_b(event)
          bj += 1
        end
      end
    end

      # #traverse_balanced is an alternative to #traverse_sequences. It
      # uses a different algorithm to iterate through the entries in the
      # computed longest common subsequence. Instead of viewing the changes as
      # insertions or deletions from one of the sequences, #traverse_balanced
      # will report <em>changes</em> between the sequences. A change is
      # reported through <tt>callbacks#change</tt> when the callback object
      # implements it.
      #
      # The arguments to #traverse_balanced are the two sequences to traverse
      # and a callback object, like this:
      #
      #   traverse_balanced(seq1, seq2, Diff::LCS::ContextDiffCallbacks.new)
      #
      # #sdiff is implemented with #traverse_balanced.
      #
      # == Callback Methods
      # Optional callback methods are <em>emphasized</em>.
      #
      # callbacks#match::               Called when +a+ and +b+ are pointing
      #                                 to common elements in +A+ and +B+.
      # callbacks#discard_a::           Called when +a+ is pointing to an
      #                                 element not in +B+.
      # callbacks#discard_b::           Called when +b+ is pointing to an
      #                                 element not in +A+.
      # <em>callbacks#change</em>::     Called when +a+ and +b+ are pointing
      #                                 to the same relative position, but
      #                                 <tt>A[a]</tt> and <tt>B[b]</tt> are
      #                                 not the same; a <em>change</em> has
      #                                 occurred.
      #
      # #traverse_balanced might be a bit slower than #traverse_sequences,
      # noticeable only while processing huge amounts of data.
      #
      # The +sdiff+ function of this module is implemented as a call to
      # #traverse_balanced.
      #
      # == Algorithm
      #       a---+
      #           v
      #       A = a b c e h j l m n p
      #       B = b c d e f j k l m r s t
      #           ^
      #       b---+
      #
      # === Matches
      # If there are two arrows (+a+ and +b+) pointing to elements of
      # sequences +A+ and +B+, the arrows will initially point to the first
      # elements of their respective sequences. #traverse_balanced will
      # advance the arrows through the sequences one element at a time,
      # calling a method on the user-specified callback object before each
      # advance. It will advance the arrows in such a way that if there are
      # elements <tt>A[ii]</tt> and <tt>B[jj]</tt> which are both equal and
      # part of the longest common subsequence, there will be some moment
      # during the execution of #traverse_balanced when arrow +a+ is pointing
      # to <tt>A[ii]</tt> and arrow +b+ is pointing to <tt>B[jj]</tt>. When
      # this happens, #traverse_balanced will call <tt>callbacks#match</tt>
      # and then it will advance both arrows.
      #
      # === Discards
      # Otherwise, one of the arrows is pointing to an element of its sequence
      # that is not part of the longest common subsequence.
      # #traverse_balanced will advance that arrow and will call
      # <tt>callbacks#discard_a</tt> or <tt>callbacks#discard_b</tt>,
      # depending on which arrow it advanced.
      #
      # === Changes
      # If both +a+ and +b+ point to elements that are not part of the longest
      # common subsequence, then #traverse_balanced will try to call
      # <tt>callbacks#change</tt> and advance both arrows. If
      # <tt>callbacks#change</tt> is not implemented, then
      # <tt>callbacks#discard_a</tt> and <tt>callbacks#discard_b</tt> will be
      # called in turn.
      #
      # The methods for <tt>callbacks#match</tt>, <tt>callbacks#discard_a</tt>,
      # <tt>callbacks#discard_b</tt>, and <tt>callbacks#change</tt> are
      # invoked with an event comprising the action ("=", "-", "+", or "!",
      # respectively), the indices +ii+ and +jj+, and the elements
      # <tt>A[ii]</tt> and <tt>B[jj]</tt>. Return values are discarded by
      # #traverse_balanced.
      #
      # === Context
      # Note that +ii+ and +jj+ may not be the same index position, even if
      # +a+ and +b+ are considered to be pointing to matching or changed
      # elements.
    def traverse_balanced(seq1, seq2, callbacks = Diff::LCS::BalancedCallbacks)
        # Walks +seq1+ and +seq2+ guided by the match vector from
        # Diff::LCS.__lcs and reports every position to +callbacks+ as a
        # match ('='), a discard from +seq1+ ('-'), a discard from +seq2+
        # ('+') or, when the callback object responds to #change, a change
        # ('!'). Each event is a Diff::LCS::ContextChange. When a block is
        # given, the event is passed through it first and the block's return
        # value is what the callback receives.
      matches = Diff::LCS.__lcs(seq1, seq2)
      a_size = seq1.size
      b_size = seq2.size
        # +ai+ / +bj+ are the current positions in +seq1+ / +seq2+; +ma+ /
        # +mb+ are the positions of the next matched pair.
      ai = bj = mb = 0
      ma = -1
        # Strings are indexed with [pos, 1] so elements are one-character
        # strings.
      string = seq1.kind_of?(String)

        # Process all the lines in the match vector.
      loop do
          # Find next match indices +ma+ and +mb+
        loop do
          ma += 1
          break unless ma < matches.size and matches[ma].nil?
        end

        break if ma >= matches.size # end of matches?
        mb = matches[ma]

          # Change(seq2)
          # Report everything that lies before the next matched pair.
        while (ai < ma) or (bj < mb)
          ax = string ? seq1[ai, 1] : seq1[ai]
          bx = string ? seq2[bj, 1] : seq2[bj]

            # The pair of booleans says which side(s) still have unmatched
            # elements before the match.
          case [(ai < ma), (bj < mb)]
          when [true, true]
            if callbacks.respond_to?(:change)
              event = Diff::LCS::ContextChange.new('!', ai, ax, bj, bx)
              event = yield event if block_given?
              callbacks.change(event)
              ai += 1
              bj += 1
            else
                # No #change callback: report a discard from each side in
                # turn. The '+' event is built after +ai+ has advanced.
              event = Diff::LCS::ContextChange.new('-', ai, ax, bj, bx)
              event = yield event if block_given?
              callbacks.discard_a(event)
              ai += 1
              ax = string ? seq1[ai, 1] : seq1[ai]
              event = Diff::LCS::ContextChange.new('+', ai, ax, bj, bx)
              event = yield event if block_given?
              callbacks.discard_b(event)
              bj += 1
            end
          when [true, false]
            event = Diff::LCS::ContextChange.new('-', ai, ax, bj, bx)
            event = yield event if block_given?
            callbacks.discard_a(event)
            ai += 1
          when [false, true]
            event = Diff::LCS::ContextChange.new('+', ai, ax, bj, bx)
            event = yield event if block_given?
            callbacks.discard_b(event)
            bj += 1
          end
        end

          # Match
        ax = string ? seq1[ai, 1] : seq1[ai]
        bx = string ? seq2[bj, 1] : seq2[bj]
        event = Diff::LCS::ContextChange.new('=', ai, ax, bj, bx)
        event = yield event if block_given?
        callbacks.match(event)
        ai += 1
        bj += 1
      end

        # Past the last match: report whatever remains in either sequence,
        # using the same change/discard rules as above but bounded by the
        # sequence sizes.
      while (ai < a_size) or (bj < b_size)
        ax = string ? seq1[ai, 1] : seq1[ai]
        bx = string ? seq2[bj, 1] : seq2[bj]

        case [(ai < a_size), (bj < b_size)]
        when [true, true]
          if callbacks.respond_to?(:change)
            event = Diff::LCS::ContextChange.new('!', ai, ax, bj, bx)
            event = yield event if block_given?
            callbacks.change(event)
            ai += 1
            bj += 1
          else
            event = Diff::LCS::ContextChange.new('-', ai, ax, bj, bx)
            event = yield event if block_given?
            callbacks.discard_a(event)
            ai += 1
            ax = string ? seq1[ai, 1] : seq1[ai]
            event = Diff::LCS::ContextChange.new('+', ai, ax, bj, bx)
            event = yield event if block_given?
            callbacks.discard_b(event)
            bj += 1
          end
        when [true, false]
          event = Diff::LCS::ContextChange.new('-', ai, ax, bj, bx)
          event = yield event if block_given?
          callbacks.discard_a(event)
          ai += 1
        when [false, true]
          event = Diff::LCS::ContextChange.new('+', ai, ax, bj, bx)
          event = yield event if block_given?
          callbacks.discard_b(event)
          bj += 1
        end
      end
    end

    PATCH_MAP = { #:nodoc:
        # Applying a patch forwards: every recorded action is used as is.
      :patch => { '+' => '+', '-' => '-', '!' => '!', '=' => '=' }.freeze,
        # Reversing a patch: additions and deletions swap roles.
      :unpatch => { '+' => '-', '-' => '+', '!' => '!', '=' => '=' }.freeze
    }.freeze

      # Given a patchset, convert the current version to the new
      # version. If +direction+ is not specified (must be
      # <tt>:patch</tt> or <tt>:unpatch</tt>), then discovery of the
      # direction of the patch will be attempted.
    def patch(src, patchset, direction = nil)
        # Builds and returns a new object of +src+'s class by replaying
        # +patchset+ against +src+; +src+ is only read here. +direction+
        # selects a row of PATCH_MAP (:patch or :unpatch); when nil it is
        # taken from Diff::LCS.__diff_direction and falls back to :patch.
      string = src.kind_of?(String)
        # Start with a new empty type of the source's class
      res = src.class.new

        # Normalize the patchset.
      patchset = __normalize_patchset(patchset)

      direction ||= Diff::LCS.__diff_direction(src, patchset)
      direction ||= :patch

        # +ai+ is the read position in +src+; +bj+ counts the elements
        # appended to +res+ so far and is compared against the positions a
        # change targets in the result.
      ai = bj = 0

      patchset.each do |change|
          # Both Change and ContextChange support #action
        action = PATCH_MAP[direction][change.action]

        case change
        when Diff::LCS::ContextChange
            # Pick the element and positions according to the direction:
            # unpatching swaps the old/new roles.
          case direction
          when :patch
            el = change.new_element
            op = change.old_position
            np = change.new_position
          when :unpatch
            el = change.old_element
            op = change.new_position
            np = change.old_position
          end

          case action
          when '-' # Remove details from the old string
              # Copy untouched elements up to the deleted position, then
              # skip the deleted source element.
            while ai < op
              res << (string ? src[ai, 1] : src[ai])
              ai += 1
              bj += 1
            end
            ai += 1
          when '+'
              # Copy untouched elements until the result reaches the
              # insertion position, then append the new element.
            while bj < np
              res << (string ? src[ai, 1] : src[ai])
              ai += 1
              bj += 1
            end

            res << el
            bj += 1
          when '='
              # This only appears in sdiff output with the SDiff callback.
              # Therefore, we only need to worry about dealing with a single
              # element.
            res << el

            ai += 1
            bj += 1
          when '!'
              # Copy untouched elements up to the changed position, then
              # replace one source element with the new element.
            while ai < op
              res << (string ? src[ai, 1] : src[ai])
              ai += 1
              bj += 1
            end

            bj += 1
            ai += 1

            res << el
          end
        when Diff::LCS::Change
            # Plain Change objects carry a single position and element;
            # only '-' and '+' are handled for them.
          case action
          when '-'
            while ai < change.position
              res << (string ? src[ai, 1] : src[ai])
              ai += 1
              bj += 1
            end
            ai += 1
          when '+'
            while bj < change.position
              res << (string ? src[ai, 1] : src[ai])
              ai += 1
              bj += 1
            end

            bj += 1

            res << change.element
          end
        end
      end

        # Copy whatever is left of +src+ after the last change.
      while ai < src.size
        res << (string ? src[ai, 1] : src[ai])
        ai += 1
        bj += 1
      end

      res
    end

      # Given a patchset, convert the current version to the prior
      # version. Does no auto-discovery.
    def unpatch!(src, patchset)
        # Delegates to Diff::LCS.patch with the direction fixed to :unpatch,
        # so no direction discovery takes place. The result of that call is
        # returned.
      Diff::LCS.patch(src, patchset, :unpatch)
    end

      # Given a patchset, convert the current version to the next
      # version. Does no auto-discovery.
    def patch!(src, patchset)
        # Delegates to Diff::LCS.patch with the direction fixed to :patch,
        # so no direction discovery takes place. The result of that call is
        # returned.
      Diff::LCS.patch(src, patchset, :patch)
    end

# private
      # Compute the longest common subsequence between the sequenced Enumerables
      # +a+ and +b+. The result is an array whose contents is such that
      #
      #     result = Diff::LCS.__lcs(a, b)
      #     result.each_with_index do |e, ii|
      #       assert_equal(a[ii], b[e]) unless e.nil?
      #     end
    def __lcs(a, b)
        # Returns the match vector: an Array in which the entry at an index
        # of +a+ is the index of the matching element in +b+, or nil (also
        # for positions never assigned) where there is no match.
      a_start = b_start = 0
      a_finish = a.size - 1
      b_finish = b.size - 1
      vector = []

        # Prune off any common elements at the beginning...
      while (a_start <= a_finish) and
            (b_start <= b_finish) and
            (a[a_start] == b[b_start])
        vector[a_start] = b_start
        a_start += 1
        b_start += 1
      end

        # Now the end...
      while (a_start <= a_finish) and
            (b_start <= b_finish) and
            (a[a_finish] == b[b_finish])
        vector[a_finish] = b_finish
        a_finish -= 1
        b_finish -= 1
      end

        # Now, compute the equivalence classes of positions of elements.
        # NOTE(review): b_matches[ai] is iterated without a nil check, so
        # __position_hash presumably returns an empty list for elements
        # that do not occur in +b+ -- confirm.
      b_matches = Diff::LCS.__position_hash(b, b_start .. b_finish)

        # +thresh+ is kept sorted by __replace_next_larger; +links+ holds,
        # per slot of +thresh+, a chain node of the form
        # [previous node, index in a, index in b].
      thresh = []
      links = []

      (a_start .. a_finish).each do |ii|
        ai = a.kind_of?(String) ? a[ii, 1] : a[ii]
        bm = b_matches[ai]
        kk = nil
          # Candidate positions in +b+ are visited from last to first.
        bm.reverse_each do |jj|
          if kk and (thresh[kk] > jj) and (thresh[kk - 1] < jj)
            thresh[kk] = jj
          else
            kk = Diff::LCS.__replace_next_larger(thresh, jj, kk)
          end
          links[kk] = [ (kk > 0) ? links[kk - 1] : nil, ii, jj ] unless kk.nil?
        end
      end

        # Follow the chain back from the node of the last +thresh+ slot,
        # recording each (index in a => index in b) pair in the vector.
      unless thresh.empty?
        link = links[thresh.size - 1]
        while not link.nil?
          vector[link[1]] = link[2]
          link = link[0]
        end
      end

      vector
    end

      # Find the place at which +value+ would normally be inserted into the
      # Enumerable. If that place is already occupied by +value+, do nothing
      # and return +nil+. If the place does not exist (i.e., it is off the end
      # of the Enumerable), add it to the end. Otherwise, replace the element
      # at that point with +value+. It is assumed that the Enumerable's values
      # are numeric.
      #
      # This operation preserves the sort order.
    def __replace_next_larger(enum, value, last_index = nil)
        # A value larger than the current last element (or any value, when
        # +enum+ is empty) is appended; its new index is returned.
      if enum.empty? || value > enum[-1]
        enum << value
        return enum.size - 1
      end

        # Bisect between index 0 and +last_index+ (enum.size when the caller
        # supplies no upper bound). An exact hit means there is nothing to
        # replace, which is reported as nil.
      low = 0
      high = last_index || enum.size
      until low > high
        mid = (low + high) >> 1
        probe = enum[mid]

        return nil if value == probe

        if value > probe
          low = mid + 1
        else
          high = mid - 1
        end
      end

        # +low+ is now the insertion point; overwrite the element found there
        # and report where the value was written.
      enum[low] = value
      low
    end

      # If +vector+ maps the matching elements of another collection onto this
      # Enumerable, compute the inverse +vector+ that maps this Enumerable
      # onto the collection. (Currently unused.)
    def __inverse_vector(a, vector)
        # Work on a copy of +a+: slots that no entry of +vector+ points at
        # keep whatever the copy already contained.
      inverse = a.dup
      vector.size.times do |ii|
        target = vector[ii]
          # A nil entry means position ii has no counterpart; nothing to write.
        next if target.nil?
        inverse[target] = ii
      end
      inverse
    end

      # Returns a hash mapping each element of an Enumerable to the set of
      # positions it occupies in the Enumerable, optionally restricted to the
      # elements specified in the range of indexes specified by +interval+.
    def __position_hash(enum, interval = nil)
        # Builds and returns a Hash whose keys are elements of +enum+ and
        # whose values are Arrays of the indexes (in iteration order of
        # +interval+) at which each element was found. Looking up an element
        # that was never seen yields (and stores) an empty Array.
        #
        # +interval+ is any object whose #each yields indexes. When it is
        # omitted (nil), every index of +enum+ is visited. The previous
        # default, 0 .. -1, is an empty Range when iterated, so the
        # one-argument form always produced an empty Hash. An explicitly
        # passed 0 .. -1 still means "no indexes".
      interval ||= 0 ... enum.size

        # Decide once how elements are read: a String is sampled as
        # one-character substrings via enum[ii, 1].
      string = enum.kind_of?(String)

      hash = Hash.new { |store, key| store[key] = [] }
      interval.each do |ii|
        element = string ? enum[ii, 1] : enum[ii]
        hash[element] << ii
      end
      hash
    end

      # Examine the patchset and the source to see in which direction the
      # patch should be applied.
      #
      # WARNING: By default, this examines the whole patch, so this could take
      # some time. This also works better with Diff::LCS::ContextChange or
      # Diff::LCS::Change as its source, as an array will cause the creation
      # of one of the above.
    def __diff_direction(src, patchset, limit = nil)
        # Tallies, change by change, whether +src+ agrees with the "old"
        # (left) or the "new" (right) side of +patchset+. Returns :patch when
        # only left-side matches were found and :unpatch when only right-side
        # matches were found; raises RuntimeError when neither or both sides
        # matched. When +limit+ is given, scanning stops after the first
        # change whose running count exceeds +limit+.
      count = left = left_miss = right = right_miss = 0
      string = src.kind_of?(String)

        # Reads the element of +src+ at +pos+; a String source is read with
        # src[pos, 1] (a one-character substring).
      element_at = lambda { |pos| string ? src[pos, 1] : src[pos] }

      patchset.each do |change|
        count += 1

        case change
        when Diff::LCS::Change
            # With a simplistic change, we can't tell the difference between
            # the left and right on '!' actions, so we ignore those. On '='
            # actions, if there's a miss, we miss both left and right.
          element = element_at.call(change.position)

          case change.action
          when '-'
            if element == change.element
              left += 1
            else
              left_miss += 1
            end
          when '+'
            if element == change.element
              right += 1
            else
              right_miss += 1
            end
          when '='
            if element != change.element
              left_miss += 1
              right_miss += 1
            end
          end
        when Diff::LCS::ContextChange
          case change.action
          when '-' # Remove details from the old string
            if element_at.call(change.old_position) == change.old_element
              left += 1
            else
              left_miss += 1
            end
          when '+'
            if element_at.call(change.new_position) == change.new_element
              right += 1
            else
              right_miss += 1
            end
          when '='
              # An unchanged element can only produce misses, one per side.
            left_miss += 1 if element_at.call(change.old_position) != change.old_element
            right_miss += 1 if element_at.call(change.new_position) != change.new_element
          when '!'
              # Try the old side first; only when that fails is the new side
              # consulted, and a double failure counts as a miss on both.
            if element_at.call(change.old_position) == change.old_element
              left += 1
            elsif element_at.call(change.new_position) == change.new_element
              right += 1
            else
              left_miss += 1
              right_miss += 1
            end
          end
        end

        break if !limit.nil? && count > limit
      end

        # Use && rather than the keyword form: `x = a and b` parses as
        # `(x = a) and b`, which left the second clause out of the assignment
        # and evaluated it in void context. The miss counters only ever
        # increase, so the value assigned is the same as before.
      no_left = (left == 0) && (left_miss >= 0)
      no_right = (right == 0) && (right_miss >= 0)

      case [no_left, no_right]
      when [false, true]
        :patch
      when [true, false]
        :unpatch
      else
        raise "The provided patchset does not appear to apply to the provided value as either source or destination value."
      end
    end

      # Normalize the patchset. A patchset is always a sequence of changes, but
      # how those changes are represented may vary, depending on how they were
      # generated. In all cases we support, we also support the array
      # representation of the changes. The formats are:
      #
      #   [ # patchset <- Diff::LCS.diff(a, b)
      #     [ # one or more hunks
      #       Diff::LCS::Change # one or more changes
      #     ] ]
      #
      #   [ # patchset, equivalent to the above
      #     [ # one or more hunks
      #       [ action, line, value ] # one or more changes
      #     ] ]
      #
      #   [ # patchset <- Diff::LCS.diff(a, b, Diff::LCS::ContextDiffCallbacks)
      #     #       OR <- Diff::LCS.sdiff(a, b, Diff::LCS::ContextDiffCallbacks)
      #     [ # one or more hunks
      #       Diff::LCS::ContextChange # one or more changes
      #     ] ]
      #
      #   [ # patchset, equivalent to the above
      #     [ # one or more hunks
      #       [ action, [ old line, old value ], [ new line, new value ] ]
      #         # one or more changes
      #     ] ]
      #
      #   [ # patchset <- Diff::LCS.sdiff(a, b)
      #     #       OR <- Diff::LCS.diff(a, b, Diff::LCS::SDiffCallbacks)
      #     Diff::LCS::ContextChange # one or more changes
      #   ]
      #
      #   [ # patchset, equivalent to the above
      #     [ action, [ old line, old value ], [ new line, new value ] ]
      #       # one or more changes
      #   ]
      #
      # The result of this will be either of the following.
      #
      #   [ # patchset
      #     Diff::LCS::ContextChange # one or more changes
      #   ]
      #
      #   [ # patchset
      #     Diff::LCS::Change # one or more changes
      #   ]
      #
      # If either of the above is provided, it will be returned as such.
      #
    def __normalize_patchset(patchset)
        # Turns one array-form change into its object form. Only the
        # ContextChange array form carries an Array in slot 1; in the Change
        # array form that slot is the (singular) line.
      to_change = lambda do |raw|
        if raw[1].kind_of?(Array)
          Diff::LCS::ContextChange.from_a(raw)
        else
          Diff::LCS::Change.from_a(raw)
        end
      end

      normalized = patchset.map do |hunk|
        case hunk
        when Diff::LCS::ContextChange, Diff::LCS::Change
            # Already a change object: passed through untouched.
          hunk
        when Array
            # A top-level entry shaped [ non-array, array, array ] is a single
            # ContextChange in array form rather than a hunk of changes.
          lone_context_change = !hunk[0].kind_of?(Array) &&
                                hunk[1].kind_of?(Array) &&
                                hunk[2].kind_of?(Array)

          if lone_context_change
            Diff::LCS::ContextChange.from_a(hunk)
          else
              # A real hunk: convert each member. Members that are neither
              # change objects nor Arrays come out as nil.
            hunk.map do |change|
              case change
              when Diff::LCS::ContextChange, Diff::LCS::Change then change
              when Array then to_change.call(change)
              end
            end
          end
        else
          raise ArgumentError, "Cannot normalise a hunk of class #{hunk.class}."
        end
      end

        # Hunks were mapped to nested Arrays; collapse them into one flat list.
      normalized.flatten
    end
  end
end