Replaced diff.rb with xhtmldiff.rb, which (unlike its predecessor) produces well-formed redline documents.

2007-02-03 22:52:48 -06:00 · 2007-02-03 22:52:48 -06:00 · 8c52f28864
commit 8c52f28864
parent 86e9c70a26
12 changed files with 2420 additions and 391 deletions
--- a/lib/diff.rb
+++ b/lib/diff.rb
@ -1,316 +0,0 @@
 # Word-level differ for HTML fragments. Including this module provides #diff,
 # which returns the new version of a document with changed runs wrapped in
 # <ins> and <del> elements.
 module HTMLDiff
  # A run of +size+ identical consecutive words, beginning at +start_in_old+
  # in the old word list and at +start_in_new+ in the new word list.
  Match = Struct.new(:start_in_old, :start_in_new, :size)
  class Match
    # Index just past the last matched word in the old word list.
    def end_in_old
      start_in_old + size
    end
    # Index just past the last matched word in the new word list.
    def end_in_new
      start_in_new + size
    end
  end
  # One step of the edit script. +action+ is one of DiffBuilder::VALID_METHODS;
  # the remaining members are half-open word ranges in the old and new lists.
  Operation = Struct.new(:action, :start_in_old, :end_in_old, :start_in_new, :end_in_new)
  # Builds the marked-up diff of two versions. Each version is either a String
  # or an already exploded sequence of characters (see #explode).
  class DiffBuilder
    def initialize(old_version, new_version)
      @old_version = old_version
      @new_version = new_version
      @content = []
    end
    # Runs the whole pipeline and returns the marked-up result as one String.
    def build
      split_inputs_to_words
      index_new_words
      operations.each { |op| perform_operation(op) }
      @content.join
    end
    # Tokenises both versions into @old_words and @new_words.
    def split_inputs_to_words
      @old_words = convert_html_to_list_of_words(explode(@old_version))
      @new_words = convert_html_to_list_of_words(explode(@new_version))
    end
    # Maps every word of the new version to the ascending list of positions at
    # which it occurs there.
    def index_new_words
      # A block default is required here: each key needs its own array.
      @word_indices = Hash.new { |h, word| h[word] = [] }
      @new_words.each_with_index { |word, i| @word_indices[word] << i }
    end
    # Converts the matching blocks into an ordered list of Operation structs
    # (:replace, :insert, :delete and :equal) covering both word lists.
    def operations
      position_in_old = position_in_new = 0
      ops = []
      matches = matching_blocks
      # An empty match at the very end forces the loop below to emit an
      # operation for whatever unmatched tail remains in either version.
      matches << Match.new(@old_words.length, @new_words.length, 0)
      matches.each do |match|
        old_gap = position_in_old != match.start_in_old
        new_gap = position_in_new != match.start_in_new
        # No gap on either side (action stays nil) happens when the match
        # starts exactly where the previous one ended in both versions.
        action =
          if old_gap && new_gap
            :replace
          elsif new_gap
            :insert
          elsif old_gap
            :delete
          end
        if action
          ops << Operation.new(action,
              position_in_old, match.start_in_old,
              position_in_new, match.start_in_new)
        end
        unless match.size == 0
          ops << Operation.new(:equal,
              match.start_in_old, match.end_in_old,
              match.start_in_new, match.end_in_new)
        end
        position_in_old = match.end_in_old
        position_in_new = match.end_in_new
      end
      ops
    end
    # Returns all matching blocks between the two word lists, in document order.
    def matching_blocks
      blocks = []
      recursively_find_matching_blocks(0, @old_words.size, 0, @new_words.size, blocks)
      blocks
    end
    # Finds the best match inside the given window, then recurses into the
    # regions to its left and right. Matches are appended to +matching_blocks+
    # in order.
    def recursively_find_matching_blocks(start_in_old, end_in_old, start_in_new, end_in_new, matching_blocks)
      match = find_match(start_in_old, end_in_old, start_in_new, end_in_new)
      return unless match
      if start_in_old < match.start_in_old && start_in_new < match.start_in_new
        recursively_find_matching_blocks(
            start_in_old, match.start_in_old, start_in_new, match.start_in_new, matching_blocks)
      end
      matching_blocks << match
      if match.end_in_old < end_in_old && match.end_in_new < end_in_new
        recursively_find_matching_blocks(
            match.end_in_old, end_in_old, match.end_in_new, end_in_new, matching_blocks)
      end
    end
    # Returns the longest run of identical consecutive words inside the window
    # old[start_in_old...end_in_old] / new[start_in_new...end_in_new] as a
    # Match, or nil when the windows share no word. The earliest longest run
    # wins because only strictly longer runs replace the current best.
    def find_match(start_in_old, end_in_old, start_in_new, end_in_new)
      best_match_in_old = start_in_old
      best_match_in_new = start_in_new
      best_match_size = 0
      # match_length_at[j] is the length of the common run ending at the
      # previous old word and at new word j. A plain default of 0 (instead of
      # a storing default block) keeps lookups from growing the hash.
      match_length_at = Hash.new(0)
      start_in_old.upto(end_in_old - 1) do |index_in_old|
        new_match_length_at = Hash.new(0)
        @word_indices[@old_words[index_in_old]].each do |index_in_new|
          next  if index_in_new < start_in_new
          # Positions are ascending, so nothing further can be in the window.
          break if index_in_new >= end_in_new
          new_match_length = match_length_at[index_in_new - 1] + 1
          new_match_length_at[index_in_new] = new_match_length
          if new_match_length > best_match_size
            best_match_in_old = index_in_old - new_match_length + 1
            best_match_in_new = index_in_new - new_match_length + 1
            best_match_size = new_match_length
          end
        end
        match_length_at = new_match_length_at
      end
      # Extending the match with the helpers below is disabled in this version:
      #      best_match_in_old, best_match_in_new, best_match_size = add_matching_words_left(
      #          best_match_in_old, best_match_in_new, best_match_size, start_in_old, start_in_new)
      #      best_match_in_old, best_match_in_new, match_size = add_matching_words_right(
      #          best_match_in_old, best_match_in_new, best_match_size, end_in_old, end_in_new)
      best_match_size.zero? ? nil : Match.new(best_match_in_old, best_match_in_new, best_match_size)
    end
    # Grows a match to the left while the preceding words are equal and still
    # inside the window. Returns [match_in_old, match_in_new, match_size].
    # Not called by #find_match at present.
    def add_matching_words_left(match_in_old, match_in_new, match_size, start_in_old, start_in_new)
      while match_in_old > start_in_old &&
            match_in_new > start_in_new &&
            @old_words[match_in_old - 1] == @new_words[match_in_new - 1]
        match_in_old -= 1
        match_in_new -= 1
        match_size += 1
      end
      [match_in_old, match_in_new, match_size]
    end
    # Grows a match to the right while the following words are equal and still
    # inside the window. Returns [match_in_old, match_in_new, match_size].
    # Not called by #find_match at present.
    def add_matching_words_right(match_in_old, match_in_new, match_size, end_in_old, end_in_new)
      while match_in_old + match_size < end_in_old &&
            match_in_new + match_size < end_in_new &&
            @old_words[match_in_old + match_size] == @new_words[match_in_new + match_size]
        match_size += 1
      end
      [match_in_old, match_in_new, match_size]
    end
    # The only actions #perform_operation is willing to dispatch.
    VALID_METHODS = [:replace, :insert, :delete, :equal].freeze
    # Dispatches +operation+ to the method named by its action.
    # Raises ArgumentError for an action that is not in VALID_METHODS, so a
    # malformed Operation can never invoke an arbitrary method through #send.
    def perform_operation(operation)
      unless VALID_METHODS.include?(operation.action)
        raise ArgumentError, "Unknown diff action #{operation.action.inspect}"
      end
      @operation = operation
      send(operation.action, operation)
    end
    # A replacement is rendered as a deletion followed by an insertion, both
    # carrying the 'diffmod' class.
    def replace(operation)
      delete(operation, 'diffmod')
      insert(operation, 'diffmod')
    end
    def insert(operation, tagclass = 'diffins')
      insert_tag('ins', tagclass, @new_words[operation.start_in_new...operation.end_in_new])
    end
    def delete(operation, tagclass = 'diffdel')
      insert_tag('del', tagclass, @old_words[operation.start_in_old...operation.end_in_old])
    end
    def equal(operation)
      # no tags to insert, simply copy the matching words from one of the versions
      @content.concat(@new_words[operation.start_in_new...operation.end_in_new])
    end
    # Truthy when +item+ is a single tag, optionally surrounded by whitespace.
    # Note that the pattern accepts closing tags as well.
    def opening_tag?(item)
      item =~ %r!^\s*<[^>]+>\s*$!
    end
    # Truthy when +item+ is a single closing tag, optionally surrounded by
    # whitespace.
    def closing_tag?(item)
      item =~ %r!^\s*</[^>]+>\s*$!
    end
    def tag?(item)
      opening_tag?(item) || closing_tag?(item)
    end
    # Removes and returns the leading run of +words+ for which the block is
    # truthy. +words+ is modified in place; the result may be empty.
    def extract_consecutive_words(words, &condition)
      run_length = 0
      run_length += 1 while run_length < words.length && condition.call(words[run_length])
      words.slice!(0, run_length)
    end
    # This method encloses words within a specified tag (ins or del), and adds this into @content,
    # with a twist: if the words contain tags, it actually creates multiple ins or del elements,
    # so that none of them wraps a tag. This handles cases like
    # old: '<p>a</p>'
    # new: '<p>ab</p><p>c</p>'
    # diff result: '<p>a<ins>b</ins></p><p><ins>c</ins></p>'
    # this still doesn't guarantee valid HTML (hint: think about diffing a text containing ins or
    # del tags), but handles correctly more cases than the earlier version.
    #
    # +words+ is consumed (emptied) in the process.
    #
    # P.S.: Spare a thought for people who write HTML browsers. They live in this ... every day.
    def insert_tag(tagname, cssclass, words)
      until words.empty?
        non_tags = extract_consecutive_words(words) { |word| !tag?(word) }
        @content << wrap_text(non_tags.join, tagname, cssclass) unless non_tags.empty?
        break if words.empty?
        @content.concat(extract_consecutive_words(words) { |word| tag?(word) })
      end
    end
    def wrap_text(text, tagname, cssclass)
      %(<#{tagname} class="#{cssclass}">#{text}</#{tagname}>)
    end
    # Splits a String into single characters; anything else is assumed to be
    # an exploded sequence already and is returned unchanged.
    def explode(sequence)
      sequence.is_a?(String) ? sequence.split(//) : sequence
    end
    def end_of_tag?(char)
      char == '>'
    end
    def start_of_tag?(char)
      char == '<'
    end
    def whitespace?(char)
      char =~ /\s/
    end
    # Tokenises +x+ into "words": each tag ('<' up to the next '>'), each run
    # of whitespace and each run of other characters becomes one word. With
    # +use_brackets+ the tag delimiters are emitted as '[' and ']'.
    #
    # The elements of +x+ are never modified: every word is accumulated in a
    # fresh String, so passing an Array of characters is safe.
    def convert_html_to_list_of_words(x, use_brackets = false)
      tag_open  = use_brackets ? '[' : '<'
      tag_close = use_brackets ? ']' : '>'
      mode = :char
      current_word = ''.dup
      words = []
      explode(x).each do |char|
        case mode
        when :tag
          if end_of_tag?(char)
            current_word << tag_close
            words << current_word
            current_word = ''.dup
            # '>' is never whitespace, so the next state is always :char.
            mode = :char
          else
            current_word << char
          end
        when :char
          if start_of_tag?(char)
            words << current_word unless current_word.empty?
            current_word = tag_open.dup
            mode = :tag
          elsif whitespace?(char)
            words << current_word unless current_word.empty?
            # dup: the word is appended to below and must not alias the input
            current_word = char.dup
            mode = :whitespace
          else
            current_word << char
          end
        when :whitespace
          if start_of_tag?(char)
            words << current_word unless current_word.empty?
            current_word = tag_open.dup
            mode = :tag
          elsif whitespace?(char)
            current_word << char
          else
            words << current_word unless current_word.empty?
            # dup: the word is appended to below and must not alias the input
            current_word = char.dup
            mode = :char
          end
        else
          raise "Unknown mode #{mode.inspect}"
        end
      end
      words << current_word unless current_word.empty?
      words
    end
  end # of class Diff Builder
  # Returns version +b+ marked up with the changes relative to version +a+.
  def diff(a, b)
    DiffBuilder.new(a, b).build
  end
 end
--- a/lib/diff/lcs.rb
+++ b/lib/diff/lcs.rb
--- a/lib/diff/lcs/array.rb
+++ b/lib/diff/lcs/array.rb
@ -0,0 +1,21 @@
 #! /usr/bin/env ruby
 #--
 # Copyright 2004 Austin Ziegler <diff-lcs@halostatue.ca>
 #   adapted from:
 #     Algorithm::Diff (Perl) by Ned Konz <perl@bike-nomad.com>
 #     Smalltalk by Mario I. Wolczko <mario@wolczko.com>
 #   implements McIlroy-Hunt diff algorithm
 #
 # This program is free software. It may be redistributed and/or modified under
 # the terms of the GPL version 2 (or later), the Perl Artistic licence, or the
 # Ruby licence.
 # 
 # $Id: array.rb,v 1.3 2004/08/08 20:33:09 austin Exp $
 #++
 # Includes Diff::LCS into the Array built-in class.
 require 'diff/lcs'
 # Reopens the built-in Array class and mixes Diff::LCS into it, so the
 # module's instance methods become available on every Array.
 class Array
  include Diff::LCS
 end
--- a/lib/diff/lcs/block.rb
+++ b/lib/diff/lcs/block.rb
@ -0,0 +1,51 @@
 #! /usr/bin/env ruby
 #--
 # Copyright 2004 Austin Ziegler <diff-lcs@halostatue.ca>
 #   adapted from:
 #     Algorithm::Diff (Perl) by Ned Konz <perl@bike-nomad.com>
 #     Smalltalk by Mario I. Wolczko <mario@wolczko.com>
 #   implements McIlroy-Hunt diff algorithm
 #
 # This program is free software. It may be redistributed and/or modified under
 # the terms of the GPL version 2 (or later), the Perl Artistic licence, or the
 # Ruby licence.
 # 
 # $Id: block.rb,v 1.3 2004/08/08 20:33:09 austin Exp $
 #++
 # Contains Diff::LCS::Block for bin/ldiff.
  # A block is an operation removing, adding, or changing a group of items.
  # Basically, this is just a list of changes, where each change adds or
  # deletes a single item. Used by bin/ldiff.
 class Diff::LCS::Block
    # All change items of the block, and the subsets that report themselves
    # as additions (+insert+) or deletions (+remove+).
  attr_reader :changes, :insert, :remove
    # Walks +chunk+ once, recording every item in #changes and additionally
    # in #remove when it answers true to +deleting?+ and in #insert when it
    # answers true to +adding?+.
  def initialize(chunk)
    @changes = []
    @insert = []
    @remove = []
    chunk.each do |change|
      @changes.push(change)
      @remove.push(change) if change.deleting?
      @insert.push(change) if change.adding?
    end
  end
    # Net effect of the block on length: insertions minus removals.
  def diff_size
    @insert.length - @remove.length
  end
    # One-character code for the block: '!' when it both removes and inserts,
    # '-' when it only removes, '+' when it only inserts and '^' when it does
    # neither.
  def op
    removing = !@remove.empty?
    inserting = !@insert.empty?
    if removing && inserting
      '!'
    elsif removing
      '-'
    elsif inserting
      '+'
    else
      '^'
    end
  end
 end
--- a/lib/diff/lcs/callbacks.rb
+++ b/lib/diff/lcs/callbacks.rb
@ -0,0 +1,322 @@
 #! /usr/bin/env ruby
 #--
 # Copyright 2004 Austin Ziegler <diff-lcs@halostatue.ca>
 #   adapted from:
 #     Algorithm::Diff (Perl) by Ned Konz <perl@bike-nomad.com>
 #     Smalltalk by Mario I. Wolczko <mario@wolczko.com>
 #   implements McIlroy-Hunt diff algorithm
 #
 # This program is free software. It may be redistributed and/or modified under
 # the terms of the GPL version 2 (or later), the Perl Artistic licence, or the
 # Ruby licence.
 # 
 # $Id: callbacks.rb,v 1.4 2004/09/14 18:51:26 austin Exp $
 #++
 # Contains definitions for all default callback objects.
 require 'diff/lcs/change'
 module Diff::LCS
    # The default callback object: every event handler simply hands the event
    # it was given straight back. #finished_a and #finished_b are deliberately
    # not provided.
    #
    # The class itself is the callback object, so it is passed as is, e.g.,
    #
    #     Diff::LCS.LCS(seq1, seq2, Diff::LCS::DefaultCallbacks)
  class DefaultCallbacks
      # All handlers are class methods; instantiation is not allowed.
    private_class_method :new
      # Called when two items match.
    def self.match(event)
      event
    end
      # Called when the old value is discarded in favour of the new value.
    def self.discard_a(event)
      event
    end
      # Called when the new value is discarded in favour of the old value.
    def self.discard_b(event)
      event
    end
      # Called when both the old and new values have changed.
    def self.change(event)
      event
    end
  end
    # Another name for DefaultCallbacks, used in Diff::LCS#traverse_sequences.
    #
    #     Diff::LCS.LCS(seq1, seq2, Diff::LCS::SequenceCallbacks)
  SequenceCallbacks = DefaultCallbacks
    # Another name for DefaultCallbacks, used in Diff::LCS#traverse_balanced.
    #
    #     Diff::LCS.LCS(seq1, seq2, Diff::LCS::BalancedCallbacks)
  BalancedCallbacks = DefaultCallbacks
 end
  # This will produce a compound array of simple diff change objects. Each
  # element in the #diffs array is a +hunk+ array, where each element in
  # each +hunk+ array is a single Change object representing the addition
  # or removal of a single element from one of the two tested sequences.
  # The +hunk+ provides the full context for the changes.
  #
  #     diffs = Diff::LCS.diff(seq1, seq2)
  #       # This example shows a simplified array format.
  #       # [ [ [ '-',  0, 'a' ] ],   # 1
  #       #   [ [ '+',  2, 'd' ] ],   # 2
  #       #   [ [ '-',  4, 'h' ],     # 3
  #       #     [ '+',  4, 'f' ] ],
  #       #   [ [ '+',  6, 'k' ] ],   # 4
  #       #   [ [ '-',  8, 'n' ],     # 5
  #       #     [ '-',  9, 'p' ],
  #       #     [ '+',  9, 'r' ],
  #       #     [ '+', 10, 's' ],
  #       #     [ '+', 11, 't' ] ] ]
  #
  # There are five hunks here. The first hunk says that the +a+ at position 0
  # of the first sequence should be deleted (<tt>'-'</tt>). The second hunk
  # says that the +d+ at position 2 of the second sequence should be inserted
  # (<tt>'+'</tt>). The third hunk says that the +h+ at position 4 of the
  # first sequence should be removed and replaced with the +f+ from position 4
  # of the second sequence. The other two hunks are described similarly.
  #
  # === Use
  # This callback object must be initialised and is used by the Diff::LCS#diff
  # method.
  #
  #     cbo = Diff::LCS::DiffCallbacks.new
  #     Diff::LCS.LCS(seq1, seq2, cbo)
  #     cbo.finish
  #
  # Note that the call to #finish is absolutely necessary, or the last set of
  # changes will not be visible. Alternatively, can be used as:
  #
  #     cbo = Diff::LCS::DiffCallbacks.new { |tcbo| Diff::LCS.LCS(seq1, seq2, tcbo) }
  #
  # The necessary #finish call will be made.
  #
  # === Simplified Array Format
  # The simplified array format used in the example above can be obtained
  # with:
  #
  #     require 'pp'
  #     pp diffs.map { |e| e.map { |f| f.to_a } }
 class Diff::LCS::DiffCallbacks
    # The list of completed hunks; each hunk is an array of changes.
  attr_reader :diffs
    # Starts with no hunks. When a block is given, the new object is yielded
    # to it and #finish is called afterwards, even if the block raises.
  def initialize # :yields self:
    @hunk = []
    @diffs = []
    return unless block_given?
    begin
      yield self
    ensure
      finish
    end
  end
    # Closes the diff: a hunk that is still being collected is moved to #diffs.
  def finish
    add_nonempty_hunk
  end
    # A matching pair ends the hunk currently being collected.
  def match(event)
    add_nonempty_hunk
  end
    # Records the removal of the event's old element in the current hunk.
  def discard_a(event)
    removal = Diff::LCS::Change.new('-', event.old_position, event.old_element)
    @hunk << removal
  end
    # Records the addition of the event's new element in the current hunk.
  def discard_b(event)
    addition = Diff::LCS::Change.new('+', event.new_position, event.new_element)
    @hunk << addition
  end
    # Moves the current hunk to #diffs unless it is empty, then starts a
    # fresh hunk.
  def add_nonempty_hunk
    @diffs.push(@hunk) unless @hunk.empty?
    @hunk = []
  end
  private :add_nonempty_hunk
 end
  # This will produce a compound array of contextual diff change objects. Each
  # element in the #diffs array is a "hunk" array, where each element in each
  # "hunk" array is a single change. Each change is a Diff::LCS::ContextChange
  # that contains both the old index and new index values for the change. The
  # "hunk" provides the full context for the changes. Both old and new objects
  # will be presented for changed objects. +nil+ will be substituted for a
  # discarded object.
  #
  #     seq1 = %w(a b c e h j l m n p)
  #     seq2 = %w(b c d e f j k l m r s t)
  #
  #     diffs = Diff::LCS.diff(seq1, seq2, Diff::LCS::ContextDiffCallbacks)
  #       # This example shows a simplified array format.
  #       # [ [ [ '-', [  0, 'a' ], [  0, nil ] ] ],   # 1
  #       #   [ [ '+', [  3, nil ], [  2, 'd' ] ] ],   # 2
  #       #   [ [ '-', [  4, 'h' ], [  4, nil ] ],     # 3
  #       #     [ '+', [  5, nil ], [  4, 'f' ] ] ],
  #       #   [ [ '+', [  6, nil ], [  6, 'k' ] ] ],   # 4
  #       #   [ [ '-', [  8, 'n' ], [  9, nil ] ],     # 5
  #       #     [ '+', [  9, nil ], [  9, 'r' ] ],
  #       #     [ '-', [  9, 'p' ], [ 10, nil ] ],
  #       #     [ '+', [ 10, nil ], [ 10, 's' ] ],
  #       #     [ '+', [ 10, nil ], [ 11, 't' ] ] ] ]
  #
  # The five hunks shown are comprised of individual changes; if there is a
  # related set of changes, they are still shown individually.
  #
  # This callback can also be used with Diff::LCS#sdiff, which will produce
  # results like:
  #
  #     diffs = Diff::LCS.sdiff(seq1, seq2, Diff::LCS::ContextDiffCallbacks)
  #       # This example shows a simplified array format.
  #       # [ [ [ "-", [  0, "a" ], [  0, nil ] ] ],  # 1
  #       #   [ [ "+", [  3, nil ], [  2, "d" ] ] ],  # 2
  #       #   [ [ "!", [  4, "h" ], [  4, "f" ] ] ],  # 3
  #       #   [ [ "+", [  6, nil ], [  6, "k" ] ] ],  # 4
  #       #   [ [ "!", [  8, "n" ], [  9, "r" ] ],    # 5
  #       #     [ "!", [  9, "p" ], [ 10, "s" ] ],
  #       #     [ "+", [ 10, nil ], [ 11, "t" ] ] ] ]
  #
  # The five hunks are still present, but are significantly shorter in total
  # presentation, because changed items are shown as changes ("!") instead of
  # potentially "mismatched" pairs of additions and deletions.
  #
  # The result of this operation is similar to that of
  # Diff::LCS::SDiffCallbacks. They may be compared as:
  #
  #     s = Diff::LCS.sdiff(seq1, seq2).reject { |e| e.action == "=" }
  #     c = Diff::LCS.sdiff(seq1, seq2, Diff::LCS::ContextDiffCallbacks).flatten
  #
  #     s == c # -> true
  #
  # === Use
  # This callback object must be initialised and can be used by the
  # Diff::LCS#diff or Diff::LCS#sdiff methods.
  #
  #     cbo = Diff::LCS::ContextDiffCallbacks.new
  #     Diff::LCS.LCS(seq1, seq2, cbo)
  #     cbo.finish
  #
  # Note that the call to #finish is absolutely necessary, or the last set of
  # changes will not be visible. Alternatively, can be used as:
  #
  #     cbo = Diff::LCS::ContextDiffCallbacks.new { |tcbo| Diff::LCS.LCS(seq1, seq2, tcbo) }
  #
  # The necessary #finish call will be made.
  #
  # === Simplified Array Format
  # The simplified array format used in the example above can be obtained
  # with:
  #
  #     require 'pp'
  #     pp diffs.map { |e| e.map { |f| f.to_a } }
 class Diff::LCS::ContextDiffCallbacks < Diff::LCS::DiffCallbacks
    # Appends the simplified ContextChange for +event+ to the hunk currently
    # being collected. Removals, additions and changes are all recorded the
    # same way, so #discard_b and #change share this implementation.
  def discard_a(event)
    @hunk.push(Diff::LCS::ContextChange.simplify(event))
  end
  alias_method :discard_b, :discard_a
  alias_method :change, :discard_a
 end
  # This will produce a simple array of diff change objects. Each element in
  # the #diffs array is a single ContextChange. In the set of #diffs provided
  # by SDiffCallbacks, both old and new objects will be presented for both
  # changed <strong>and unchanged</strong> objects. +nil+ will be substituted
  # for a discarded object.
  #
  # The diffset produced by this callback, when provided to Diff::LCS#sdiff,
  # will compute and display the necessary components to show two sequences
  # and their minimized differences side by side, just like the Unix utility
  # +sdiff+.
  # 
  #     same             same
  #     before     |     after
  #     old        <     -
  #     -          >     new
  #
  #     seq1 = %w(a b c e h j l m n p)
  #     seq2 = %w(b c d e f j k l m r s t)
  #
  #     diffs = Diff::LCS.sdiff(seq1, seq2)
  #       # This example shows a simplified array format.
  #       # [ [ "-", [  0, "a"], [  0, nil ] ],
  #       #   [ "=", [  1, "b"], [  0, "b" ] ],
  #       #   [ "=", [  2, "c"], [  1, "c" ] ],
  #       #   [ "+", [  3, nil], [  2, "d" ] ],
  #       #   [ "=", [  3, "e"], [  3, "e" ] ],
  #       #   [ "!", [  4, "h"], [  4, "f" ] ],
  #       #   [ "=", [  5, "j"], [  5, "j" ] ],
  #       #   [ "+", [  6, nil], [  6, "k" ] ],
  #       #   [ "=", [  6, "l"], [  7, "l" ] ],
  #       #   [ "=", [  7, "m"], [  8, "m" ] ],
  #       #   [ "!", [  8, "n"], [  9, "r" ] ],
  #       #   [ "!", [  9, "p"], [ 10, "s" ] ],
  #       #   [ "+", [ 10, nil], [ 11, "t" ] ] ]
  #
  # The result of this operation is similar to that of
  # Diff::LCS::ContextDiffCallbacks. They may be compared as:
  #
  #     s = Diff::LCS.sdiff(seq1, seq2).reject { |e| e.action == "=" }
  #     c = Diff::LCS.sdiff(seq1, seq2, Diff::LCS::ContextDiffCallbacks).flatten
  #
  #     s == c # -> true
  #
  # === Use
  # This callback object must be initialised and is used by the Diff::LCS#sdiff
  # method.
  #
  #     cbo = Diff::LCS::SDiffCallbacks.new
  #     Diff::LCS.LCS(seq1, seq2, cbo)
  #
  # As with the other initialisable callback objects, Diff::LCS::SDiffCallbacks
  # can be initialised with a block. As there is no "finishing" to be done,
  # this has no effect on the state of the object.
  #
  #     cbo = Diff::LCS::SDiffCallbacks.new { |tcbo| Diff::LCS.LCS(seq1, seq2, tcbo) }
  #
  # === Simplified Array Format
  # The simplified array format used in the example above can be obtained
  # with:
  #
  #     require 'pp'
  #     pp diffs.map { |e| e.to_a }
 class Diff::LCS::SDiffCallbacks
    # The flat list of simplified context changes collected so far.
  attr_reader :diffs
    # Starts with an empty list and, when a block is given, yields the new
    # object to it.
  def initialize #:yields self:
    @diffs = []
    yield self if block_given?
  end
    # Appends the simplified ContextChange for +event+ to #diffs. Every kind
    # of event (match, removal, addition, change) is recorded identically, so
    # the other three handlers share this implementation.
  def match(event)
    @diffs.push(Diff::LCS::ContextChange.simplify(event))
  end
  alias_method :discard_a, :match
  alias_method :discard_b, :match
  alias_method :change, :match
 end
--- a/lib/diff/lcs/change.rb
+++ b/lib/diff/lcs/change.rb
@ -0,0 +1,169 @@
 #! /usr/bin/env ruby
 #--
 # Copyright 2004 Austin Ziegler <diff-lcs@halostatue.ca>
 #   adapted from:
 #     Algorithm::Diff (Perl) by Ned Konz <perl@bike-nomad.com>
 #     Smalltalk by Mario I. Wolczko <mario@wolczko.com>
 #   implements McIlroy-Hunt diff algorithm
 #
 # This program is free software. It may be redistributed and/or modified under
 # the terms of the GPL version 2 (or later), the Perl Artistic licence, or the
 # Ruby licence.
 # 
 # $Id: change.rb,v 1.4 2004/08/08 20:33:09 austin Exp $
 #++
 # Provides Diff::LCS::Change and Diff::LCS::ContextChange.
  # Centralises the change test code in Diff::LCS::Change and
  # Diff::LCS::ContextChange, since it's the same for both classes.
 # Predicates over the including object's @action instance variable. The
 # including class is expected to set @action to one of the action codes
 # tested below.
 module Diff::LCS::ChangeTypeTests
    # True when the action is '-'.
  def deleting?
    @action == '-'
  end
    # True when the action is '+'.
  def adding?
    @action == '+'
  end
    # True when the action is '='.
  def unchanged?
    @action == '='
  end
    # True when the action is '!'.
    # (This and the two predicates below used to read @changed, a variable
    # that is never assigned, and therefore always answered false.)
  def changed?
    @action == '!'
  end
    # True when the action is '>'.
  def finished_a?
    @action == '>'
  end
    # True when the action is '<'.
  def finished_b?
    @action == '<'
  end
 end
  # Represents a simplistic (non-contextual) change. Represents the removal or
  # addition of an element from either the old or the new sequenced enumerable.
 class Diff::LCS::Change
    # Returns the action this Change represents. Can be '+' (#adding?), '-'
    # (#deleting?), '=' (#unchanged?), or '!' (#changed?). When created by
    # Diff::LCS#diff or Diff::LCS#sdiff, it may also be '>' (#finished_a?) or
    # '<' (#finished_b?).
  attr_reader :action
    # The position passed to the constructor.
  attr_reader :position
    # The element passed to the constructor.
  attr_reader :element
  include Comparable
    # Two changes are equal when action, position and element are all equal.
    # An object that does not offer those three readers (nil, for instance)
    # is simply unequal instead of raising NoMethodError.
  def ==(other)
    return false unless [:action, :position, :element].all? { |reader| other.respond_to?(reader) }
    (self.action == other.action) &&
    (self.position == other.position) &&
    (self.element == other.element)
  end
    # Orders by action, then position, then element.
  def <=>(other)
    r = self.action <=> other.action
    r = self.position <=> other.position if r.zero?
    r = self.element <=> other.element if r.zero?
    r
  end
  def initialize(action, position, element)
    @action = action
    @position = position
    @element = element
  end
    # Returns the change as the array [action, position, element].
  def to_a
    [@action, @position, @element]
  end
    # Creates a Change from an array produced by Change#to_a.
  def self.from_a(arr)
    Diff::LCS::Change.new(arr[0], arr[1], arr[2])
  end
  include Diff::LCS::ChangeTypeTests
 end
  # Represents a contextual change. Contains the position and values of the
  # elements in the old and the new sequenced enumerables as well as the action
  # taken.
 class Diff::LCS::ContextChange
    # Returns the action this Change represents. Can be '+' (#adding?), '-'
    # (#deleting?), '=' (#unchanged?), or '!' (#changed?). When
    # created by Diff::LCS#diff or Diff::LCS#sdiff, it may also be '>'
    # (#finished_a?) or '<' (#finished_b?).
  attr_reader :action
    # Position and element on the old side, as passed to the constructor.
  attr_reader :old_position
  attr_reader :old_element
    # Position and element on the new side, as passed to the constructor.
  attr_reader :new_position
  attr_reader :new_element
  include Comparable
    # Two context changes are equal when action, both positions and both
    # elements are equal. An object that does not offer those readers (nil,
    # for instance) is simply unequal instead of raising NoMethodError.
  def ==(other)
    readers = [:action, :old_position, :new_position, :old_element, :new_element]
    return false unless readers.all? { |reader| other.respond_to?(reader) }
    (@action == other.action) &&
    (@old_position == other.old_position) &&
    (@new_position == other.new_position) &&
    (@old_element == other.old_element) &&
    (@new_element == other.new_element)
  end
  def inspect(*args)
    %Q(#<#{self.class.name}:#{__id__} @action=#{action} positions=#{old_position},#{new_position} elements=#{old_element.inspect},#{new_element.inspect}>)
  end
    # Orders by action, then old position, new position, old element and
    # finally new element.
  def <=>(other)
    r = @action <=> other.action
    r = @old_position <=> other.old_position if r.zero?
    r = @new_position <=> other.new_position if r.zero?
    r = @old_element <=> other.old_element if r.zero?
    r = @new_element <=> other.new_element if r.zero?
    r
  end
  def initialize(action, old_position, old_element, new_position, new_element)
    @action = action
    @old_position = old_position
    @old_element = old_element
    @new_position = new_position
    @new_element = new_element
  end
    # Returns the change in nested form:
    # [action, [old_position, old_element], [new_position, new_element]].
  def to_a
    [@action, [@old_position, @old_element], [@new_position, @new_element]]
  end
    # Creates a ContextChange from an array produced by ContextChange#to_a.
    # A flat five-element array (action, old position, old element, new
    # position, new element) is accepted as well.
  def self.from_a(arr)
    if arr.size == 5
      Diff::LCS::ContextChange.new(arr[0], arr[1], arr[2], arr[3], arr[4])
    else
      Diff::LCS::ContextChange.new(arr[0], arr[1][0], arr[1][1], arr[2][0],
                                   arr[2][1])
    end
  end
    # Simplifies a context change for use in some diff callbacks. '<' actions
    # are converted to '-' and '>' actions are converted to '+'. For removals
    # the new element is blanked out, for additions the old element; any other
    # action is passed through unchanged.
    #
    # Works on the nested array returned by +event.to_a+ and modifies that
    # array in place before building the result.
  def self.simplify(event)
    ea = event.to_a
    case ea[0]
    when '-'
      ea[2][1] = nil
    when '<'
      ea[0] = '-'
      ea[2][1] = nil
    when '+'
      ea[1][1] = nil
    when '>'
      ea[0] = '+'
      ea[1][1] = nil
    end
    Diff::LCS::ContextChange.from_a(ea)
  end
  include Diff::LCS::ChangeTypeTests
 end
--- a/lib/diff/lcs/hunk.rb
+++ b/lib/diff/lcs/hunk.rb
@ -0,0 +1,257 @@
 #! /usr/env/bin ruby
 #--
 # Copyright 2004 Austin Ziegler <diff-lcs@halostatue.ca>
 #   adapted from:
 #     Algorithm::Diff (Perl) by Ned Konz <perl@bike-nomad.com>
 #     Smalltalk by Mario I. Wolczko <mario@wolczko.com>
 #   implements McIlroy-Hunt diff algorithm
 #
 # This program is free software. It may be redistributed and/or modified under
 # the terms of the GPL version 2 (or later), the Perl Artistic licence, or the
 # Ruby licence.
 # 
 # $Id: hunk.rb,v 1.2 2004/08/08 20:33:09 austin Exp $
 #++
 # Contains Diff::LCS::Hunk for bin/ldiff.
 require 'diff/lcs/block'
  # A Hunk is a group of Blocks which overlap because of the context
  # surrounding each block. (So if we're not using context, every hunk will
  # contain one block.) Used in the diff program (bin/diff).
 class Diff::LCS::Hunk
    # Create a hunk using references to both the old and new data, as well as
    # the piece of data
  # data_old / data_new::     the two sequences being compared; kept so the
  #                           diff formatters can slice them later.
  # piece::                   handed to Diff::LCS::Block.new to build the
  #                           hunk's first block.
  # context::                 passed to flag_context= to widen the hunk.
  # file_length_difference::  running length difference before this hunk;
  #                           the updated value is exposed through the
  #                           file_length_difference reader.
  def initialize(data_old, data_new, piece, context, file_length_difference)
      # At first, a hunk will have just one Block in it
    @blocks = [ Diff::LCS::Block.new(piece) ]
    @data_old = data_old
    @data_new = data_new
      # +before+ is the length difference ahead of this block, +after+ the
      # difference once this block's diff_size has been added.
    before = after = file_length_difference
    after += @blocks[0].diff_size
    @file_length_difference = after # The caller must get this manually
      # Save the start & end of each array. If the array doesn't exist
      # (e.g., we're only adding items in this block), then figure out the
      # line number based on the line number of the other file and the
      # current difference in file lengths.
      # a1/a2: positions of the first and last removed items (nil if none).
    if @blocks[0].remove.empty?
      a1 = a2 = nil
    else
      a1 = @blocks[0].remove[0].position
      a2 = @blocks[0].remove[-1].position
    end
      # b1/b2: positions of the first and last inserted items (nil if none).
    if @blocks[0].insert.empty?
      b1 = b2 = nil
    else
      b1 = @blocks[0].insert[0].position
      b2 = @blocks[0].insert[-1].position
    end
      # A missing side is derived from the other side, shifted by the
      # length difference before (start) or after (end) this block.
    @start_old = a1 || (b1 - before)
    @start_new = b1 || (a1 + before)
    @end_old   = a2 || (b2 - after)
    @end_new   = b2 || (a2 + after)
      # Widen the start/end fields by the requested amount of context.
    self.flag_context = context
  end
  attr_reader :blocks
  attr_reader :start_old, :start_new
  attr_reader :end_old, :end_new
  attr_reader :file_length_difference
    # Change the "start" and "end" fields to note that context should be added
    # to this hunk
  attr_accessor :flag_context
  # Widens the hunk by up to +context+ items on each side. Does nothing when
  # +context+ is nil or zero.
  #
  # The leading context is limited so that @start_old never drops below 0.
  # The trailing context is limited so that @end_old, which is an inclusive
  # 0-based index into @data_old, never passes the last item. The previous
  # code compared against @data_old.size and could leave the hunk ending one
  # position past the end of the old data. The same amounts are applied to
  # the new-side positions.
  def flag_context=(context) #:nodoc:
    return if context.nil? || context.zero?
    add_start = (context > @start_old) ? @start_old : context
    @start_old -= add_start
    @start_new -= add_start
    last_old = @data_old.size - 1
    if (@end_old + context) > last_old
      add_end = last_old - @end_old
    else
      add_end = context
    end
    @end_old += add_end
    @end_new += add_end
  end
  # Merges +hunk+ in front of this one: this hunk takes over the other
  # hunk's start positions and its blocks are placed ahead of our own.
  def unshift(hunk)
    @start_old, @start_new = hunk.start_old, hunk.start_new
    blocks.insert(0, *hunk.blocks)
  end
    # Is there an overlap between hunk arg0 and old hunk arg1? Note: if end
    # of old hunk is one less than beginning of second, they overlap
  # Returns nil when no hunk is given. Otherwise returns true when this
  # hunk starts at most one position after +hunk+ ends, on either the old
  # or the new side.
  def overlaps?(hunk = nil)
    return nil if hunk.nil?
    gap_old = @start_old - hunk.end_old
    gap_new = @start_new - hunk.end_new
    gap_old <= 1 || gap_new <= 1
  end
  # Renders the hunk in the requested +format+. For :ed the hunk itself is
  # returned. Any unrecognised format raises a RuntimeError.
  def diff(format)
    case format
    when :old                    then old_diff
    when :unified                then unified_diff
    when :context                then context_diff
    when :ed                     then self
    when :reverse_ed, :ed_finish then ed_diff(format)
    else
      raise "Unknown diff format #{format}."
    end
  end
  # Yields each item of the old data covered by this hunk
  # (@start_old through @end_old inclusive).
  #
  # The +block+ argument has never been used because the method yields to
  # the block it is called with. It is now optional, so callers can write
  # each_old { |e| ... }; passing a value is still accepted.
  def each_old(block = nil)
    @data_old[@start_old .. @end_old].each { |e| yield e }
  end
  private
    # Note that an old diff can't have any context. Therefore, we know that
    # there's only one block in the hunk.
  # Renders the hunk as a classic diff: a header made of the old range, the
  # command letter (a, d or c) and the new range, followed by the old lines
  # prefixed with "< " and the new lines prefixed with "> ". A "---"
  # separator is emitted for change ("!") blocks. Only the first block is
  # rendered; a warning is issued if the hunk holds more than one.
  def old_diff
    warn "Expecting only one block in an old diff hunk!" if @blocks.size > 1
    block = @blocks[0]
    command = { "+" => 'a', "-" => 'd', "!" => "c" }[block.op]
    text = "#{context_range(:old)}#{command}#{context_range(:new)}\n"
    unless block.remove.empty?
      @data_old[@start_old .. @end_old].each { |line| text << "< #{line}\n" }
    end
    text << "---\n" if block.op == "!"
    unless block.insert.empty?
      @data_new[@start_new .. @end_new].each { |line| text << "> #{line}\n" }
    end
    text
  end
  # Renders the hunk in unified format: an "@@ -old +new @@" header built
  # from unified_range, followed by the old lines with removed items marked
  # and inserted items spliced in. Returns the text without a trailing
  # newline after the last line.
  def unified_diff
      # Calculate item number range.
    s = "@@ -#{unified_range(:old)} +#{unified_range(:new)} @@\n"
      # Outlist starts containing the hunk of the old file. Removing an item
      # just means putting a '-' in front of it. Inserting an item requires
      # getting it from the new file and splicing it in. We splice in
      # +num_added+ items. Remove blocks use +num_added+ because splicing
      # changed the length of outlist.
      #
      # We remove +num_removed+ items. Insert blocks use +num_removed+
      # because their item numbers -- corresponding to positions in the NEW
      # file -- don't take removed items into account.
    lo, hi, num_added, num_removed = @start_old, @end_old, 0, 0
      # Every old line starts out as context, prefixed with a single space.
    outlist = @data_old[lo .. hi].collect { |e| e.gsub(/^/, ' ') }
    @blocks.each do |block|
      block.remove.each do |item|
        op = item.action.to_s # -
          # Index into outlist: position relative to the hunk start, shifted
          # by the lines already spliced in.
        offset = item.position - lo + num_added
        outlist[offset].gsub!(/^ /, op.to_s)
        num_removed += 1
      end
      block.insert.each do |item|
        op = item.action.to_s # +
          # New-file position relative to the hunk start, shifted by the
          # removed lines that are still present in outlist.
        offset = item.position - @start_new + num_removed
          # Zero-length slice assignment inserts without replacing anything.
        outlist[offset, 0] = "#{op}#{@data_new[item.position]}"
        num_added += 1
      end
    end
    s << outlist.join("\n")
  end
  # Renders the hunk in context format: a row of asterisks, the old range
  # header, the old lines (only when some block removes items), the new
  # range header and the new lines (only when some block inserts items).
  # Changed lines are marked with the block's operation character in the
  # first column. Returns the text without a trailing newline.
  #
  # The earlier implementation tested "if removes" / "if inserts". Both are
  # Arrays, which are always truthy in Ruby, so the untouched side of an
  # insert-only or delete-only hunk was printed as well.
  def context_diff
    lines = ["***************", "*** #{context_range(:old)} ****"]
    new_header = "--- #{context_range(:new)} ----"
      # Old-file section, only when at least one block removes items.
    lo, hi = @start_old, @end_old
    removes = @blocks.reject { |e| e.remove.empty? }
    unless removes.empty?
      outlist = @data_old[lo .. hi].collect { |e| e.gsub(/^/, '  ') }
      removes.each do |block|
        block.remove.each do |item|
          outlist[item.position - lo].gsub!(/^ /) { block.op } # - or !
        end
      end
      lines.concat(outlist)
    end
    lines << new_header
      # New-file section, only when at least one block inserts items.
    lo, hi = @start_new, @end_new
    inserts = @blocks.reject { |e| e.insert.empty? }
    unless inserts.empty?
      outlist = @data_new[lo .. hi].collect { |e| e.gsub(/^/, '  ') }
      inserts.each do |block|
        block.insert.each do |item|
          outlist[item.position - lo].gsub!(/^ /) { block.op } # + or !
        end
      end
      lines.concat(outlist)
    end
    lines.join("\n")
  end
  # Renders the first block as an ed command. For :reverse_ed the command
  # letter precedes the old range; for any other format the range comes
  # first with commas replaced by spaces. When the block inserts items, the
  # new lines follow and are terminated by a line holding a single ".".
  def ed_diff(format)
    warn "Expecting only one block in an old diff hunk!" if @blocks.size > 1
    first_block = @blocks[0]
    command = { "+" => 'a', "-" => 'd', "!" => "c" }[first_block.op]
    range = context_range(:old)
    script =
      if format == :reverse_ed
        "#{command}#{range}\n"
      else
        "#{range.gsub(/,/, ' ')}#{command}\n"
      end
    return script if first_block.insert.empty?
    @data_new[@start_new .. @end_new].each { |line| script << "#{line}\n" }
    script << ".\n"
    script
  end
    # Generate a range of item numbers to print. Only print 1 number if the
    # range has only one item in it. Otherwise, it's 'start,end'
  # Returns the 1-based item range for the :old or :new side as
  # "start,end", or just the end number when start is not below end.
  def context_range(mode)
    first, last =
      case mode
      when :old then [@start_old + 1, @end_old + 1]
      when :new then [@start_new + 1, @end_new + 1]
      end
    return "#{last}" unless first < last
    "#{first},#{last}"
  end
    # Generate a range of item numbers to print for unified diff. Print
    # number where block starts, followed by number of lines in the block
    # (don't print number of lines if it's 1)
  # Returns the unified-diff range for the :old or :new side: the 1-based
  # start followed by ",length", with the length left off when it is 1.
  # For ranges shorter than two items the end number is used as the anchor.
  def unified_range(mode)
    start, finish =
      case mode
      when :old then [@start_old + 1, @end_old + 1]
      when :new then [@start_new + 1, @end_new + 1]
      end
    count = finish - start + 1
    anchor = count < 2 ? finish : start # "strange, but correct"
    if count == 1
      "#{anchor}"
    else
      "#{anchor},#{count}"
    end
  end
 end
--- a/lib/diff/lcs/ldiff.rb
+++ b/lib/diff/lcs/ldiff.rb
@ -0,0 +1,226 @@
 #!/usr/bin/env ruby
 require 'optparse'
 require 'ostruct'
 require 'diff/lcs/hunk'
  # == ldiff Usage
  #   ldiff [options] oldfile newfile
  #
  # -c::                            Displays a context diff with 3 lines of context.
  # -C [LINES], --context [LINES]:: Displays a context diff with LINES lines of context. Default 3 lines.
  # -u::                            Displays a unified diff with 3 lines of context.
  # -U [LINES], --unified [LINES]:: Displays a unified diff with LINES lines of context. Default 3 lines.
  # -e::                            Creates an 'ed' script to change oldfile to newfile.
  # -f::                            Creates an 'ed' script to change oldfile to newfile in reverse order.
  # -a, --text::                    Treats the files as text and compares them line-by-line, even if they do not seem to be text.
  # --binary::                      Treats the files as binary.
  # -q, --brief::                   Reports only whether or not the files differ, not the details.
  # --help::                        Shows the command-line help.
  # --version::                     Shows the version of Diff::LCS.
  #
  # By default, produces an "old-style" diff, with output like UNIX diff.
  #
  # == Copyright
  # Copyright &copy; 2004 Austin Ziegler
  #
  #   Part of Diff::LCS <http://rubyforge.org/projects/ruwiki/>
  #   Austin Ziegler <diff-lcs@halostatue.ca>
  #
  # This program is free software. It may be redistributed and/or modified under
  # the terms of the GPL version 2 (or later), the Perl Artistic licence, or the
  # Ruby licence.
 module Diff::LCS::Ldiff
  BANNER = <<-COPYRIGHT
 ldiff #{Diff::LCS::VERSION}
  Copyright © 2004 Austin Ziegler
  Part of Diff::LCS.
  http://rubyforge.org/projects/ruwiki/
  Austin Ziegler <diff-lcs@halostatue.ca>
  This program is free software. It may be redistributed and/or modified under
  the terms of the GPL version 2 (or later), the Perl Artistic licence, or the
  Ruby licence.
 $Id: ldiff.rb,v 1.1 2004/09/26 01:37:49 austin Exp $
              COPYRIGHT
  class << self
    attr_reader   :format, :lines       #:nodoc:
    attr_reader   :file_old, :file_new  #:nodoc:
    attr_reader   :data_old, :data_new  #:nodoc:
    # Command-line entry point for ldiff.
    #
    # args::   argument list; it must respond to #options (as provided by
    #          OptionParser::Arguable) and hold exactly two file names once
    #          the options have been parsed out of it.
    # input::  accepted for symmetry with the other streams; not read here.
    # output:: receives the diff text (anything responding to <<).
    # error::  receives usage, help and version text.
    #
    # Returns 0 when the files are identical or after --help/--version,
    # 1 when the files differ, and 127 when the argument count is wrong.
    #
    # Fixes relative to the previous version:
    # * the file names are taken from +args+, not from the global ARGV;
    # * binary detection considers both files ("x = a or b" only assigned
    #   a) and no longer relies on String#grep;
    # * binary files are reported as differing when their contents differ,
    #   not when they are equal, and never reach the hunk loop;
    # * the unified/context file headers are written to +output+.
    def run(args, input = $stdin, output = $stdout, error = $stderr) #:nodoc:
      args.options do |o|
        o.banner = "Usage: #{File.basename($0)} [options] oldfile newfile"
        o.separator ""
        o.on('-c',
            'Displays a context diff with 3 lines of',
            'context.') do |ctx|
          @format = :context
          @lines  = 3
        end
        o.on('-C', '--context [LINES]', Numeric,
            'Displays a context diff with LINES lines',
            'of context. Default 3 lines.') do |ctx|
          @format = :context
          @lines  = ctx || 3
        end
        o.on('-u',
            'Displays a unified diff with 3 lines of',
            'context.') do |ctx|
          @format = :unified
          @lines  = 3
        end
        o.on('-U', '--unified [LINES]', Numeric,
            'Displays a unified diff with LINES lines',
            'of context. Default 3 lines.') do |ctx|
          @format = :unified
          @lines  = ctx || 3
        end
        o.on('-e',
            'Creates an \'ed\' script to change',
            'oldfile to newfile.') do |ctx|
          @format = :ed
        end
        o.on('-f',
            'Creates an \'ed\' script to change',
            'oldfile to newfile in reverse order.') do |ctx|
          @format = :reverse_ed
        end
        o.on('-a', '--text',
             'Treat the files as text and compare them',
             'line-by-line, even if they do not seem',
             'to be text.') do |txt|
          @binary = false
        end
        o.on('--binary',
             'Treats the files as binary.') do |bin|
          @binary = true
        end
        o.on('-q', '--brief',
             'Report only whether or not the files',
             'differ, not the details.') do |ctx|
          @format = :report
        end
        o.on_tail('--help', 'Shows this text.') do
          error << o
          return 0
        end
        o.on_tail('--version', 'Shows the version of Diff::LCS.') do
          error << BANNER
          return 0
        end
        o.on_tail ""
        o.on_tail 'By default, runs produces an "old-style" diff, with output like UNIX diff.'
        o.parse!
      end
      unless args.size == 2
        error << args.options
        return 127
      end
        # Defaults are for old-style diff
      @format ||= :old
      @lines  ||= 0
        # Use the arguments this method was given; they are what was parsed
        # and counted above.
      file_old, file_new = *args
      case @format
      when :context
        char_old = '*' * 3
        char_new = '-' * 3
      when :unified
        char_old = '-' * 3
        char_new = '+' * 3
      end
        # After we've read up to a certain point in each file, the number of
        # items we've read from each file will differ by FLD (could be 0).
      file_length_difference = 0
      if @binary.nil? or @binary
        data_old = IO::read(file_old)
        data_new = IO::read(file_new)
          # Test binary status: a NUL byte in the first 4096 characters of
          # either file marks the pair as binary.
        if @binary.nil?
          old_txt = !data_old[0...4096].include?("\0")
          new_txt = !data_new[0...4096].include?("\0")
          @binary = !(old_txt && new_txt)
        end
        unless @binary
          data_old = data_old.split(/\n/).map! { |e| e.chomp }
          data_new = data_new.split(/\n/).map! { |e| e.chomp }
        end
      else
        data_old = IO::readlines(file_old).map! { |e| e.chomp }
        data_new = IO::readlines(file_new).map! { |e| e.chomp }
      end
        # diff yields lots of pieces, each of which is basically a Block object
      if @binary
          # Binary data is only compared for equality; truthy means "differ".
        diffs = (data_old != data_new)
      else
        diffs = Diff::LCS.diff(data_old, data_new)
        diffs = nil if diffs.empty?
      end
      return 0 unless diffs
        # Binary files have no hunks to print, so they are always reported
        # in brief form.
      if @binary or (@format == :report)
        output << "Files #{file_old} and #{file_new} differ\n"
        return 1
      end
      if (@format == :unified) or (@format == :context)
        ft = File.stat(file_old).mtime.localtime.strftime('%Y-%m-%d %H:%M:%S %z')
        output << "#{char_old} #{file_old}\t#{ft}\n"
        ft = File.stat(file_new).mtime.localtime.strftime('%Y-%m-%d %H:%M:%S %z')
        output << "#{char_new} #{file_new}\t#{ft}\n"
      end
        # Loop over hunks. If a hunk overlaps with the last hunk, join them.
        # Otherwise, print out the old one.
      oldhunk = hunk = nil
        # For ed scripts the hunks are collected first and emitted in
        # reverse order at the end.
        # NOTE(review): in :ed mode the "\n" strings appended below end up in
        # the same array and also receive #diff(:ed_finish) -- confirm that
        # this path works as intended.
      if @format == :ed
        real_output = output
        output = []
      end
      diffs.each do |piece|
      begin
        hunk = Diff::LCS::Hunk.new(data_old, data_new, piece, @lines,
                                   file_length_difference)
        file_length_difference = hunk.file_length_difference
        next unless oldhunk
        if (@lines > 0) and hunk.overlaps?(oldhunk)
          hunk.unshift(oldhunk)
        else
          output << oldhunk.diff(@format)
        end
      ensure
        oldhunk = hunk
        output << "\n"
      end
      end
      output << oldhunk.diff(@format)
      output << "\n"
      if @format == :ed
        output.reverse_each { |e| real_output << e.diff(:ed_finish) }
      end
      return 1
    end
  end
 end
--- a/lib/diff/lcs/string.rb
+++ b/lib/diff/lcs/string.rb
@ -0,0 +1,19 @@
 #! /usr/env/bin ruby
 #--
 # Copyright 2004 Austin Ziegler <diff-lcs@halostatue.ca>
 #   adapted from:
 #     Algorithm::Diff (Perl) by Ned Konz <perl@bike-nomad.com>
 #     Smalltalk by Mario I. Wolczko <mario@wolczko.com>
 #   implements McIlroy-Hunt diff algorithm
 #
 # This program is free software. It may be redistributed and/or modified under
 # the terms of the GPL version 2 (or later), the Perl Artistic licence, or the
 # Ruby licence.
 # 
 # $Id: string.rb,v 1.3 2004/08/08 20:33:09 austin Exp $
 #++
 # Includes Diff::LCS into String.
 # Reopens the core String class and mixes Diff::LCS into it, so the
 # module's instance methods become available on every String.
 class String
  include Diff::LCS
 end
--- a/lib/page_renderer.rb
+++ b/lib/page_renderer.rb
@ -1,11 +1,9 @@
-require 'diff'
+require 'xhtmldiff'
 # Temporary class containing all rendering stuff from a Revision 
 # I want to shift all rendering logic to the controller eventually
 class PageRenderer
  include HTMLDiff
  def self.setup_url_generator(url_generator)
    @@url_generator = url_generator
  end
@ -41,8 +39,22 @@ class PageRenderer
  def display_diff
    previous_revision = @revision.page.previous_revision(@revision)
    if previous_revision
-      rendered_previous_revision = WikiContent.new(previous_revision, @@url_generator).render!
+
-      diff(rendered_previous_revision, display_content) 
+      previous_content = "<div>\n" + WikiContent.new(previous_revision, @@url_generator).render!.to_s + "\n</div>"
      current_content = "<div>\n" + display_content.to_s  + "\n</div>"
      diff_doc = REXML::Document.new
      diff_doc << (div = REXML::Element.new 'div')
      hd = XHTMLDiff.new(div)
      parsed_previous_revision = REXML::HashableElementDelegator.new(
           REXML::XPath.first(REXML::Document.new(previous_content), '/div'))
      parsed_display_content = REXML::HashableElementDelegator.new(
           REXML::XPath.first(REXML::Document.new(current_content), '/div'))
      Diff::LCS.traverse_balanced(parsed_previous_revision, parsed_display_content, hd)
      diffs = ''
      diff_doc.write(diffs, -1, true, true)
      diffs
    else
      display_content
    end
--- a/lib/xhtmldiff.rb
+++ b/lib/xhtmldiff.rb
@ -0,0 +1,179 @@
 #!/usr/bin/env ruby
 # Author: Aredridel <aredridel@nbtsc.org>
 # Website: http://theinternetco.net/projects/ruby/xhtmldiff.html
 # Licence: same as Ruby
 # Version: 1.22
 #
 # Tweaks by Jacques Distler <distler@golem.ph.utexas.edu>
 #  -- add classnames to <del> and <ins> elements added by XHTMLDiff,
 #     for better CSS styling
 require 'diff/lcs'
 require 'rexml/document'
 require 'delegate'
 # Returns +a+ when it is strictly greater than +b+, otherwise +b+.
 def Math.max(a, b)
   return a if a > b
   b
 end
 # Extensions to REXML needed for diffing element trees.
 module REXML
   class Text
     # Gives Text the same deep_clone entry point the diff code calls on
     # elements; it simply returns a clone of the text node.
     def deep_clone
       clone
     end
   end
   # Wraps an element so that equality and hashing are based on its
   # serialised form (to_s) with surrounding whitespace stripped, which lets
   # wrapped elements be compared and used as hash keys by content.
   class HashableElementDelegator < DelegateClass(Element)
     def initialize(sub)
       super sub
     end
     # Equal when both sides serialise to the same string, ignoring leading
     # and trailing whitespace.
     def ==(other)
       other.to_s.strip == self.to_s.strip
     end
     def eql?(other)
       self == other
     end
     # Index into the wrapped object; results of the wrapped object's own
     # class are wrapped again so they stay comparable by content.
     def [](k)
       r = super
       if r.kind_of? __getobj__.class
         self.class.new(r)
       else
         r
       end
     end
     # Hash of the stripped serialised form. It has to be stripped because
     # == and eql? strip: objects that are eql? must have the same hash, and
     # previously values differing only in surrounding whitespace were eql?
     # but hashed differently.
     def hash
       __getobj__.to_s.strip.hash
     end
   end
 end
 # Callback object for Diff::LCS.traverse_balanced that appends a redlined
 # copy of the compared nodes to +output+: unchanged nodes are copied,
 # removed nodes are wrapped in <del> and added nodes in <ins>, each with a
 # class attribute for styling.
 class XHTMLDiff
   include REXML
   attr_accessor :output
   class << self
     BLOCK_CONTAINERS = ['div', 'ul', 'li']
     # Diffs two nodes and returns a new node holding the result.
     # * Equal arguments: a deep clone of +a+.
     # * Two HashableElementDelegators: a copy of +a+'s element (name and
     #   attributes) whose children are the diff of both child lists.
     # * Two REXML::Text nodes: a <span> holding a word-by-word diff.
     # Anything else raises ArgumentError.
     def diff(a, b)
       return a.deep_clone if a == b
       if REXML::HashableElementDelegator === a && REXML::HashableElementDelegator === b
         merged = REXML::Element.new(a.name)
         merged.add_attributes a.attributes
         Diff::LCS.traverse_balanced(a, b, new(merged))
         merged
       elsif REXML::Text === a && REXML::Text === b
         span = REXML::Element.new('span')
         old_words = a.value.split(/\s/)
         new_words = b.value.split(/\s/)
         Diff::LCS.traverse_balanced(old_words, new_words, XHTMLTextDiff.new(span))
         span
       else
         raise ArgumentError, "both arguments must be equal or both be elements. a is #{a.class.name} and b is #{b.class.name}"
       end
     end
   end
   # Instance-level shortcut for the class-level diff.
   def diff(a, b)
     self.class.diff(a, b)
   end
   # +output+ is the node that receives everything the callbacks produce.
   def initialize(output)
     @output = output
   end
   # Called when both elements are the same: copy the old element through.
   def match(event)
     return unless event.old_element
     @output << event.old_element.deep_clone
   end
   # Called when there is an element in A that isn't in B.
   def discard_a(event)
     @output << wrap(event.old_element, 'del', 'diffdel')
   end
   # Called when an element was replaced by another one. A nested diff is
   # tried first and kept when, after removing the text matched by the
   # ins/del pattern, it is still more than half the size of the larger
   # original. Otherwise the old element is shown as deleted and the new
   # one as inserted.
   def change(event)
     nested =
       begin
         diff(event.old_element, event.new_element)
       rescue ArgumentError
         nil
       end
     keep_nested = false
     if nested
       remaining = nested.to_s.gsub(%r{<(ins|del)>.*</\1>}, '').size
       largest = Math.max(event.old_element.to_s.size, event.new_element.to_s.size)
       keep_nested = (Float(remaining) / largest) > 0.5
     end
     if keep_nested
       @output << nested
     else
       @output << wrap(event.old_element, 'del', 'diffmod')
       @output << wrap(event.new_element, 'ins', 'diffmod')
     end
   end
   # Called when there is an element in B that isn't in A.
   def discard_b(event)
     @output << wrap(event.new_element, 'ins', 'diffins')
   end
   # Intentionally empty.
   def choose_event(event, element, tag)
   end
   # Returns a deep clone of +element+, placed inside a new +tag+ element
   # when a tag is given. When +class_name+ is given it is set as the class
   # attribute of the returned node.
   def wrap(element, tag = nil, class_name = nil)
     copy = element.deep_clone
     if tag
       el = Element.new tag
       el << copy
     else
       el = copy
     end
     el.add_attribute('class', class_name) if class_name
     el
   end
   # Variant used for word-level diffs of text: elements are plain Strings
   # and a change is always rendered as a deletion followed by an insertion.
   class XHTMLTextDiff < XHTMLDiff
     def change(event)
       @output << wrap(event.old_element, 'del', 'diffmod')
       @output << wrap(event.new_element, 'ins', 'diffmod')
     end
     # Called when both elements are the same.
     def match(event)
       return unless event.old_element
       @output << wrap(event.old_element, nil, nil)
     end
     # Called when there is an element in A that isn't in B.
     def discard_a(event)
       @output << wrap(event.old_element, 'del', 'diffdel')
     end
     # Called when there is an element in B that isn't in A.
     def discard_b(event)
       @output << wrap(event.new_element, 'ins', 'diffins')
     end
     # Strings become text nodes prefixed with a single space. Without a
     # tag the node is returned as is; otherwise it is placed inside a new
     # +tag+ element carrying the optional class attribute.
     def wrap(element, tag = nil, class_name = nil)
       element = REXML::Text.new(" " + element) if String === element
       return element unless tag
       wrapper = REXML::Element.new(tag)
       wrapper.add_text element
       wrapper.add_attribute('class', class_name) if class_name
       wrapper
     end
   end
 end
 # When this file is executed directly rather than required, report on
 # stderr that it has no self-tests and exit with status 1.
 if $0 == __FILE__
 	$stderr.puts "No tests available yet"
 	exit(1)
 end
--- a/test/unit/diff_test.rb
+++ b/test/unit/diff_test.rb
@ -1,110 +1,94 @@
 #!/usr/bin/env ruby
 require File.expand_path(File.dirname(__FILE__) + '/../test_helper')
-require 'diff'
+require 'xhtmldiff'
 class DiffTest < Test::Unit::TestCase
  include HTMLDiff
  def setup
-    @builder = DiffBuilder.new('old', 'new')
+
  end
-  def test_start_of_tag
+  def diff(a,b)
-    assert @builder.start_of_tag?('<')
+    diff_doc = REXML::Document.new
-    assert(!@builder.start_of_tag?('>'))
+    diff_doc << (div = REXML::Element.new 'div' )
-    assert(!@builder.start_of_tag?('a'))
+    hd = XHTMLDiff.new(div)
-  end
+    parsed_a = REXML::HashableElementDelegator.new(
-
+           REXML::XPath.first(REXML::Document.new("<div>"+a+"</div>"), '/div'))
-  def test_end_of_tag
+    parsed_b = REXML::HashableElementDelegator.new(
-    assert @builder.end_of_tag?('>')
+           REXML::XPath.first(REXML::Document.new("<div>"+b+"</div>"), '/div'))
-    assert(!@builder.end_of_tag?('<'))
+    Diff::LCS.traverse_balanced(parsed_a, parsed_b, hd)
-    assert(!@builder.end_of_tag?('a'))
+    diffs = ''
-  end
+    diff_doc.write(diffs, -1, true, true)
-
+    diffs
  def test_whitespace
    assert @builder.whitespace?(" ")
    assert @builder.whitespace?("\n")
    assert @builder.whitespace?("\r")
    assert(!@builder.whitespace?("a"))
  end
  def test_convert_html_to_list_of_words_simple
    assert_equal(
        ['the', ' ', 'original', ' ', 'text'],
        @builder.convert_html_to_list_of_words('the original text'))
  end
  def test_convert_html_to_list_of_words_should_separate_endlines
    assert_equal(
        ['a', "\n", 'b', "\r", 'c'],
        @builder.convert_html_to_list_of_words("a\nb\rc"))
  end
  def test_convert_html_to_list_of_words_should_not_compress_whitespace
    assert_equal(
        ['a', ' ', 'b', '  ', 'c', "\r \n ", 'd'],
        @builder.convert_html_to_list_of_words("a b  c\r \n d"))
  end
  def test_convert_html_to_list_of_words_should_handle_tags_well
    assert_equal(
        ['<p>', 'foo', ' ', 'bar', '</p>'],
        @builder.convert_html_to_list_of_words("<p>foo bar</p>"))
  end
  def test_convert_html_to_list_of_words_interesting
    assert_equal(
        ['<p>', 'this', ' ', 'is', '</p>', "\r\n", '<p>', 'the', ' ', 'new', ' ', 'string', 
         '</p>', "\r\n", '<p>', 'around', ' ', 'the', ' ', 'world', '</p>'],
        @builder.convert_html_to_list_of_words(
            "<p>this is</p>\r\n<p>the new string</p>\r\n<p>around the world</p>"))
  end
  def test_html_diff_simple
    a = 'this was the original string'
    b = 'this is the new string'
-    assert_equal('this <del class="diffmod">was</del><ins class="diffmod">is</ins> the ' +
+    assert_equal("<div><span> this<del class='diffmod'> was</del><ins class='diffmod'> is</ins> the" +
-           '<del class="diffmod">original</del><ins class="diffmod">new</ins> string',
+           "<del class='diffmod'> original</del><ins class='diffmod'> new</ins> string</span></div>",
-           diff(a, b))
+          diff(a, b))
  end
  def test_html_diff_with_multiple_paragraphs
    a = "<p>this was the original string</p>"
-    b = "<p>this is</p>\r\n<p> the new string</p>\r\n<p>around the world</p>"
+    b = "<p>this is</p>\n<p> the new string</p>\n<p>around the world</p>"
    # Some of this expected result is accidental to implementation. 
    # At least it's well-formed and more or less correct.
    assert_equal(
-        "<p>this <del class=\"diffmod\">was</del><ins class=\"diffmod\">is</ins></p>"+
+        "<div><p><span> this<del class='diffmod'> was</del><ins class='diffmod'> is</ins>" +
-        "<ins class=\"diffmod\">\r\n</ins><p> the " +
+        "<del class='diffdel'> the</del><del class='diffdel'> original</del><del class='diffdel'> string</del></span></p>" +
-        "<del class=\"diffmod\">original</del><ins class=\"diffmod\">new</ins>" +
+        "<ins class='diffins'>\n</ins><ins class='diffins'><p> the new string</p></ins>" +
-        " string</p><ins class=\"diffins\">\r\n</ins>" +
+        "<ins class='diffins'>\n</ins><ins class='diffins'><p>around the world</p></ins></div>",
        "<p><ins class=\"diffins\">around the world</ins></p>",
        diff(a, b))
  end
  def test_split_paragraph_into_two
     a = "<p>foo bar</p>"
     b = "<p>foo</p><p>bar</p>"
     assert_equal(
       "<div><p><span> foo<del class='diffdel'> bar</del></span></p>" +
       "<ins class='diffins'><p>bar</p></ins></div>",
      diff(a,b))
  end
  def test_join_two_paragraphs_into_one
     a = "<p>foo</p><p>bar</p>"
     b = "<p>foo bar</p>"
     assert_equal(
       "<div><p><span> foo<ins class='diffins'> bar</ins></span></p>" +
       "<del class='diffdel'><p>bar</p></del></div>",
      diff(a,b))
  end
  def test_add_inline_element
     a = "<p>foo bar</p>"
     b = "<p>foo <b>bar</b></p>"
     assert_equal(
        "<div><p><span> foo<del class='diffdel'> bar</del></span>" +
        "<ins class='diffins'><b>bar</b></ins></p></div>",
       diff(a,b))
  end
  # FIXME this test fails (ticket #67, http://dev.instiki.org/ticket/67)
  def test_html_diff_preserves_endlines_in_pre
-    a = "<pre>\na\nb\nc\n</pre>"
+    a = "<pre>a\nb\nc\n</pre>"
-    b = "<pre>\n</pre>"
+    b = "<pre>a\n</pre>"
    assert_equal(
-        "<pre>\n<del class=\"diffdel\">a\nb\nc\n</del></pre>",
+        "<div><pre><span> a\n<del class='diffdel'>b\nc\n</del></span></pre></div>",
        diff(a, b))
  end
  def test_html_diff_with_tags
    a = ""
    b = "<div>foo</div>"
-    assert_equal '<div><ins class="diffins">foo</ins></div>', diff(a, b)
+    assert_equal "<div><ins class='diffins'><div>foo</div></ins></div>", diff(a, b)
  end
  def test_diff_for_tag_change
    a = "<a>x</a>"
    b = "<b>x</b>"
    # FIXME sad, but true - this case produces an invalid XML. If handle this you can, strong your foo is.
-    assert_equal '<a><b>x</a></b>', diff(a, b)
+    assert_equal "<div><del class='diffdel'><a>x</a></del><ins class='diffins'><b>x</b></ins></div>", diff(a, b)
  end
 end