Replaced diff.rb with xhtmldiff.rb, which (unlike its predecessor) produces well-formed redline documents.

2007-02-03 22:52:48 -06:00 · 2007-02-03 22:52:48 -06:00 · 8c52f28864
commit 8c52f28864
parent 86e9c70a26
12 changed files with 2420 additions and 391 deletions
--- a/lib/diff.rb
+++ b/lib/diff.rb
@ -1,316 +0,0 @@
-module HTMLDiff
-
-  Match = Struct.new(:start_in_old, :start_in_new, :size)
-  class Match
-    def end_in_old
-      self.start_in_old + self.size
-    end
-    
-    def end_in_new
-      self.start_in_new + self.size
-    end
-  end
-  
-  Operation = Struct.new(:action, :start_in_old, :end_in_old, :start_in_new, :end_in_new)
-
-  class DiffBuilder
-
-    def initialize(old_version, new_version)
-      @old_version, @new_version = old_version, new_version
-      @content = []
-    end
-
-    def build
-      split_inputs_to_words
-      index_new_words
-      operations.each { |op| perform_operation(op) }
-      return @content.join
-    end
-
-    def split_inputs_to_words
-      @old_words = convert_html_to_list_of_words(explode(@old_version))
-      @new_words = convert_html_to_list_of_words(explode(@new_version))
-    end
-
-    def index_new_words
-      @word_indices = Hash.new { |h, word| h[word] = [] }
-      @new_words.each_with_index { |word, i| @word_indices[word] << i }
-    end
-
-    def operations
-      position_in_old = position_in_new = 0
-      operations = []
-      
-      matches = matching_blocks
-      # an empty match at the end forces the loop below to handle the unmatched tails
-      # I'm sure it can be done more gracefully, but not at 23:52
-      matches << Match.new(@old_words.length, @new_words.length, 0)
-      
-      matches.each_with_index do |match, i|
-        match_starts_at_current_position_in_old = (position_in_old == match.start_in_old)
-        match_starts_at_current_position_in_new = (position_in_new == match.start_in_new)
-        
-        action_upto_match_positions = 
-          case [match_starts_at_current_position_in_old, match_starts_at_current_position_in_new]
-          when [false, false]
-            :replace
-          when [true, false]
-            :insert
-          when [false, true]
-            :delete
-          else
-            # this happens if the first few words are same in both versions
-            :none
-          end
-
-        if action_upto_match_positions != :none
-          operation_upto_match_positions = 
-              Operation.new(action_upto_match_positions, 
-                  position_in_old, match.start_in_old, 
-                  position_in_new, match.start_in_new)
-          operations << operation_upto_match_positions
-        end
-        if match.size != 0
-          match_operation = Operation.new(:equal, 
-              match.start_in_old, match.end_in_old, 
-              match.start_in_new, match.end_in_new)
-          operations << match_operation
-        end
-
-        position_in_old = match.end_in_old
-        position_in_new = match.end_in_new
-      end
-      
-      operations
-    end
-
-    def matching_blocks
-      matching_blocks = []
-      recursively_find_matching_blocks(0, @old_words.size, 0, @new_words.size, matching_blocks)
-      matching_blocks
-    end
-
-    def recursively_find_matching_blocks(start_in_old, end_in_old, start_in_new, end_in_new, matching_blocks)
-      match = find_match(start_in_old, end_in_old, start_in_new, end_in_new)
-      if match
-        if start_in_old < match.start_in_old and start_in_new < match.start_in_new
-          recursively_find_matching_blocks(
-              start_in_old, match.start_in_old, start_in_new, match.start_in_new, matching_blocks) 
-        end
-        matching_blocks << match
-        if match.end_in_old < end_in_old and match.end_in_new < end_in_new
-          recursively_find_matching_blocks(
-              match.end_in_old, end_in_old, match.end_in_new, end_in_new, matching_blocks)
-        end
-      end
-    end
-
-    def find_match(start_in_old, end_in_old, start_in_new, end_in_new)
-
-      best_match_in_old = start_in_old
-      best_match_in_new = start_in_new
-      best_match_size = 0
-      
-      match_length_at = Hash.new { |h, index| h[index] = 0 }
-      
-      start_in_old.upto(end_in_old - 1) do |index_in_old|
-
-        new_match_length_at = Hash.new { |h, index| h[index] = 0 }
-
-        @word_indices[@old_words[index_in_old]].each do |index_in_new|
-          next  if index_in_new < start_in_new
-          break if index_in_new >= end_in_new
-
-          new_match_length = match_length_at[index_in_new - 1] + 1
-          new_match_length_at[index_in_new] = new_match_length
-
-          if new_match_length > best_match_size
-            best_match_in_old = index_in_old - new_match_length + 1
-            best_match_in_new = index_in_new - new_match_length + 1
-            best_match_size = new_match_length
-          end
-        end
-        match_length_at = new_match_length_at
-      end
-
-#      best_match_in_old, best_match_in_new, best_match_size = add_matching_words_left(
-#          best_match_in_old, best_match_in_new, best_match_size, start_in_old, start_in_new)
-#      best_match_in_old, best_match_in_new, match_size = add_matching_words_right(
-#          best_match_in_old, best_match_in_new, best_match_size, end_in_old, end_in_new)
-
-      return (best_match_size != 0 ? Match.new(best_match_in_old, best_match_in_new, best_match_size) : nil)
-    end
-
-    def add_matching_words_left(match_in_old, match_in_new, match_size, start_in_old, start_in_new)
-      while match_in_old > start_in_old and 
-            match_in_new > start_in_new and 
-            @old_words[match_in_old - 1] == @new_words[match_in_new - 1]
-        match_in_old -= 1
-        match_in_new -= 1
-        match_size += 1
-      end
-      [match_in_old, match_in_new, match_size]
-    end
-
-    def add_matching_words_right(match_in_old, match_in_new, match_size, end_in_old, end_in_new)
-      while match_in_old + match_size < end_in_old and 
-            match_in_new + match_size < end_in_new and
-            @old_words[match_in_old + match_size] == @new_words[match_in_new + match_size]
-        match_size += 1
-      end
-      [match_in_old, match_in_new, match_size]
-    end
-    
-    VALID_METHODS = [:replace, :insert, :delete, :equal]
-
-    def perform_operation(operation)
-      @operation = operation
-      self.send operation.action, operation
-    end
-
-    def replace(operation)
-      delete(operation, 'diffmod')
-      insert(operation, 'diffmod')
-    end
-    
-    def insert(operation, tagclass = 'diffins')
-      insert_tag('ins', tagclass, @new_words[operation.start_in_new...operation.end_in_new])
-    end
-    
-    def delete(operation, tagclass = 'diffdel')
-       insert_tag('del', tagclass, @old_words[operation.start_in_old...operation.end_in_old])
-    end
-    
-    def equal(operation)
-      # no tags to insert, simply copy the matching words from one of the versions
-      @content += @new_words[operation.start_in_new...operation.end_in_new]
-    end
-  
-    def opening_tag?(item)
-      item =~ %r!^\s*<[^>]+>\s*$!
-    end
-
-    def closing_tag?(item)
-      item =~ %r!^\s*</[^>]+>\s*$!
-    end
-
-    def tag?(item)
-      opening_tag?(item) or closing_tag?(item)
-    end
-
-    def extract_consecutive_words(words, &condition)
-      index_of_first_tag = nil
-      words.each_with_index do |word, i| 
-        if !condition.call(word)
-          index_of_first_tag = i
-          break
-        end
-      end
-      if index_of_first_tag
-        return words.slice!(0...index_of_first_tag)
-      else
-        return words.slice!(0..words.length)
-      end
-    end
-
-    # This method encloses words within a specified tag (ins or del), and adds this into @content, 
-    # with a twist: if there are words contain tags, it actually creates multiple ins or del, 
-    # so that they don't include any ins or del. This handles cases like
-    # old: '<p>a</p>'
-    # new: '<p>ab</p><p>c</b>'
-    # diff result: '<p>a<ins>b</ins></p><p><ins>c</ins></p>'
-    # this still doesn't guarantee valid HTML (hint: think about diffing a text containing ins or
-    # del tags), but handles correctly more cases than the earlier version.
-    # 
-    # P.S.: Spare a thought for people who write HTML browsers. They live in this ... every day.
-
-    def insert_tag(tagname, cssclass, words)
-      loop do
-        break if words.empty?
-        non_tags = extract_consecutive_words(words) { |word| not tag?(word) }
-        @content << wrap_text(non_tags.join, tagname, cssclass) unless non_tags.empty?
-
-        break if words.empty?
-        @content += extract_consecutive_words(words) { |word| tag?(word) }
-      end
-    end
-
-    def wrap_text(text, tagname, cssclass)
-      %(<#{tagname} class="#{cssclass}">#{text}</#{tagname}>)
-    end
-
-    def explode(sequence)
-      sequence.is_a?(String) ? sequence.split(//) : sequence
-    end
-  
-    def end_of_tag?(char)
-      char == '>'
-    end
-  
-    def start_of_tag?(char)
-      char == '<'
-    end
-    
-    def whitespace?(char)
-      char =~ /\s/
-    end
-  
-    def convert_html_to_list_of_words(x, use_brackets = false)
-      mode = :char
-      current_word  = ''
-      words = []
-      
-      explode(x).each do |char|
-        case mode
-        when :tag
-          if end_of_tag? char
-            current_word << (use_brackets ? ']' : '>')
-            words << current_word
-            current_word = ''
-            if whitespace?(char) 
-              mode = :whitespace 
-            else
-              mode = :char
-            end
-          else
-            current_word << char
-          end
-        when :char
-          if start_of_tag? char
-            words << current_word unless current_word.empty?
-            current_word = (use_brackets ? '[' : '<')
-            mode = :tag
-          elsif /\s/.match char
-            words << current_word unless current_word.empty?
-            current_word = char
-            mode = :whitespace
-          else
-            current_word << char
-          end
-        when :whitespace
-          if start_of_tag? char
-            words << current_word unless current_word.empty?
-            current_word = (use_brackets ? '[' : '<')
-            mode = :tag
-          elsif /\s/.match char
-            current_word << char
-          else
-            words << current_word unless current_word.empty?
-            current_word = char
-            mode = :char
-          end
-        else 
-          raise "Unknown mode #{mode.inspect}"
-        end
-      end
-      words << current_word unless current_word.empty?
-      words
-    end
-
-  end # of class Diff Builder
-  
-  def diff(a, b)
-    DiffBuilder.new(a, b).build
-  end
-
-end
--- a/lib/diff/lcs.rb
+++ b/lib/diff/lcs.rb
--- a/lib/diff/lcs/array.rb
+++ b/lib/diff/lcs/array.rb
@ -0,0 +1,21 @@
+#! /usr/bin/env ruby
+#--
+# Copyright 2004 Austin Ziegler <diff-lcs@halostatue.ca>
+#   adapted from:
+#     Algorithm::Diff (Perl) by Ned Konz <perl@bike-nomad.com>
+#     Smalltalk by Mario I. Wolczko <mario@wolczko.com>
+#   implements McIlroy-Hunt diff algorithm
+#
+# This program is free software. It may be redistributed and/or modified under
+# the terms of the GPL version 2 (or later), the Perl Artistic licence, or the
+# Ruby licence.
+# 
+# $Id: array.rb,v 1.3 2004/08/08 20:33:09 austin Exp $
+#++
+# Includes Diff::LCS into the Array built-in class.
+
+require 'diff/lcs'
+
+class Array
+  include Diff::LCS
+end
--- a/lib/diff/lcs/block.rb
+++ b/lib/diff/lcs/block.rb
@ -0,0 +1,51 @@
+#! /usr/bin/env ruby
+#--
+# Copyright 2004 Austin Ziegler <diff-lcs@halostatue.ca>
+#   adapted from:
+#     Algorithm::Diff (Perl) by Ned Konz <perl@bike-nomad.com>
+#     Smalltalk by Mario I. Wolczko <mario@wolczko.com>
+#   implements McIlroy-Hunt diff algorithm
+#
+# This program is free software. It may be redistributed and/or modified under
+# the terms of the GPL version 2 (or later), the Perl Artistic licence, or the
+# Ruby licence.
+# 
+# $Id: block.rb,v 1.3 2004/08/08 20:33:09 austin Exp $
+#++
+# Contains Diff::LCS::Block for bin/ldiff.
+
+  # A block is an operation removing, adding, or changing a group of items.
+  # Basically, this is just a list of changes, where each change adds or
+  # deletes a single item. Used by bin/ldiff.
+class Diff::LCS::Block
+  attr_reader :changes, :insert, :remove
+    # Collects every item of +chunk+; deletions also go to #remove, additions to #insert.
+  def initialize(chunk)
+    @changes = []
+    @insert = []
+    @remove = []
+
+    chunk.each do |item|
+      @changes << item
+      @remove << item if item.deleting?
+      @insert << item if item.adding?
+    end
+  end
+    # Net change in length: number of insertions minus number of removals.
+  def diff_size
+    @insert.size - @remove.size
+  end
+    # Block type: '!' removals and insertions, '-' removals only, '+' insertions only, '^' neither.
+  def op
+    case [@remove.empty?, @insert.empty?]
+    when [false, false]
+      '!'
+    when [false, true]
+      '-'
+    when [true, false]
+      '+'
+    else # [true, true]
+      '^'
+    end
+  end
+end
--- a/lib/diff/lcs/callbacks.rb
+++ b/lib/diff/lcs/callbacks.rb
@ -0,0 +1,322 @@
+#! /usr/bin/env ruby
+#--
+# Copyright 2004 Austin Ziegler <diff-lcs@halostatue.ca>
+#   adapted from:
+#     Algorithm::Diff (Perl) by Ned Konz <perl@bike-nomad.com>
+#     Smalltalk by Mario I. Wolczko <mario@wolczko.com>
+#   implements McIlroy-Hunt diff algorithm
+#
+# This program is free software. It may be redistributed and/or modified under
+# the terms of the GPL version 2 (or later), the Perl Artistic licence, or the
+# Ruby licence.
+# 
+# $Id: callbacks.rb,v 1.4 2004/09/14 18:51:26 austin Exp $
+#++
+# Contains definitions for all default callback objects.
+
+require 'diff/lcs/change'
+
+module Diff::LCS
+    # This callback object implements the default set of callback events, which
+    # only returns the event itself. Note that #finished_a and #finished_b are
+    # not implemented -- I haven't yet figured out where they would be useful.
+    #
+    # Note that this is intended to be called as is, e.g.,
+    #
+    #     Diff::LCS.LCS(seq1, seq2, Diff::LCS::DefaultCallbacks)
+  class DefaultCallbacks
+    class << self
+        # Called when two items match.
+      def match(event)
+        event
+      end
+        # Called when the old value is discarded in favour of the new value.
+      def discard_a(event)
+        event
+      end
+        # Called when the new value is discarded in favour of the old value.
+      def discard_b(event)
+        event
+      end
+        # Called when both the old and new values have changed.
+      def change(event)
+        event
+      end
+        # The handlers above live on the class itself, so .new is made private.
+      private :new
+    end
+  end
+
+    # An alias for DefaultCallbacks that is used in Diff::LCS#traverse_sequences.
+    #
+    #     Diff::LCS.LCS(seq1, seq2, Diff::LCS::SequenceCallbacks)
+  SequenceCallbacks = DefaultCallbacks
+    # An alias for DefaultCallbacks that is used in Diff::LCS#traverse_balanced.
+    #
+    #     Diff::LCS.LCS(seq1, seq2, Diff::LCS::BalancedCallbacks)
+  BalancedCallbacks = DefaultCallbacks
+end
+
+  # This will produce a compound array of simple diff change objects. Each
+  # element in the #diffs array is a +hunk+ or +hunk+ array, where each
+  # element in each +hunk+ array is a single Change object representing the
+  # addition or removal of a single element from one of the two tested
+  # sequences. The +hunk+ provides the full context for the changes.
+  #
+  #     diffs = Diff::LCS.diff(seq1, seq2)
+  #       # This example shows a simplified array format.
+  #       # [ [ [ '-',  0, 'a' ] ],   # 1
+  #       #   [ [ '+',  2, 'd' ] ],   # 2
+  #       #   [ [ '-',  4, 'h' ],     # 3
+  #       #     [ '+',  4, 'f' ] ],
+  #       #   [ [ '+',  6, 'k' ] ],   # 4
+  #       #   [ [ '-',  8, 'n' ],     # 5
+  #       #     [ '-',  9, 'p' ],
+  #       #     [ '+',  9, 'r' ],
+  #       #     [ '+', 10, 's' ],
+  #       #     [ '+', 11, 't' ] ] ]
+  #
+  # There are five hunks here. The first hunk says that the +a+ at position 0
+  # of the first sequence should be deleted (<tt>'-'</tt>). The second hunk
+  # says that the +d+ at position 2 of the second sequence should be inserted
+  # (<tt>'+'</tt>). The third hunk says that the +h+ at position 4 of the
+  # first sequence should be removed and replaced with the +f+ from position 4
+  # of the second sequence. The other two hunks are described similarly.
+  #
+  # === Use
+  # This callback object must be initialised and is used by the Diff::LCS#diff
+  # method.
+  #
+  #     cbo = Diff::LCS::DiffCallbacks.new
+  #     Diff::LCS.LCS(seq1, seq2, cbo)
+  #     cbo.finish
+  #
+  # Note that the call to #finish is absolutely necessary, or the last set of
+  # changes will not be visible. Alternatively, can be used as:
+  #
+  #     cbo = Diff::LCS::DiffCallbacks.new { |tcbo| Diff::LCS.LCS(seq1, seq2, tcbo) }
+  #
+  # The necessary #finish call will be made.
+  #
+  # === Simplified Array Format
+  # The simplified array format used in the example above can be obtained
+  # with:
+  #
+  #     require 'pp'
+  #     pp diffs.map { |e| e.map { |f| f.to_a } }
+class Diff::LCS::DiffCallbacks
+    # Returns the difference set collected during the diff process.
+  attr_reader :diffs
+    # Starts with no hunks. With a block, yields self and then always calls #finish (via ensure).
+  def initialize # :yields self:
+    @hunk = []
+    @diffs = []
+
+    if block_given?
+      begin
+        yield self
+      ensure
+        self.finish
+      end
+    end
+  end
+
+    # Finalizes the diff process. If an unprocessed hunk still exists, then it
+    # is appended to the diff list.
+  def finish
+    add_nonempty_hunk
+  end
+    # A match ends the current run of changes, so the pending hunk (if any) is closed.
+  def match(event)
+    add_nonempty_hunk
+  end
+    # Records the removal ('-') of the old element at its old position.
+  def discard_a(event)
+    @hunk << Diff::LCS::Change.new('-', event.old_position, event.old_element)
+  end
+    # Records the addition ('+') of the new element at its new position.
+  def discard_b(event)
+    @hunk << Diff::LCS::Change.new('+', event.new_position, event.new_element)
+  end
+
+private
+  def add_nonempty_hunk
+    @diffs << @hunk unless @hunk.empty?
+    @hunk = []
+  end
+end
+
+  # This will produce a compound array of contextual diff change objects. Each
+  # element in the #diffs array is a "hunk" array, where each element in each
+  # "hunk" array is a single change. Each change is a Diff::LCS::ContextChange
+  # that contains both the old index and new index values for the change. The
+  # "hunk" provides the full context for the changes. Both old and new objects
+  # will be presented for changed objects. +nil+ will be substituted for a
+  # discarded object.
+  #
+  #     seq1 = %w(a b c e h j l m n p)
+  #     seq2 = %w(b c d e f j k l m r s t)
+  #
+  #     diffs = Diff::LCS.diff(seq1, seq2, Diff::LCS::ContextDiffCallbacks)
+  #       # This example shows a simplified array format.
+  #       # [ [ [ '-', [  0, 'a' ], [  0, nil ] ] ],   # 1
+  #       #   [ [ '+', [  3, nil ], [  2, 'd' ] ] ],   # 2
+  #       #   [ [ '-', [  4, 'h' ], [  4, nil ] ],     # 3
+  #       #     [ '+', [  5, nil ], [  4, 'f' ] ] ],
+  #       #   [ [ '+', [  6, nil ], [  6, 'k' ] ] ],   # 4
+  #       #   [ [ '-', [  8, 'n' ], [  9, nil ] ],     # 5
+  #       #     [ '+', [  9, nil ], [  9, 'r' ] ],
+  #       #     [ '-', [  9, 'p' ], [ 10, nil ] ],
+  #       #     [ '+', [ 10, nil ], [ 10, 's' ] ],
+  #       #     [ '+', [ 10, nil ], [ 11, 't' ] ] ] ]
+  #
+  # The five hunks shown are comprised of individual changes; if there is a
+  # related set of changes, they are still shown individually.
+  #
+  # This callback can also be used with Diff::LCS#sdiff, which will produce
+  # results like:
+  #
+  #     diffs = Diff::LCS.sdiff(seq1, seq2, Diff::LCS::ContextDiffCallbacks)
+  #       # This example shows a simplified array format.
+  #       # [ [ [ "-", [  0, "a" ], [  0, nil ] ] ],  # 1
+  #       #   [ [ "+", [  3, nil ], [  2, "d" ] ] ],  # 2
+  #       #   [ [ "!", [  4, "h" ], [  4, "f" ] ] ],  # 3
+  #       #   [ [ "+", [  6, nil ], [  6, "k" ] ] ],  # 4
+  #       #   [ [ "!", [  8, "n" ], [  9, "r" ] ],    # 5
+  #       #     [ "!", [  9, "p" ], [ 10, "s" ] ],
+  #       #     [ "+", [ 10, nil ], [ 11, "t" ] ] ] ]
+  #
+  # The five hunks are still present, but are significantly shorter in total
+  # presentation, because changed items are shown as changes ("!") instead of
+  # potentially "mismatched" pairs of additions and deletions.
+  #
+  # The result of this operation is similar to that of
+  # Diff::LCS::SDiffCallbacks. They may be compared as:
+  #
+  #     s = Diff::LCS.sdiff(seq1, seq2).reject { |e| e.action == "=" }
+  #     c = Diff::LCS.sdiff(seq1, seq2, Diff::LCS::ContextDiffCallbacks).flatten
+  #
+  #     s == c # -> true
+  #
+  # === Use
+  # This callback object must be initialised and can be used by the
+  # Diff::LCS#diff or Diff::LCS#sdiff methods.
+  #
+  #     cbo = Diff::LCS::ContextDiffCallbacks.new
+  #     Diff::LCS.LCS(seq1, seq2, cbo)
+  #     cbo.finish
+  #
+  # Note that the call to #finish is absolutely necessary, or the last set of
+  # changes will not be visible. Alternatively, can be used as:
+  #
+  #     cbo = Diff::LCS::ContextDiffCallbacks.new { |tcbo| Diff::LCS.LCS(seq1, seq2, tcbo) }
+  #
+  # The necessary #finish call will be made.
+  #
+  # === Simplified Array Format
+  # The simplified array format used in the example above can be obtained
+  # with:
+  #
+  #     require 'pp'
+  #     pp diffs.map { |e| e.map { |f| f.to_a } }
+class Diff::LCS::ContextDiffCallbacks < Diff::LCS::DiffCallbacks
+  def discard_a(event)
+    @hunk << Diff::LCS::ContextChange.simplify(event)
+  end
+    # Like #discard_a, stores the simplified ContextChange for the event in @hunk.
+  def discard_b(event)
+    @hunk << Diff::LCS::ContextChange.simplify(event)
+  end
+    # Handles "change" events, for which the parent DiffCallbacks defines no handler.
+  def change(event)
+    @hunk << Diff::LCS::ContextChange.simplify(event)
+  end
+end
+
+  # This will produce a simple array of diff change objects. Each element in
+  # the #diffs array is a single ContextChange. In the set of #diffs provided
+  # by SDiffCallbacks, both old and new objects will be presented for both
+  # changed <strong>and unchanged</strong> objects. +nil+ will be substituted
+  # for a discarded object.
+  #
+  # The diffset produced by this callback, when provided to Diff::LCS#sdiff,
+  # will compute and display the necessary components to show two sequences
+  # and their minimized differences side by side, just like the Unix utility
+  # +sdiff+.
+  # 
+  #     same             same
+  #     before     |     after
+  #     old        <     -
+  #     -          >     new
+  #
+  #     seq1 = %w(a b c e h j l m n p)
+  #     seq2 = %w(b c d e f j k l m r s t)
+  #
+  #     diffs = Diff::LCS.sdiff(seq1, seq2)
+  #       # This example shows a simplified array format.
+  #       # [ [ "-", [  0, "a"], [  0, nil ] ],
+  #       #   [ "=", [  1, "b"], [  0, "b" ] ],
+  #       #   [ "=", [  2, "c"], [  1, "c" ] ],
+  #       #   [ "+", [  3, nil], [  2, "d" ] ],
+  #       #   [ "=", [  3, "e"], [  3, "e" ] ],
+  #       #   [ "!", [  4, "h"], [  4, "f" ] ],
+  #       #   [ "=", [  5, "j"], [  5, "j" ] ],
+  #       #   [ "+", [  6, nil], [  6, "k" ] ],
+  #       #   [ "=", [  6, "l"], [  7, "l" ] ],
+  #       #   [ "=", [  7, "m"], [  8, "m" ] ],
+  #       #   [ "!", [  8, "n"], [  9, "r" ] ],
+  #       #   [ "!", [  9, "p"], [ 10, "s" ] ],
+  #       #   [ "+", [ 10, nil], [ 11, "t" ] ] ]
+  #
+  # The result of this operation is similar to that of
+  # Diff::LCS::ContextDiffCallbacks. They may be compared as:
+  #
+  #     s = Diff::LCS.sdiff(seq1, seq2).reject { |e| e.action == "=" }
+  #     c = Diff::LCS.sdiff(seq1, seq2, Diff::LCS::ContextDiffCallbacks).flatten
+  #
+  #     s == c # -> true
+  #
+  # === Use
+  # This callback object must be initialised and is used by the Diff::LCS#sdiff
+  # method.
+  #
+  #     cbo = Diff::LCS::SDiffCallbacks.new
+  #     Diff::LCS.LCS(seq1, seq2, cbo)
+  #
+  # As with the other initialisable callback objects, Diff::LCS::SDiffCallbacks
+  # can be initialised with a block. As there is no "finishing" to be done,
+  # this has no effect on the state of the object.
+  #
+  #     cbo = Diff::LCS::SDiffCallbacks.new { |tcbo| Diff::LCS.LCS(seq1, seq2, tcbo) }
+  #
+  # === Simplified Array Format
+  # The simplified array format used in the example above can be obtained
+  # with:
+  #
+  #     require 'pp'
+  #     pp diffs.map { |e| e.to_a }
+class Diff::LCS::SDiffCallbacks
+    # Returns the difference set collected during the diff process.
+  attr_reader :diffs
+    # Starts with an empty diff list and yields self if a block is given.
+  def initialize #:yields self:
+    @diffs = []
+    yield self if block_given?
+  end
+    # All four handlers append the simplified ContextChange for the event to #diffs.
+  def match(event)
+    @diffs << Diff::LCS::ContextChange.simplify(event)
+  end
+
+  def discard_a(event)
+    @diffs << Diff::LCS::ContextChange.simplify(event)
+  end
+
+  def discard_b(event)
+    @diffs << Diff::LCS::ContextChange.simplify(event)
+  end
+
+  def change(event)
+    @diffs << Diff::LCS::ContextChange.simplify(event)
+  end
+end
--- a/lib/diff/lcs/change.rb
+++ b/lib/diff/lcs/change.rb
@ -0,0 +1,169 @@
+#! /usr/bin/env ruby
+#--
+# Copyright 2004 Austin Ziegler <diff-lcs@halostatue.ca>
+#   adapted from:
+#     Algorithm::Diff (Perl) by Ned Konz <perl@bike-nomad.com>
+#     Smalltalk by Mario I. Wolczko <mario@wolczko.com>
+#   implements McIlroy-Hunt diff algorithm
+#
+# This program is free software. It may be redistributed and/or modified under
+# the terms of the GPL version 2 (or later), the Perl Artistic licence, or the
+# Ruby licence.
+# 
+# $Id: change.rb,v 1.4 2004/08/08 20:33:09 austin Exp $
+#++
+# Provides Diff::LCS::Change and Diff::LCS::ContextChange.
+
+  # Centralises the change test code in Diff::LCS::Change and
+  # Diff::LCS::ContextChange, since it's the same for both classes.
+module Diff::LCS::ChangeTypeTests
+  def deleting?
+    @action == '-'
+  end
+
+  def adding?
+    @action == '+'
+  end
+
+  def unchanged?
+    @action == '='
+  end
+
+  def changed?
+    @changed == '!'
+  end
+
+  def finished_a?
+    @changed == '>'
+  end
+
+  def finished_b?
+    @changed == '<'
+  end
+end
+
+  # Represents a simplistic (non-contextual) change. Represents the removal or
+  # addition of an element from either the old or the new sequenced enumerable.
+class Diff::LCS::Change
+    # Returns the action this Change represents. Can be '+' (#adding?), '-'
+    # (#deleting?), '=' (#unchanged?), or '!' (#changed?). When created by
+    # Diff::LCS#diff or Diff::LCS#sdiff, it may also be '>' (#finished_a?) or
+    # '<' (#finished_b?).
+  attr_reader :action
+  attr_reader :position
+  attr_reader :element
+
+  include Comparable
+  def ==(other)
+    (self.action == other.action) and
+    (self.position == other.position) and
+    (self.element == other.element)
+  end
+    # Orders changes by action, then position, then element.
+  def <=>(other)
+    r = self.action <=> other.action
+    r = self.position <=> other.position if r.zero?
+    r = self.element <=> other.element if r.zero?
+    r
+  end
+
+  def initialize(action, position, element)
+    @action = action
+    @position = position
+    @element = element
+  end
+
+    # Returns this change as an [action, position, element] array.
+  def to_a
+    [@action, @position, @element]
+  end
+    # Creates a Change from an array produced by Change#to_a.
+  def self.from_a(arr)
+    Diff::LCS::Change.new(arr[0], arr[1], arr[2])
+  end
+
+  include Diff::LCS::ChangeTypeTests
+end
+
+  # Represents a contextual change. Contains the position and values of the
+  # elements in the old and the new sequenced enumerables as well as the action
+  # taken.
+class Diff::LCS::ContextChange
+    # Returns the action this Change represents. Can be '+' (#adding?), '-'
+    # (#deleting?), '=' (#unchanged?), or '!' (#changed?). When
+    # created by Diff::LCS#diff or Diff::LCS#sdiff, it may also be '>'
+    # (#finished_a?) or '<' (#finished_b?).
+  attr_reader :action
+  attr_reader :old_position
+  attr_reader :old_element
+  attr_reader :new_position
+  attr_reader :new_element
+
+  include Comparable
+    # Equal when the action and both position/element pairs all match.
+  def ==(other)
+    (@action == other.action) and
+    (@old_position == other.old_position) and
+    (@new_position == other.new_position) and
+    (@old_element == other.old_element) and
+    (@new_element == other.new_element)
+  end
+    # Debug string showing the action, both positions and both elements.
+  def inspect(*args)
+    %Q(#<#{self.class.name}:#{__id__} @action=#{action} positions=#{old_position},#{new_position} elements=#{old_element.inspect},#{new_element.inspect}>)
+  end
+    # Orders by action, old position, new position, old element, then new element.
+  def <=>(other)
+    r = @action <=> other.action
+    r = @old_position <=> other.old_position if r.zero?
+    r = @new_position <=> other.new_position if r.zero?
+    r = @old_element <=> other.old_element if r.zero?
+    r = @new_element <=> other.new_element if r.zero?
+    r
+  end
+
+  def initialize(action, old_position, old_element, new_position, new_element)
+    @action = action
+    @old_position = old_position
+    @old_element = old_element
+    @new_position = new_position
+    @new_element = new_element
+  end
+    # Returns [action, [old_position, old_element], [new_position, new_element]].
+  def to_a
+    [@action, [@old_position, @old_element], [@new_position, @new_element]]
+  end
+    # Creates a ContextChange from the nested array produced by ContextChange#to_a,
+    # or from a flat five-element array of the constructor arguments.
+  def self.from_a(arr)
+    if arr.size == 5
+      Diff::LCS::ContextChange.new(arr[0], arr[1], arr[2], arr[3], arr[4])
+    else
+      Diff::LCS::ContextChange.new(arr[0], arr[1][0], arr[1][1], arr[2][0],
+                                   arr[2][1])
+    end
+  end
+    # Simplifies a context change for use in some diff callbacks. '<' actions
+    # are converted to '-' and '>' actions to '+'. Removals get a nil new
+    # element and additions a nil old element; other actions are rebuilt as is.
+  def self.simplify(event)
+    ea = event.to_a
+
+    case ea[0]
+    when '-'
+      ea[2][1] = nil
+    when '<'
+      ea[0] = '-'
+      ea[2][1] = nil
+    when '+'
+      ea[1][1] = nil
+    when '>'
+      ea[0] = '+'
+      ea[1][1] = nil
+    end
+
+    Diff::LCS::ContextChange.from_a(ea)
+  end
+
+  include Diff::LCS::ChangeTypeTests
+end
--- a/lib/diff/lcs/hunk.rb
+++ b/lib/diff/lcs/hunk.rb
@ -0,0 +1,257 @@
+#!/usr/bin/env ruby
+#--
+# Copyright 2004 Austin Ziegler <diff-lcs@halostatue.ca>
+#   adapted from:
+#     Algorithm::Diff (Perl) by Ned Konz <perl@bike-nomad.com>
+#     Smalltalk by Mario I. Wolczko <mario@wolczko.com>
+#   implements McIlroy-Hunt diff algorithm
+#
+# This program is free software. It may be redistributed and/or modified under
+# the terms of the GPL version 2 (or later), the Perl Artistic licence, or the
+# Ruby licence.
+# 
+# $Id: hunk.rb,v 1.2 2004/08/08 20:33:09 austin Exp $
+#++
+# Contains Diff::LCS::Hunk for bin/ldiff.
+
+require 'diff/lcs/block'
+
+  # A Hunk is a group of Blocks which overlap because of the context
+  # surrounding each block. (So if we're not using context, every hunk will
+  # contain one block.) Used in the diff program (bin/diff).
+class Diff::LCS::Hunk
+    # Create a hunk using references to both the old and new data, as well as
+    # the piece of data
+    #
+    # +data_old+, +data_new+:: the complete old and new sequences.
+    # +piece+:: the changes wrapped into this hunk's first Diff::LCS::Block.
+    # +context+:: number of context items to flag around the change; nil or
+    #             zero leaves the range untouched.
+    # +file_length_difference+:: length difference accumulated before this
+    #                            hunk; the updated value is available from
+    #                            #file_length_difference afterwards.
+  def initialize(data_old, data_new, piece, context, file_length_difference)
+      # At first, a hunk will have just one Block in it
+    @blocks = [ Diff::LCS::Block.new(piece) ]
+    @data_old = data_old
+    @data_new = data_new
+
+    before = after = file_length_difference
+    after += @blocks[0].diff_size
+    @file_length_difference = after # The caller must get this manually
+
+      # Save the start & end of each array. If the array doesn't exist
+      # (e.g., we're only adding items in this block), then figure out the
+      # line number based on the line number of the other file and the
+      # current difference in file lengths.
+    if @blocks[0].remove.empty?
+      a1 = a2 = nil
+    else
+      a1 = @blocks[0].remove[0].position
+      a2 = @blocks[0].remove[-1].position
+    end
+
+    if @blocks[0].insert.empty?
+      b1 = b2 = nil
+    else
+      b1 = @blocks[0].insert[0].position
+      b2 = @blocks[0].insert[-1].position
+    end
+
+    @start_old = a1 || (b1 - before)
+    @start_new = b1 || (a1 + before)
+    @end_old   = a2 || (b2 - after)
+    @end_new   = b2 || (a2 + after)
+
+    self.flag_context = context
+  end
+
+  attr_reader :blocks
+  attr_reader :start_old, :start_new
+  attr_reader :end_old, :end_new
+  attr_reader :file_length_difference
+
+    # Change the "start" and "end" fields to note that context should be added
+    # to this hunk. The requested amount is remembered so that the reader
+    # returns it; the start is clamped at the beginning of the old data and
+    # the end at the size of the old data.
+  attr_reader :flag_context
+  def flag_context=(context) #:nodoc:
+    @flag_context = context
+    return if context.nil? or context.zero?
+
+    add_start = (context > @start_old) ? @start_old : context
+    @start_old -= add_start
+    @start_new -= add_start
+
+    if (@end_old + context) > @data_old.size
+      add_end = @data_old.size - @end_old
+    else
+      add_end = context
+    end
+    @end_old += add_end
+    @end_new += add_end
+  end
+
+    # Merges an earlier hunk into this one: its start positions replace ours
+    # and its blocks are placed in front of our own.
+  def unshift(hunk)
+    @start_old = hunk.start_old
+    @start_new = hunk.start_new
+    blocks.unshift(*hunk.blocks)
+  end
+
+    # Is there an overlap between hunk arg0 and old hunk arg1? Note: if end
+    # of old hunk is one less than beginning of second, they overlap
+  def overlaps?(hunk = nil)
+    return nil if hunk.nil?
+
+    a = (@start_old - hunk.end_old) <= 1
+    b = (@start_new - hunk.end_new) <= 1
+    return (a or b)
+  end
+
+    # Renders the hunk in the requested +format+: :old, :unified, :context,
+    # :reverse_ed or :ed_finish produce a string. :ed returns the hunk itself
+    # so that it can be rendered later with :ed_finish. Any other format
+    # raises a RuntimeError.
+  def diff(format)
+    case format
+    when :old
+      old_diff
+    when :unified
+      unified_diff
+    when :context
+      context_diff
+    when :ed
+      self
+    when :reverse_ed, :ed_finish
+      ed_diff(format)
+    else
+      raise "Unknown diff format #{format}."
+    end
+  end
+
+    # Passes each item of the old data covered by this hunk to the iterator.
+    # The iterator may be given as a regular block or, for callers written
+    # against the older signature, as a callable in the +block+ argument.
+  def each_old(block = nil, &iterator)
+    handler = iterator || block
+    raise ArgumentError, "no block given" if handler.nil?
+    @data_old[@start_old .. @end_old].each { |e| handler.call(e) }
+  end
+
+  private
+    # Note that an old diff can't have any context. Therefore, we know that
+    # there's only one block in the hunk.
+  def old_diff
+    warn "Expecting only one block in an old diff hunk!" if @blocks.size > 1
+    op_act = { "+" => 'a', "-" => 'd', "!" => "c" }
+
+    block = @blocks[0]
+
+      # Calculate item number range. Old diff range is just like a context
+      # diff range, except the ranges are on one line with the action between
+      # them.
+    s = "#{context_range(:old)}#{op_act[block.op]}#{context_range(:new)}\n"
+      # If removing anything, just print out all the remove lines in the hunk
+      # which is just all the remove lines in the block.
+    @data_old[@start_old .. @end_old].each { |e| s << "< #{e}\n" } unless block.remove.empty?
+    s << "---\n" if block.op == "!"
+    @data_new[@start_new .. @end_new].each { |e| s << "> #{e}\n" } unless block.insert.empty?
+    s
+  end
+
+    # Renders the hunk in unified format: a "@@ -old +new @@" header followed
+    # by the old items, with removed items marked and inserted items spliced in.
+  def unified_diff
+      # Calculate item number range.
+    s = "@@ -#{unified_range(:old)} +#{unified_range(:new)} @@\n"
+
+      # Outlist starts containing the hunk of the old file. Removing an item
+      # just means putting a '-' in front of it. Inserting an item requires
+      # getting it from the new file and splicing it in. We splice in
+      # +num_added+ items. Remove blocks use +num_added+ because splicing
+      # changed the length of outlist.
+      #
+      # We remove +num_removed+ items. Insert blocks use +num_removed+
+      # because their item numbers -- corresponding to positions in the NEW
+      # file -- don't take removed items into account.
+    lo, hi, num_added, num_removed = @start_old, @end_old, 0, 0
+
+    outlist = @data_old[lo .. hi].collect { |e| e.gsub(/^/, ' ') }
+
+    @blocks.each do |block|
+      block.remove.each do |item|
+        op = item.action.to_s # -
+        offset = item.position - lo + num_added
+        outlist[offset].gsub!(/^ /, op.to_s)
+        num_removed += 1
+      end
+      block.insert.each do |item|
+        op = item.action.to_s # +
+        offset = item.position - @start_new + num_removed
+        outlist[offset, 0] = "#{op}#{@data_new[item.position]}"
+        num_added += 1
+      end
+    end
+
+    s << outlist.join("\n")
+  end
+
+    # Renders the hunk in context format. Each half (old, then new) always
+    # gets its range header; the items of a half are only listed when at
+    # least one block actually removes from (or inserts into) that half.
+  def context_diff
+    s = "***************\n"
+    s << "*** #{context_range(:old)} ****"
+    r = context_range(:new)
+
+      # Print out file 1 part for each block in context diff format if there
+      # are any blocks that remove items
+    lo, hi = @start_old, @end_old
+    removes = @blocks.select { |e| not e.remove.empty? }
+      # select always returns an Array, so emptiness (not truthiness) is the
+      # meaningful check here.
+    unless removes.empty?
+      outlist = @data_old[lo .. hi].collect { |e| e.gsub(/^/, '  ') }
+      removes.each do |block|
+        block.remove.each do |item|
+          outlist[item.position - lo].gsub!(/^ /) { block.op } # - or !
+        end
+      end
+      s << "\n" << outlist.join("\n")
+    end
+
+    s << "\n--- #{r} ----"
+    lo, hi = @start_new, @end_new
+    inserts = @blocks.select { |e| not e.insert.empty? }
+    unless inserts.empty?
+      outlist = @data_new[lo .. hi].collect { |e| e.gsub(/^/, '  ') }
+      inserts.each do |block|
+        block.insert.each do |item|
+          outlist[item.position - lo].gsub!(/^ /) { block.op } # + or !
+        end
+      end
+      s << "\n" << outlist.join("\n")
+    end
+    s
+  end
+
+    # Renders the hunk as an ed command. For :reverse_ed the action letter
+    # precedes the range; otherwise the range (with ',' replaced by a space)
+    # precedes the action letter. Inserted items follow, terminated by ".".
+  def ed_diff(format)
+    op_act = { "+" => 'a', "-" => 'd', "!" => "c" }
+    warn "Expecting only one block in an old diff hunk!" if @blocks.size > 1
+
+    if format == :reverse_ed
+      s = "#{op_act[@blocks[0].op]}#{context_range(:old)}\n"
+    else
+      s = "#{context_range(:old).gsub(/,/, ' ')}#{op_act[@blocks[0].op]}\n"
+    end
+
+    unless @blocks[0].insert.empty?
+      @data_new[@start_new .. @end_new].each { |e| s << "#{e}\n" }
+      s << ".\n"
+    end
+    s
+  end
+
+    # Generate a range of item numbers to print. Only print 1 number if the
+    # range has only one item in it. Otherwise, it's 'start,end'
+  def context_range(mode)
+    case mode
+    when :old
+      s, e = (@start_old + 1), (@end_old + 1)
+    when :new
+      s, e = (@start_new + 1), (@end_new + 1)
+    end
+
+    (s < e) ? "#{s},#{e}" : "#{e}"
+  end
+
+    # Generate a range of item numbers to print for unified diff. Print
+    # number where block starts, followed by number of lines in the block
+    # (don't print number of lines if it's 1)
+  def unified_range(mode)
+    case mode
+    when :old
+      s, e = (@start_old + 1), (@end_old + 1)
+    when :new
+      s, e = (@start_new + 1), (@end_new + 1)
+    end
+
+    length = e - s + 1
+    first = (length < 2) ? e : s # "strange, but correct"
+    (length == 1) ? "#{first}" : "#{first},#{length}"
+  end
+end
--- a/lib/diff/lcs/ldiff.rb
+++ b/lib/diff/lcs/ldiff.rb
@ -0,0 +1,226 @@
+#!/usr/bin/env ruby
+
+require 'optparse'
+require 'ostruct'
+require 'diff/lcs/hunk'
+
+  # == ldiff Usage
+  #   ldiff [options] oldfile newfile
+  #
+  # -c::                            Displays a context diff with 3 lines of context.
+  # -C [LINES], --context [LINES]:: Displays a context diff with LINES lines of context. Default 3 lines.
+  # -u::                            Displays a unified diff with 3 lines of context.
+  # -U [LINES], --unified [LINES]:: Displays a unified diff with LINES lines of context. Default 3 lines.
+  # -e::                            Creates an 'ed' script to change oldfile to newfile.
+  # -f::                            Creates an 'ed' script to change oldfile to newfile in reverse order.
+  # -a, --text::                    Treats the files as text and compares them line-by-line, even if they do not seem to be text.
+  # --binary::                      Treats the files as binary.
+  # -q, --brief::                   Reports only whether or not the files differ, not the details.
+  # --help::                        Shows the command-line help.
+  # --version::                     Shows the version of Diff::LCS.
+  #
+  # By default, ldiff produces an "old-style" diff, with output like UNIX diff.
+  #
+  # == Copyright
+  # Copyright &copy; 2004 Austin Ziegler
+  #
+  #   Part of Diff::LCS <http://rubyforge.org/projects/ruwiki/>
+  #   Austin Ziegler <diff-lcs@halostatue.ca>
+  #
+  # This program is free software. It may be redistributed and/or modified under
+  # the terms of the GPL version 2 (or later), the Perl Artistic licence, or the
+  # Ruby licence.
+module Diff::LCS::Ldiff
+  BANNER = <<-COPYRIGHT
+ldiff #{Diff::LCS::VERSION}
+  Copyright © 2004 Austin Ziegler
+
+  Part of Diff::LCS.
+  http://rubyforge.org/projects/ruwiki/
+
+  Austin Ziegler <diff-lcs@halostatue.ca>
+
+  This program is free software. It may be redistributed and/or modified under
+  the terms of the GPL version 2 (or later), the Perl Artistic licence, or the
+  Ruby licence.
+
+$Id: ldiff.rb,v 1.1 2004/09/26 01:37:49 austin Exp $
+              COPYRIGHT
+  
+  class << self
+    attr_reader   :format, :lines       #:nodoc:
+    attr_reader   :file_old, :file_new  #:nodoc:
+    attr_reader   :data_old, :data_new  #:nodoc:
+
+      # Runs ldiff with the command-line arguments in +args+, writing the diff
+      # to +output+ and usage/version text to +error+. +input+ is accepted
+      # but not read.
+      #
+      # Returns 0 when no differences are found (and after --help or
+      # --version), 1 when the files differ, and 127 unless exactly two file
+      # names remain once the options have been parsed.
+    def run(args, input = $stdin, output = $stdout, error = $stderr) #:nodoc:
+      args.options do |o|
+        o.banner = "Usage: #{File.basename($0)} [options] oldfile newfile"
+        o.separator ""
+        o.on('-c',
+            'Displays a context diff with 3 lines of',
+            'context.') do |ctx|
+          @format = :context
+          @lines  = 3
+        end
+        o.on('-C', '--context [LINES]', Numeric,
+            'Displays a context diff with LINES lines',
+            'of context. Default 3 lines.') do |ctx|
+          @format = :context
+          @lines  = ctx || 3
+        end
+        o.on('-u',
+            'Displays a unified diff with 3 lines of',
+            'context.') do |ctx|
+          @format = :unified
+          @lines  = 3
+        end
+        o.on('-U', '--unified [LINES]', Numeric,
+            'Displays a unified diff with LINES lines',
+            'of context. Default 3 lines.') do |ctx|
+          @format = :unified
+          @lines  = ctx || 3
+        end
+        o.on('-e',
+            'Creates an \'ed\' script to change',
+            'oldfile to newfile.') do |ctx|
+          @format = :ed
+        end
+        o.on('-f',
+            'Creates an \'ed\' script to change',
+            'oldfile to newfile in reverse order.') do |ctx|
+          @format = :reverse_ed
+        end
+        o.on('-a', '--text',
+             'Treat the files as text and compare them',
+             'line-by-line, even if they do not seem',
+             'to be text.') do |txt|
+          @binary = false
+        end
+        o.on('--binary',
+             'Treats the files as binary.') do |bin|
+          @binary = true
+        end
+        o.on('-q', '--brief',
+             'Report only whether or not the files',
+             'differ, not the details.') do |ctx|
+          @format = :report
+        end
+        o.on_tail('--help', 'Shows this text.') do
+          error << o
+          return 0
+        end
+        o.on_tail('--version', 'Shows the version of Diff::LCS.') do
+          error << BANNER
+          return 0
+        end
+        o.on_tail ""
+        o.on_tail 'By default, runs produces an "old-style" diff, with output like UNIX diff.'
+        o.parse!
+      end
+
+      unless args.size == 2
+        error << args.options
+        return 127
+      end
+
+        # Defaults are for old-style diff
+      @format ||= :old
+      @lines  ||= 0
+
+        # Take the file names from the arguments this method was given (the
+        # same array whose size was just checked), not from the global ARGV.
+      file_old, file_new = *args
+
+      case @format
+      when :context
+        char_old = '*' * 3
+        char_new = '-' * 3
+      when :unified
+        char_old = '-' * 3
+        char_new = '+' * 3
+      end
+
+        # After we've read up to a certain point in each file, the number of
+        # items we've read from each file will differ by FLD (could be 0).
+      file_length_difference = 0
+
+      if @binary.nil? or @binary
+        data_old = IO::read(file_old)
+        data_new = IO::read(file_new)
+
+          # Test binary status: a NUL byte within the first 4096 characters of
+          # either file marks the comparison as binary.
+        if @binary.nil?
+          old_txt = data_old[0...4096].index("\0").nil?
+          new_txt = data_new[0...4096].index("\0").nil?
+            # The outer parentheses matter: "=" binds tighter than "or", so
+            # without them only the old file would decide the result.
+          @binary = ((not old_txt) or (not new_txt))
+          old_txt = new_txt = nil
+        end
+
+        unless @binary
+          data_old = data_old.split(/\n/).map! { |e| e.chomp }
+          data_new = data_new.split(/\n/).map! { |e| e.chomp }
+        end
+      else
+        data_old = IO::readlines(file_old).map! { |e| e.chomp }
+        data_new = IO::readlines(file_new).map! { |e| e.chomp }
+      end
+
+        # diff yields lots of pieces, each of which is basically a Block object
+      if @binary
+          # Binary data is only compared as a whole; true means "differs".
+        diffs = (data_old != data_new)
+      else
+        diffs = Diff::LCS.diff(data_old, data_new)
+        diffs = nil if diffs.empty?
+      end
+
+      return 0 unless diffs
+
+      if (@format == :report) and diffs
+        output << "Files #{file_old} and #{file_new} differ\n"
+        return 1
+      end
+
+        # Binary contents are not split into items, so there are no hunks to
+        # print; report the difference and stop.
+      if @binary
+        output << "Binary files #{file_old} and #{file_new} differ\n"
+        return 1
+      end
+
+      if (@format == :unified) or (@format == :context)
+        ft = File.stat(file_old).mtime.localtime.strftime('%Y-%m-%d %H:%M:%S %z')
+        output << "#{char_old} #{file_old}\t#{ft}\n"
+        ft = File.stat(file_new).mtime.localtime.strftime('%Y-%m-%d %H:%M:%S %z')
+        output << "#{char_new} #{file_new}\t#{ft}\n"
+      end
+
+        # Loop over hunks. If a hunk overlaps with the last hunk, join them.
+        # Otherwise, print out the old one.
+      oldhunk = hunk = nil
+
+        # For ed scripts the pieces are collected in an array first and
+        # written out in reverse order at the end.
+      if @format == :ed
+        real_output = output
+        output = []
+      end
+
+      diffs.each do |piece|
+      begin
+        hunk = Diff::LCS::Hunk.new(data_old, data_new, piece, @lines,
+                                   file_length_difference)
+        file_length_difference = hunk.file_length_difference
+
+        next unless oldhunk
+
+        if (@lines > 0) and hunk.overlaps?(oldhunk)
+          hunk.unshift(oldhunk)
+        else
+          output << oldhunk.diff(@format)
+        end
+      ensure
+        oldhunk = hunk
+        output << "\n"
+      end
+      end
+
+      output << oldhunk.diff(@format)
+      output << "\n"
+
+        # NOTE(review): at this point output also holds the "\n" separators
+        # appended above, not only hunks -- confirm they are meant to receive
+        # #diff here.
+      if @format == :ed
+        output.reverse_each { |e| real_output << e.diff(:ed_finish) }
+      end
+
+      return 1
+    end
+  end
+end
--- a/lib/diff/lcs/string.rb
+++ b/lib/diff/lcs/string.rb
@ -0,0 +1,19 @@
+#!/usr/bin/env ruby
+#--
+# Copyright 2004 Austin Ziegler <diff-lcs@halostatue.ca>
+#   adapted from:
+#     Algorithm::Diff (Perl) by Ned Konz <perl@bike-nomad.com>
+#     Smalltalk by Mario I. Wolczko <mario@wolczko.com>
+#   implements McIlroy-Hunt diff algorithm
+#
+# This program is free software. It may be redistributed and/or modified under
+# the terms of the GPL version 2 (or later), the Perl Artistic licence, or the
+# Ruby licence.
+# 
+# $Id: string.rb,v 1.3 2004/08/08 20:33:09 austin Exp $
+#++
+# Includes Diff::LCS into String.
+
+class String
+  include Diff::LCS # mixes the Diff::LCS module into every String
+end
--- a/lib/page_renderer.rb
+++ b/lib/page_renderer.rb
@ -1,11 +1,9 @@
-require 'diff'
+require 'xhtmldiff'
 # Temporary class containing all rendering stuff from a Revision 
 # I want to shift all rendering loguc to the controller eventually

 class PageRenderer

-  include HTMLDiff
-
  def self.setup_url_generator(url_generator)
    @@url_generator = url_generator
  end
@ -41,8 +39,22 @@ class PageRenderer
  def display_diff
    previous_revision = @revision.page.previous_revision(@revision)
    if previous_revision
-      rendered_previous_revision = WikiContent.new(previous_revision, @@url_generator).render!
-      diff(rendered_previous_revision, display_content) 
+
+      previous_content = "<div>\n" + WikiContent.new(previous_revision, @@url_generator).render!.to_s + "\n</div>"
+      current_content = "<div>\n" + display_content.to_s  + "\n</div>"
+      diff_doc = REXML::Document.new
+      diff_doc << (div = REXML::Element.new 'div')
+      hd = XHTMLDiff.new(div)
+
+      parsed_previous_revision = REXML::HashableElementDelegator.new(
+           REXML::XPath.first(REXML::Document.new(previous_content), '/div'))
+      parsed_display_content = REXML::HashableElementDelegator.new(
+           REXML::XPath.first(REXML::Document.new(current_content), '/div'))
+      Diff::LCS.traverse_balanced(parsed_previous_revision, parsed_display_content, hd)
+
+      diffs = ''
+      diff_doc.write(diffs, -1, true, true)
+      diffs
    else
      display_content
    end
--- a/lib/xhtmldiff.rb
+++ b/lib/xhtmldiff.rb
@ -0,0 +1,179 @@
+#!/usr/bin/env ruby
+# Author: Aredridel <aredridel@nbtsc.org>
+# Website: http://theinternetco.net/projects/ruby/xhtmldiff.html
+# Licence: same as Ruby
+# Version: 1.22
+#
+# Tweaks by Jacques Distler <distler@golem.ph.utexas.edu>
+#  -- add classnames to <del> and <ins> elements added by XHTMLDiff,
+#     for better CSS styling
+
+require 'diff/lcs'
+require 'rexml/document'
+require 'delegate'
+
+# Returns +a+ when it is greater than +b+, otherwise +b+.
+def Math.max(a, b)
+	return a if a > b
+	b
+end
+
+module REXML
+
+	class Text 
+		# Gives Text a deep_clone method; it simply returns a clone.
+		def deep_clone
+			self.clone
+		end
+	end
+
+	# Delegator around an Element whose equality and hash are based on the
+	# element's serialized form (to_s).
+	class HashableElementDelegator < DelegateClass(Element)
+		def initialize(sub)
+			super(sub)
+		end
+
+		# Equal when both serializations match after stripping surrounding
+		# whitespace.
+		def == other
+			other.to_s.strip == self.to_s.strip
+		end
+
+		def eql? other
+			self == other
+		end
+
+		# Looks up +k+ on the wrapped element; results of the wrapped
+		# element's own class are wrapped in a delegator again.
+		def[](k)
+			child = super
+			return child unless child.kind_of? __getobj__.class
+			self.class.new(child)
+		end
+
+		# Hash of the wrapped element's serialized form.
+		def hash
+			__getobj__.to_s.hash
+		end
+	end
+
+end
+
+# Callback object for Diff::LCS.traverse_balanced that appends a redline
+# version of the compared nodes to +output+.
+class XHTMLDiff
+	include REXML
+	attr_accessor :output
+
+	class << self
+		BLOCK_CONTAINERS = ['div', 'ul', 'li']
+
+		# Builds a redline node for +a+ and +b+. Equal arguments yield a deep
+		# clone of +a+; two HashableElementDelegators are diffed child by
+		# child into a new element with +a+'s name and attributes; two Text
+		# nodes are diffed word by word into a <span>. Any other combination
+		# raises ArgumentError.
+		def diff(a, b)
+			return a.deep_clone if a == b
+
+			delegator = REXML::HashableElementDelegator
+			if delegator === a and delegator === b
+				result = REXML::Element.new(a.name)
+				result.add_attributes  a.attributes
+				Diff::LCS.traverse_balanced(a, b, self.new(result))
+				result
+			elsif REXML::Text === a and REXML::Text === b
+				result = REXML::Element.new('span')
+				old_words = a.value.split(/\s/)
+				new_words = b.value.split(/\s/)
+				Diff::LCS.traverse_balanced(old_words, new_words, XHTMLTextDiff.new(result))
+				result
+			else
+				raise ArgumentError.new("both arguments must be equal or both be elements. a is #{a.class.name} and b is #{b.class.name}")
+			end
+		end
+	end
+
+	# Instance-level shortcut for the class-level diff.
+	def diff(a, b)
+		self.class.diff(a, b)
+	end
+
+	def initialize(output)
+		@output = output
+	end
+
+	# This will be called when both elements are the same
+	def match(event)
+		unchanged = event.old_element
+		@output << unchanged.deep_clone if unchanged
+	end
+
+	# This will be called when there is an element in A that isn't in B
+	def discard_a(event)
+		deleted = wrap(event.old_element, 'del', 'diffdel')
+		@output << deleted
+	end
+
+	# This will be called when an element in A was replaced by one in B. A
+	# nested diff is attempted first; it is used when the ratio computed
+	# below exceeds 0.5, otherwise the old element is emitted as deleted and
+	# the new one as inserted.
+	def change(event)
+		nested = begin
+			diff(event.old_element, event.new_element)
+		rescue ArgumentError
+			nil
+		end
+
+		similar = false
+		if nested
+			# NOTE(review): the pattern only matches bare <ins>/<del> tags,
+			# while wrap is always called with a class name here -- confirm
+			# the stripping still happens as intended.
+			remaining = Float(nested.to_s.gsub(%r{<(ins|del)>.*</\1>}, '').size)
+			largest = Math.max(event.old_element.to_s.size, event.new_element.to_s.size)
+			similar = (remaining / largest) > 0.5
+		end
+
+		if similar
+			@output << nested
+		else
+			@output << wrap(event.old_element, 'del', 'diffmod')
+			@output << wrap(event.new_element, 'ins', 'diffmod')
+		end
+	end
+
+	# This will be called when there is an element in B that isn't in A
+	def discard_b(event)
+		inserted = wrap(event.new_element, 'ins', 'diffins')
+		@output << inserted
+	end
+
+	def choose_event(event, element, tag)
+	end
+
+	# Returns a deep clone of +element+, placed inside a new +tag+ element
+	# when +tag+ is given; +class_name+, when given, is set as the 'class'
+	# attribute of the returned node.
+	def wrap(element, tag = nil, class_name = nil)
+		copy = element.deep_clone
+		if tag
+			wrapped = Element.new tag
+			wrapped << copy
+		else
+			wrapped = copy
+		end
+		wrapped.add_attribute('class', class_name) if class_name
+		wrapped
+	end
+
+	# Variant used for the word arrays of two Text nodes.
+	class XHTMLTextDiff < XHTMLDiff
+		# A changed word is emitted as a deletion followed by an insertion.
+		def change(event)
+			@output << wrap(event.old_element, 'del', 'diffmod')
+			@output << wrap(event.new_element, 'ins', 'diffmod')
+		end
+
+		# This will be called when both elements are the same
+		def match(event)
+			unchanged = event.old_element
+			@output << wrap(unchanged, nil, nil) if unchanged
+		end
+
+		# This will be called when there is an element in A that isn't in B
+		def discard_a(event)
+			deleted = wrap(event.old_element, 'del', 'diffdel')
+			@output << deleted
+		end
+
+		# This will be called when there is an element in B that isn't in A
+		def discard_b(event)
+			inserted = wrap(event.new_element, 'ins', 'diffins')
+			@output << inserted
+		end
+
+		# Turns a String into a Text node prefixed with a single space and,
+		# when +tag+ is given, puts it inside a new +tag+ element carrying
+		# the optional 'class' attribute.
+		def wrap(element, tag = nil, class_name = nil)
+			text = (String === element) ? REXML::Text.new(" " + element) : element
+			return text unless tag
+
+			wrapper = REXML::Element.new(tag)
+			wrapper.add_text text
+			wrapper.add_attribute('class', class_name) if class_name
+			wrapper
+		end
+	end
+
+end
+
+if $0 == __FILE__ # only when this file is the script being executed
+
+	$stderr.puts "No tests available yet"
+	exit(1) # leave with exit status 1
+
+end
--- a/test/unit/diff_test.rb
+++ b/test/unit/diff_test.rb
@ -1,110 +1,94 @@
 #!/usr/bin/env ruby

 require File.expand_path(File.dirname(__FILE__) + '/../test_helper')
-require 'diff'
+require 'xhtmldiff'

 class DiffTest < Test::Unit::TestCase

-  include HTMLDiff
-
  def setup
-    @builder = DiffBuilder.new('old', 'new')
+
  end

-  def test_start_of_tag
-    assert @builder.start_of_tag?('<')
-    assert(!@builder.start_of_tag?('>'))
-    assert(!@builder.start_of_tag?('a'))
-  end
-
-  def test_end_of_tag
-    assert @builder.end_of_tag?('>')
-    assert(!@builder.end_of_tag?('<'))
-    assert(!@builder.end_of_tag?('a'))
-  end
-
-  def test_whitespace
-    assert @builder.whitespace?(" ")
-    assert @builder.whitespace?("\n")
-    assert @builder.whitespace?("\r")
-    assert(!@builder.whitespace?("a"))
-  end
-
-  def test_convert_html_to_list_of_words_simple
-    assert_equal(
-        ['the', ' ', 'original', ' ', 'text'],
-        @builder.convert_html_to_list_of_words('the original text'))
-  end
-
-  def test_convert_html_to_list_of_words_should_separate_endlines
-    assert_equal(
-        ['a', "\n", 'b', "\r", 'c'],
-        @builder.convert_html_to_list_of_words("a\nb\rc"))
-  end
-
-  def test_convert_html_to_list_of_words_should_not_compress_whitespace
-    assert_equal(
-        ['a', ' ', 'b', '  ', 'c', "\r \n ", 'd'],
-        @builder.convert_html_to_list_of_words("a b  c\r \n d"))
-  end
-
-  def test_convert_html_to_list_of_words_should_handle_tags_well
-    assert_equal(
-        ['<p>', 'foo', ' ', 'bar', '</p>'],
-        @builder.convert_html_to_list_of_words("<p>foo bar</p>"))
-  end
-  
-  def test_convert_html_to_list_of_words_interesting
-    assert_equal(
-        ['<p>', 'this', ' ', 'is', '</p>', "\r\n", '<p>', 'the', ' ', 'new', ' ', 'string', 
-         '</p>', "\r\n", '<p>', 'around', ' ', 'the', ' ', 'world', '</p>'],
-        @builder.convert_html_to_list_of_words(
-            "<p>this is</p>\r\n<p>the new string</p>\r\n<p>around the world</p>"))
+  # Wraps the fragments +a+ and +b+ in a <div> each, runs XHTMLDiff over
+  # them and returns the resulting document written out as a string.
+  def diff(a,b)
+    container = REXML::Element.new('div')
+    redline = REXML::Document.new
+    redline << container
+
+    old_root, new_root = [a, b].map do |fragment|
+      source = REXML::Document.new("<div>"+fragment+"</div>")
+      REXML::HashableElementDelegator.new(REXML::XPath.first(source, '/div'))
+    end
+
+    Diff::LCS.traverse_balanced(old_root, new_root, XHTMLDiff.new(container))
+
+    serialized = ''
+    redline.write(serialized, -1, true, true)
+    serialized
   end

  def test_html_diff_simple
    a = 'this was the original string'
    b = 'this is the new string'
-    assert_equal('this <del class="diffmod">was</del><ins class="diffmod">is</ins> the ' +
-           '<del class="diffmod">original</del><ins class="diffmod">new</ins> string',
-           diff(a, b))
+    assert_equal("<div><span> this<del class='diffmod'> was</del><ins class='diffmod'> is</ins> the" +
+           "<del class='diffmod'> original</del><ins class='diffmod'> new</ins> string</span></div>",
+          diff(a, b))
  end

  def test_html_diff_with_multiple_paragraphs
    a = "<p>this was the original string</p>"
-    b = "<p>this is</p>\r\n<p> the new string</p>\r\n<p>around the world</p>"
-
-    # Some of this expected result is accidental to implementation. 
-    # At least it's well-formed and more or less correct.
+    b = "<p>this is</p>\n<p> the new string</p>\n<p>around the world</p>"
    assert_equal(
-        "<p>this <del class=\"diffmod\">was</del><ins class=\"diffmod\">is</ins></p>"+
-        "<ins class=\"diffmod\">\r\n</ins><p> the " +
-        "<del class=\"diffmod\">original</del><ins class=\"diffmod\">new</ins>" +
-        " string</p><ins class=\"diffins\">\r\n</ins>" +
-        "<p><ins class=\"diffins\">around the world</ins></p>",
+        "<div><p><span> this<del class='diffmod'> was</del><ins class='diffmod'> is</ins>" +
+        "<del class='diffdel'> the</del><del class='diffdel'> original</del><del class='diffdel'> string</del></span></p>" +
+        "<ins class='diffins'>\n</ins><ins class='diffins'><p> the new string</p></ins>" +
+        "<ins class='diffins'>\n</ins><ins class='diffins'><p>around the world</p></ins></div>",
        diff(a, b))
  end

+  # One paragraph split in two: the second word is expected as deleted from
+  # the first paragraph and the new paragraph as inserted.
+  def test_split_paragraph_into_two
+    before = "<p>foo bar</p>"
+    after = "<p>foo</p><p>bar</p>"
+    expected = "<div><p><span> foo<del class='diffdel'> bar</del></span></p>" +
+               "<ins class='diffins'><p>bar</p></ins></div>"
+    assert_equal(expected, diff(before, after))
+  end
+
+  # Two paragraphs joined into one: the second word is expected as inserted
+  # into the first paragraph and the second paragraph as deleted.
+  def test_join_two_paragraphs_into_one
+    before = "<p>foo</p><p>bar</p>"
+    after = "<p>foo bar</p>"
+    expected = "<div><p><span> foo<ins class='diffins'> bar</ins></span></p>" +
+               "<del class='diffdel'><p>bar</p></del></div>"
+    assert_equal(expected, diff(before, after))
+  end
+
+  # A word wrapped in a new inline element: the plain word is expected as
+  # deleted and the inline element as inserted.
+  def test_add_inline_element
+    before = "<p>foo bar</p>"
+    after = "<p>foo <b>bar</b></p>"
+    expected = "<div><p><span> foo<del class='diffdel'> bar</del></span>" +
+               "<ins class='diffins'><b>bar</b></ins></p></div>"
+    assert_equal(expected, diff(before, after))
+  end
+
  # FIXME this test fails (ticket #67, http://dev.instiki.org/ticket/67)
  def test_html_diff_preserves_endlines_in_pre
-    a = "<pre>\na\nb\nc\n</pre>"
-    b = "<pre>\n</pre>"
+    a = "<pre>a\nb\nc\n</pre>"
+    b = "<pre>a\n</pre>"
    assert_equal(
-        "<pre>\n<del class=\"diffdel\">a\nb\nc\n</del></pre>",
+        "<div><pre><span> a\n<del class='diffdel'>b\nc\n</del></span></pre></div>",
        diff(a, b))
  end
  
  def test_html_diff_with_tags
    a = ""
    b = "<div>foo</div>"
-    assert_equal '<div><ins class="diffins">foo</ins></div>', diff(a, b)
+    assert_equal "<div><ins class='diffins'><div>foo</div></ins></div>", diff(a, b)
  end
  
  def test_diff_for_tag_change
    a = "<a>x</a>"
    b = "<b>x</b>"
    # FIXME sad, but true - this case produces an invalid XML. If handle this you can, strong your foo is.
-    assert_equal '<a><b>x</a></b>', diff(a, b)
+    assert_equal "<div><del class='diffdel'><a>x</a></del><ins class='diffins'><b>x</b></ins></div>", diff(a, b)
  end

 end