Replaced diff.rb with xhtmldiff.rb, which (unlike its predecessor) produces well-formed redline documents.

This commit is contained in:
Jacques Distler 2007-02-03 22:52:48 -06:00
parent 86e9c70a26
commit 8c52f28864
12 changed files with 2420 additions and 391 deletions

View file

@ -1,316 +0,0 @@
module HTMLDiff
# A run of +size+ consecutive words shared by both versions, beginning at
# index +start_in_old+ of the old word list and +start_in_new+ of the new one.
Match = Struct.new(:start_in_old, :start_in_new, :size) do
  # Index just past the last matched word in the old word list.
  def end_in_old
    start_in_old + size
  end

  # Index just past the last matched word in the new word list.
  def end_in_new
    start_in_new + size
  end
end
# One edit step. +action+ names the DiffBuilder method that renders it; the
# other members are start/end indices into the old and new word lists (the
# end indices are exclusive).
Operation = Struct.new(:action, :start_in_old, :end_in_old, :start_in_new, :end_in_new)
class DiffBuilder
# Remembers the two versions to compare and starts with an empty output
# buffer (@content), which the rendering methods append to.
def initialize(old_version, new_version)
  @old_version = old_version
  @new_version = new_version
  @content = []
end
# Runs the whole diff: tokenises both versions, indexes the new words,
# renders every operation into @content and returns the joined result.
def build
  split_inputs_to_words
  index_new_words
  operations.each do |operation|
    perform_operation(operation)
  end
  @content.join
end
# Tokenises both versions into @old_words and @new_words.
def split_inputs_to_words
  to_words = lambda { |version| convert_html_to_list_of_words(explode(version)) }
  @old_words = to_words.call(@old_version)
  @new_words = to_words.call(@new_version)
end
# Builds @word_indices: for each word of the new version, the ascending list
# of positions at which it occurs. Unknown words look up as an empty list.
def index_new_words
  @word_indices = Hash.new { |index, word| index[word] = [] }
  @new_words.each_with_index do |word, position|
    @word_indices[word].push(position)
  end
end
# Turns the matching blocks into an ordered list of Operation structs
# (:replace, :insert, :delete, :equal) that covers both word lists end to end.
def operations
  old_cursor = 0
  new_cursor = 0
  result = []
  # A zero-length match placed past the end of both lists makes the loop below
  # emit an operation for whatever unmatched tail is left.
  blocks = matching_blocks << Match.new(@old_words.length, @new_words.length, 0)
  blocks.each do |match|
    old_gap = old_cursor != match.start_in_old
    new_gap = new_cursor != match.start_in_new
    # nil when the match starts exactly at both cursors, e.g. when the first
    # few words are the same in both versions.
    gap_action =
      if old_gap && new_gap
        :replace
      elsif new_gap
        :insert
      elsif old_gap
        :delete
      end
    if gap_action
      result << Operation.new(gap_action,
                              old_cursor, match.start_in_old,
                              new_cursor, match.start_in_new)
    end
    unless match.size == 0
      result << Operation.new(:equal,
                              match.start_in_old, match.end_in_old,
                              match.start_in_new, match.end_in_new)
    end
    old_cursor = match.end_in_old
    new_cursor = match.end_in_new
  end
  result
end
# Collects, in order, every matching block found across the full extent of
# both word lists.
def matching_blocks
  [].tap do |found|
    recursively_find_matching_blocks(0, @old_words.size, 0, @new_words.size, found)
  end
end
# Finds the best match inside the given window, then recurses into the
# windows to its left and right, appending matches to +matching_blocks+ in
# left-to-right order. Does nothing when the window contains no match.
def recursively_find_matching_blocks(start_in_old, end_in_old, start_in_new, end_in_new, matching_blocks)
  match = find_match(start_in_old, end_in_old, start_in_new, end_in_new)
  return unless match

  # Only descend to the left when words remain on both sides.
  if start_in_old < match.start_in_old && start_in_new < match.start_in_new
    recursively_find_matching_blocks(
      start_in_old, match.start_in_old, start_in_new, match.start_in_new, matching_blocks
    )
  end
  matching_blocks << match
  # Likewise for the window to the right of the match.
  if match.end_in_old < end_in_old && match.end_in_new < end_in_new
    recursively_find_matching_blocks(
      match.end_in_old, end_in_old, match.end_in_new, end_in_new, matching_blocks
    )
  end
end
# Returns the longest run of consecutive equal words inside the window
# [start_in_old, end_in_old) x [start_in_new, end_in_new) as a Match, or nil
# when the windows share no word. On ties the first run found wins.
#
# Earlier code also extended the result with add_matching_words_left and
# add_matching_words_right; those calls were commented out and stay disabled.
def find_match(start_in_old, end_in_old, start_in_new, end_in_new)
  best_old = start_in_old
  best_new = start_in_new
  best_size = 0
  # run_lengths[j] is the length of the common run ending at the previous old
  # word and at new word j.
  run_lengths = Hash.new { |h, index| h[index] = 0 }
  (start_in_old...end_in_old).each do |old_index|
    next_run_lengths = Hash.new { |h, index| h[index] = 0 }
    @word_indices[@old_words[old_index]].each do |new_index|
      next if new_index < start_in_new
      break if new_index >= end_in_new

      length = run_lengths[new_index - 1] + 1
      next_run_lengths[new_index] = length
      next unless length > best_size

      best_old = old_index - length + 1
      best_new = new_index - length + 1
      best_size = length
    end
    run_lengths = next_run_lengths
  end
  return nil if best_size == 0

  Match.new(best_old, best_new, best_size)
end
# Grows a match leftwards while the preceding words are equal and both start
# indices stay above their lower bounds. Returns [old_start, new_start, size].
def add_matching_words_left(match_in_old, match_in_new, match_size, start_in_old, start_in_new)
  until match_in_old <= start_in_old ||
        match_in_new <= start_in_new ||
        @old_words[match_in_old - 1] != @new_words[match_in_new - 1]
    match_in_old -= 1
    match_in_new -= 1
    match_size += 1
  end
  [match_in_old, match_in_new, match_size]
end
# Grows a match rightwards while the following words are equal and the match
# stays inside both upper bounds. Returns [old_start, new_start, size].
def add_matching_words_right(match_in_old, match_in_new, match_size, end_in_old, end_in_new)
  until match_in_old + match_size >= end_in_old ||
        match_in_new + match_size >= end_in_new ||
        @old_words[match_in_old + match_size] != @new_words[match_in_new + match_size]
    match_size += 1
  end
  [match_in_old, match_in_new, match_size]
end
# The only actions an Operation may carry; each names a rendering method.
VALID_METHODS = [:replace, :insert, :delete, :equal].freeze
# Renders one operation by dispatching to the method named by its action.
#
# The action is checked against VALID_METHODS first, so a malformed Operation
# cannot invoke an arbitrary method through +send+.
#
# Raises ArgumentError when the action is not one of VALID_METHODS.
def perform_operation(operation)
  unless VALID_METHODS.include?(operation.action)
    raise ArgumentError, "Unknown diff operation #{operation.action.inspect}"
  end

  @operation = operation
  send(operation.action, operation)
end
# Renders a replacement as a deletion followed by an insertion, both tagged
# with the 'diffmod' CSS class.
def replace(operation)
  css_class = 'diffmod'
  delete(operation, css_class)
  insert(operation, css_class)
end
# Emits the operation's slice of the new words wrapped in <ins> tags.
def insert(operation, tagclass = 'diffins')
  added_words = @new_words[operation.start_in_new...operation.end_in_new]
  insert_tag('ins', tagclass, added_words)
end
# Emits the operation's slice of the old words wrapped in <del> tags.
def delete(operation, tagclass = 'diffdel')
  removed_words = @old_words[operation.start_in_old...operation.end_in_old]
  insert_tag('del', tagclass, removed_words)
end
# Unchanged words need no markup: append the matching slice of the new words
# to the output as-is.
def equal(operation)
  unchanged = @new_words[operation.start_in_new...operation.end_in_new]
  @content = @content + unchanged
end
# Match position (truthy) when +item+ is a single tag, optionally surrounded
# by whitespace; nil otherwise. The pattern does not exclude a leading '/',
# so closing tags satisfy it as well.
def opening_tag?(item)
  %r!^\s*<[^>]+>\s*$! =~ item
end

# Match position (truthy) when +item+ is a single closing tag, optionally
# surrounded by whitespace; nil otherwise.
def closing_tag?(item)
  %r!^\s*</[^>]+>\s*$! =~ item
end

# Truthy when +item+ is an opening or a closing tag.
def tag?(item)
  opening_tag?(item) || closing_tag?(item)
end
# Removes and returns the leading run of +words+ for which the block is
# truthy. +words+ is modified in place; everything is taken when every word
# satisfies the block.
def extract_consecutive_words(words, &condition)
  boundary = words.index { |word| !condition.call(word) }
  words.slice!(0, boundary || words.length)
end
# Encloses +words+ in the given tag (ins or del) and appends the result to
# @content, with a twist: if the words contain tags, several ins/del elements
# are created so that none of them encloses a tag. Runs of plain words are
# wrapped; runs of tags are copied through unwrapped. This handles cases like
# (class attributes omitted)
#   old: '<p>a</p>'
#   new: '<p>ab</p><p>c</p>'
#   diff result: '<p>a<ins>b</ins></p><p><ins>c</ins></p>'
# This still doesn't guarantee valid HTML (hint: think about diffing a text
# that itself contains ins or del tags), but it handles more cases correctly
# than the earlier version.
#
# +words+ is consumed (emptied) by this method.
#
# P.S.: Spare a thought for people who write HTML browsers. They live in this ... every day.
def insert_tag(tagname, cssclass, words)
  until words.empty?
    text_run = extract_consecutive_words(words) { |word| !tag?(word) }
    @content << wrap_text(text_run.join, tagname, cssclass) unless text_run.empty?
    break if words.empty?

    @content += extract_consecutive_words(words) { |word| tag?(word) }
  end
end
# Wraps +text+ in <tagname class="cssclass">...</tagname>. The text is
# inserted verbatim.
def wrap_text(text, tagname, cssclass)
  opening = %(<#{tagname} class="#{cssclass}">)
  closing = "</#{tagname}>"
  "#{opening}#{text}#{closing}"
end
# Splits a String into single-character strings; any other object is returned
# untouched.
def explode(sequence)
  return sequence unless sequence.is_a?(String)

  sequence.split(//)
end
# True when +char+ is the character that closes a tag.
def end_of_tag?(char)
  '>' == char
end

# True when +char+ is the character that opens a tag.
def start_of_tag?(char)
  '<' == char
end

# Match position (truthy) when +char+ contains whitespace, nil otherwise.
def whitespace?(char)
  /\s/ =~ char
end
# Splits a sequence of characters into "words": whole tags, runs of
# whitespace, and runs of other characters, in their original order.
#
# x            - a String, or an already exploded list of characters.
# use_brackets - when true, tag delimiters are emitted as '[' and ']'
#                instead of '<' and '>'.
#
# Returns a new Array of Strings. The input is never modified: every word is
# accumulated in its own buffer.
def convert_html_to_list_of_words(x, use_brackets = false)
  open_mark  = use_brackets ? '[' : '<'
  close_mark = use_brackets ? ']' : '>'
  mode = :char
  current_word = ''.dup
  words = []
  explode(x).each do |char|
    case mode
    when :tag
      if end_of_tag?(char)
        current_word << close_mark
        words << current_word
        current_word = ''.dup
        # The closing '>' can never be whitespace, so text mode always follows.
        mode = :char
      else
        current_word << char
      end
    when :char
      if start_of_tag?(char)
        words << current_word unless current_word.empty?
        current_word = open_mark.dup
        mode = :tag
      elsif whitespace?(char)
        words << current_word unless current_word.empty?
        # dup: the buffer is appended to, and must not alias the input.
        current_word = char.dup
        mode = :whitespace
      else
        current_word << char
      end
    when :whitespace
      if start_of_tag?(char)
        words << current_word unless current_word.empty?
        current_word = open_mark.dup
        mode = :tag
      elsif whitespace?(char)
        current_word << char
      else
        words << current_word unless current_word.empty?
        current_word = char.dup
        mode = :char
      end
    else
      raise "Unknown mode #{mode.inspect}"
    end
  end
  words << current_word unless current_word.empty?
  words
end
end # of class Diff Builder
# Convenience entry point: returns the redline markup for turning +a+ into +b+.
def diff(a, b)
  builder = DiffBuilder.new(a, b)
  builder.build
end
end

1105
lib/diff/lcs.rb Normal file

File diff suppressed because it is too large Load diff

21
lib/diff/lcs/array.rb Normal file
View file

@ -0,0 +1,21 @@
#!/usr/bin/env ruby
#--
# Copyright 2004 Austin Ziegler <diff-lcs@halostatue.ca>
# adapted from:
# Algorithm::Diff (Perl) by Ned Konz <perl@bike-nomad.com>
# Smalltalk by Mario I. Wolczko <mario@wolczko.com>
# implements McIlroy-Hunt diff algorithm
#
# This program is free software. It may be redistributed and/or modified under
# the terms of the GPL version 2 (or later), the Perl Artistic licence, or the
# Ruby licence.
#
# $Id: array.rb,v 1.3 2004/08/08 20:33:09 austin Exp $
#++
# Includes Diff::LCS into the Array built-in class.
require 'diff/lcs'
# Mixes Diff::LCS into the built-in Array class, so its methods become
# available on every array.
Array.send(:include, Diff::LCS)

51
lib/diff/lcs/block.rb Normal file
View file

@ -0,0 +1,51 @@
#!/usr/bin/env ruby
#--
# Copyright 2004 Austin Ziegler <diff-lcs@halostatue.ca>
# adapted from:
# Algorithm::Diff (Perl) by Ned Konz <perl@bike-nomad.com>
# Smalltalk by Mario I. Wolczko <mario@wolczko.com>
# implements McIlroy-Hunt diff algorithm
#
# This program is free software. It may be redistributed and/or modified under
# the terms of the GPL version 2 (or later), the Perl Artistic licence, or the
# Ruby licence.
#
# $Id: block.rb,v 1.3 2004/08/08 20:33:09 austin Exp $
#++
# Contains Diff::LCS::Block for bin/ldiff.
# A block is an operation removing, adding, or changing a group of items.
# Basically, this is just a list of changes, where each change adds or
# deletes a single item. Used by bin/ldiff.
class Diff::LCS::Block
  # changes - every item of the chunk, in order.
  # insert  - the items that report adding?.
  # remove  - the items that report deleting?.
  attr_reader :changes, :insert, :remove

  # Copies the items of +chunk+ and sorts them into insertions and removals.
  def initialize(chunk)
    @changes = []
    chunk.each { |change| @changes.push(change) }
    @remove = @changes.select { |change| change.deleting? }
    @insert = @changes.select { |change| change.adding? }
  end

  # Net change in length: number of insertions minus number of removals.
  def diff_size
    @insert.size - @remove.size
  end

  # One-character summary of the block: '!' when it both removes and inserts,
  # '-' when it only removes, '+' when it only inserts, '^' when it is empty.
  def op
    removing = !@remove.empty?
    inserting = !@insert.empty?
    if removing && inserting
      '!'
    elsif removing
      '-'
    elsif inserting
      '+'
    else
      '^'
    end
  end
end

322
lib/diff/lcs/callbacks.rb Normal file
View file

@ -0,0 +1,322 @@
#!/usr/bin/env ruby
#--
# Copyright 2004 Austin Ziegler <diff-lcs@halostatue.ca>
# adapted from:
# Algorithm::Diff (Perl) by Ned Konz <perl@bike-nomad.com>
# Smalltalk by Mario I. Wolczko <mario@wolczko.com>
# implements McIlroy-Hunt diff algorithm
#
# This program is free software. It may be redistributed and/or modified under
# the terms of the GPL version 2 (or later), the Perl Artistic licence, or the
# Ruby licence.
#
# $Id: callbacks.rb,v 1.4 2004/09/14 18:51:26 austin Exp $
#++
# Contains definitions for all default callback objects.
require 'diff/lcs/change'
module Diff::LCS
  # The default set of callback events: every callback simply returns the
  # event it was given. Note that #finished_a and #finished_b are not
  # implemented -- I haven't yet figured out where they would be useful.
  #
  # The class itself is the callback object (it cannot be instantiated), e.g.,
  #
  #   Diff::LCS.LCS(seq1, seq2, Diff::LCS::DefaultCallbacks)
  class DefaultCallbacks
    # Called when two items match.
    def self.match(event)
      event
    end

    # Called when the old value is discarded in favour of the new value.
    def self.discard_a(event)
      event
    end

    # Called when the new value is discarded in favour of the old value.
    def self.discard_b(event)
      event
    end

    # Called when both the old and new values have changed.
    def self.change(event)
      event
    end

    private_class_method :new
  end

  # An alias for DefaultCallbacks that is used in Diff::LCS#traverse_sequences.
  #
  #   Diff::LCS.LCS(seq1, seq2, Diff::LCS::SequenceCallbacks)
  SequenceCallbacks = DefaultCallbacks

  # An alias for DefaultCallbacks that is used in Diff::LCS#traverse_balanced.
  #
  #   Diff::LCS.LCS(seq1, seq2, Diff::LCS::BalancedCallbacks)
  BalancedCallbacks = DefaultCallbacks
end
# This will produce a compound array of simple diff change objects. Each
# element in the #diffs array is a +hunk+ or +hunk+ array, where each
# element in each +hunk+ array is a single Change object representing the
# addition or removal of a single element from one of the two tested
# sequences. The +hunk+ provides the full context for the changes.
#
# diffs = Diff::LCS.diff(seq1, seq2)
# # This example shows a simplified array format.
# # [ [ [ '-', 0, 'a' ] ], # 1
# # [ [ '+', 2, 'd' ] ], # 2
# # [ [ '-', 4, 'h' ], # 3
# # [ '+', 4, 'f' ] ],
# # [ [ '+', 6, 'k' ] ], # 4
# # [ [ '-', 8, 'n' ], # 5
# # [ '-', 9, 'p' ],
# # [ '+', 9, 'r' ],
# # [ '+', 10, 's' ],
# # [ '+', 11, 't' ] ] ]
#
# There are five hunks here. The first hunk says that the +a+ at position 0
# of the first sequence should be deleted (<tt>'-'</tt>). The second hunk
# says that the +d+ at position 2 of the second sequence should be inserted
# (<tt>'+'</tt>). The third hunk says that the +h+ at position 4 of the
# first sequence should be removed and replaced with the +f+ from position 4
# of the second sequence. The other two hunks are described similarly.
#
# === Use
# This callback object must be initialised and is used by the Diff::LCS#diff
# method.
#
# cbo = Diff::LCS::DiffCallbacks.new
# Diff::LCS.LCS(seq1, seq2, cbo)
# cbo.finish
#
# Note that the call to #finish is absolutely necessary, or the last set of
# changes will not be visible. Alternatively, can be used as:
#
# cbo = Diff::LCS::DiffCallbacks.new { |tcbo| Diff::LCS.LCS(seq1, seq2, tcbo) }
#
# The necessary #finish call will be made.
#
# === Simplified Array Format
# The simplified array format used in the example above can be obtained
# with:
#
# require 'pp'
# pp diffs.map { |e| e.map { |f| f.to_a } }
class Diff::LCS::DiffCallbacks
  # Returns the difference set collected during the diff process.
  attr_reader :diffs

  # When a block is given, yields this object and then calls #finish, even if
  # the block raises.
  def initialize # :yields self:
    @hunk = []
    @diffs = []
    return unless block_given?

    begin
      yield self
    ensure
      finish
    end
  end

  # Finalizes the diff process. If an unprocessed hunk still exists, then it
  # is appended to the diff list.
  def finish
    add_nonempty_hunk
  end

  # A match ends the current run of changes, so the pending hunk is flushed.
  def match(event)
    add_nonempty_hunk
  end

  # Records the removal of the old element as a '-' Change.
  def discard_a(event)
    @hunk.push(Diff::LCS::Change.new('-', event.old_position, event.old_element))
  end

  # Records the addition of the new element as a '+' Change.
  def discard_b(event)
    @hunk.push(Diff::LCS::Change.new('+', event.new_position, event.new_element))
  end

  private

  # Moves the pending hunk into #diffs (unless it is empty) and starts a new one.
  def add_nonempty_hunk
    @diffs.push(@hunk) unless @hunk.empty?
    @hunk = []
  end
end
# This will produce a compound array of contextual diff change objects. Each
# element in the #diffs array is a "hunk" array, where each element in each
# "hunk" array is a single change. Each change is a Diff::LCS::ContextChange
# that contains both the old index and new index values for the change. The
# "hunk" provides the full context for the changes. Both old and new objects
# will be presented for changed objects. +nil+ will be substituted for a
# discarded object.
#
# seq1 = %w(a b c e h j l m n p)
# seq2 = %w(b c d e f j k l m r s t)
#
# diffs = Diff::LCS.diff(seq1, seq2, Diff::LCS::ContextDiffCallbacks)
# # This example shows a simplified array format.
# # [ [ [ '-', [ 0, 'a' ], [ 0, nil ] ] ], # 1
# # [ [ '+', [ 3, nil ], [ 2, 'd' ] ] ], # 2
# # [ [ '-', [ 4, 'h' ], [ 4, nil ] ], # 3
# # [ '+', [ 5, nil ], [ 4, 'f' ] ] ],
# # [ [ '+', [ 6, nil ], [ 6, 'k' ] ] ], # 4
# # [ [ '-', [ 8, 'n' ], [ 9, nil ] ], # 5
# # [ '+', [ 9, nil ], [ 9, 'r' ] ],
# # [ '-', [ 9, 'p' ], [ 10, nil ] ],
# # [ '+', [ 10, nil ], [ 10, 's' ] ],
# # [ '+', [ 10, nil ], [ 11, 't' ] ] ] ]
#
# The five hunks shown are comprised of individual changes; if there is a
# related set of changes, they are still shown individually.
#
# This callback can also be used with Diff::LCS#sdiff, which will produce
# results like:
#
# diffs = Diff::LCS.sdiff(seq1, seq2, Diff::LCS::ContextDiffCallbacks)
# # This example shows a simplified array format.
# # [ [ [ "-", [ 0, "a" ], [ 0, nil ] ] ], # 1
# # [ [ "+", [ 3, nil ], [ 2, "d" ] ] ], # 2
# # [ [ "!", [ 4, "h" ], [ 4, "f" ] ] ], # 3
# # [ [ "+", [ 6, nil ], [ 6, "k" ] ] ], # 4
# # [ [ "!", [ 8, "n" ], [ 9, "r" ] ], # 5
# # [ "!", [ 9, "p" ], [ 10, "s" ] ],
# # [ "+", [ 10, nil ], [ 11, "t" ] ] ] ]
#
# The five hunks are still present, but are significantly shorter in total
# presentation, because changed items are shown as changes ("!") instead of
# potentially "mismatched" pairs of additions and deletions.
#
# The result of this operation is similar to that of
# Diff::LCS::SDiffCallbacks. They may be compared as:
#
# s = Diff::LCS.sdiff(seq1, seq2).reject { |e| e.action == "=" }
# c = Diff::LCS.sdiff(seq1, seq2, Diff::LCS::ContextDiffCallbacks).flatten
#
# s == c # -> true
#
# === Use
# This callback object must be initialised and can be used by the
# Diff::LCS#diff or Diff::LCS#sdiff methods.
#
# cbo = Diff::LCS::ContextDiffCallbacks.new
# Diff::LCS.LCS(seq1, seq2, cbo)
# cbo.finish
#
# Note that the call to #finish is absolutely necessary, or the last set of
# changes will not be visible. Alternatively, can be used as:
#
# cbo = Diff::LCS::ContextDiffCallbacks.new { |tcbo| Diff::LCS.LCS(seq1, seq2, tcbo) }
#
# The necessary #finish call will be made.
#
# === Simplified Array Format
# The simplified array format used in the example above can be obtained
# with:
#
# require 'pp'
# pp diffs.map { |e| e.map { |f| f.to_a } }
class Diff::LCS::ContextDiffCallbacks < Diff::LCS::DiffCallbacks
  # discard_a, discard_b and change all record the event the same way: as a
  # simplified ContextChange appended to the pending hunk.
  %w[discard_a discard_b change].each do |callback|
    define_method(callback) do |event|
      @hunk << Diff::LCS::ContextChange.simplify(event)
    end
  end
end
# This will produce a simple array of diff change objects. Each element in
# the #diffs array is a single ContextChange. In the set of #diffs provided
# by SDiffCallbacks, both old and new objects will be presented for both
# changed <strong>and unchanged</strong> objects. +nil+ will be substituted
# for a discarded object.
#
# The diffset produced by this callback, when provided to Diff::LCS#sdiff,
# will compute and display the necessary components to show two sequences
# and their minimized differences side by side, just like the Unix utility
# +sdiff+.
#
# same same
# before | after
# old < -
# - > new
#
# seq1 = %w(a b c e h j l m n p)
# seq2 = %w(b c d e f j k l m r s t)
#
# diffs = Diff::LCS.sdiff(seq1, seq2)
# # This example shows a simplified array format.
# # [ [ "-", [ 0, "a"], [ 0, nil ] ],
# # [ "=", [ 1, "b"], [ 0, "b" ] ],
# # [ "=", [ 2, "c"], [ 1, "c" ] ],
# # [ "+", [ 3, nil], [ 2, "d" ] ],
# # [ "=", [ 3, "e"], [ 3, "e" ] ],
# # [ "!", [ 4, "h"], [ 4, "f" ] ],
# # [ "=", [ 5, "j"], [ 5, "j" ] ],
# # [ "+", [ 6, nil], [ 6, "k" ] ],
# # [ "=", [ 6, "l"], [ 7, "l" ] ],
# # [ "=", [ 7, "m"], [ 8, "m" ] ],
# # [ "!", [ 8, "n"], [ 9, "r" ] ],
# # [ "!", [ 9, "p"], [ 10, "s" ] ],
# # [ "+", [ 10, nil], [ 11, "t" ] ] ]
#
# The result of this operation is similar to that of
# Diff::LCS::ContextDiffCallbacks. They may be compared as:
#
# s = Diff::LCS.sdiff(seq1, seq2).reject { |e| e.action == "=" }
# c = Diff::LCS.sdiff(seq1, seq2, Diff::LCS::ContextDiffCallbacks).flatten
#
# s == c # -> true
#
# === Use
# This callback object must be initialised and is used by the Diff::LCS#sdiff
# method.
#
# cbo = Diff::LCS::SDiffCallbacks.new
# Diff::LCS.LCS(seq1, seq2, cbo)
#
# As with the other initialisable callback objects, Diff::LCS::SDiffCallbacks
# can be initialised with a block. As there is no "finishing" to be done,
# this has no effect on the state of the object.
#
# cbo = Diff::LCS::SDiffCallbacks.new { |tcbo| Diff::LCS.LCS(seq1, seq2, tcbo) }
#
# === Simplified Array Format
# The simplified array format used in the example above can be obtained
# with:
#
# require 'pp'
# pp diffs.map { |e| e.to_a }
class Diff::LCS::SDiffCallbacks
  # Returns the difference set collected during the diff process.
  attr_reader :diffs

  # Yields this object when a block is given; nothing needs finishing afterwards.
  def initialize #:yields self:
    @diffs = []
    yield self if block_given?
  end

  # Every event -- match, discard_a, discard_b and change -- is recorded the
  # same way: as a simplified ContextChange appended to #diffs.
  %w[match discard_a discard_b change].each do |callback|
    define_method(callback) do |event|
      @diffs << Diff::LCS::ContextChange.simplify(event)
    end
  end
end

169
lib/diff/lcs/change.rb Normal file
View file

@ -0,0 +1,169 @@
#!/usr/bin/env ruby
#--
# Copyright 2004 Austin Ziegler <diff-lcs@halostatue.ca>
# adapted from:
# Algorithm::Diff (Perl) by Ned Konz <perl@bike-nomad.com>
# Smalltalk by Mario I. Wolczko <mario@wolczko.com>
# implements McIlroy-Hunt diff algorithm
#
# This program is free software. It may be redistributed and/or modified under
# the terms of the GPL version 2 (or later), the Perl Artistic licence, or the
# Ruby licence.
#
# $Id: change.rb,v 1.4 2004/08/08 20:33:09 austin Exp $
#++
# Provides Diff::LCS::Change and Diff::LCS::ContextChange.
# Centralises the change test code in Diff::LCS::Change and
# Diff::LCS::ContextChange, since it's the same for both classes.
module Diff::LCS::ChangeTypeTests
# True when the action is '-'.
def deleting?
  @action == '-'
end

# True when the action is '+'.
def adding?
  @action == '+'
end

# True when the action is '='.
def unchanged?
  @action == '='
end

# True when the action is '!'. The classes that include this module store
# the change type in @action only, so that is the variable to test.
def changed?
  @action == '!'
end

# True when the action is '>'.
def finished_a?
  @action == '>'
end

# True when the action is '<'.
def finished_b?
  @action == '<'
end
end
# Represents a simplistic (non-contextual) change. Represents the removal or
# addition of an element from either the old or the new sequenced enumerable.
class Diff::LCS::Change
  include Comparable
  include Diff::LCS::ChangeTypeTests

  # Returns the action this Change represents. Can be '+' (#adding?), '-'
  # (#deleting?), '=' (#unchanged?), or '!' (#changed?). When created by
  # Diff::LCS#diff or Diff::LCS#sdiff, it may also be '>' (#finished_a?) or
  # '<' (#finished_b?).
  attr_reader :action
  # The position and the element this Change was created with.
  attr_reader :position, :element

  def initialize(action, position, element)
    @action = action
    @position = position
    @element = element
  end

  # Two changes are equal when action, position and element all match.
  def ==(other)
    action == other.action &&
      position == other.position &&
      element == other.element
  end

  # Orders by action, then position, then element.
  def <=>(other)
    order = action <=> other.action
    order = position <=> other.position if order.zero?
    order = element <=> other.element if order.zero?
    order
  end

  # Returns [action, position, element].
  def to_a
    [@action, @position, @element]
  end

  # Creates a Change from an array produced by Change#to_a.
  def self.from_a(arr)
    action, position, element = arr
    Diff::LCS::Change.new(action, position, element)
  end
end
# Represents a contextual change. Contains the position and values of the
# elements in the old and the new sequenced enumerables as well as the action
# taken.
class Diff::LCS::ContextChange
  include Comparable
  include Diff::LCS::ChangeTypeTests

  # Returns the action this Change represents. Can be '+' (#adding?), '-'
  # (#deleting?), '=' (#unchanged?), or '!' (#changed?). When created by
  # Diff::LCS#diff or Diff::LCS#sdiff, it may also be '>' (#finished_a?) or
  # '<' (#finished_b?).
  attr_reader :action
  # Position and element on the old side.
  attr_reader :old_position, :old_element
  # Position and element on the new side.
  attr_reader :new_position, :new_element

  def initialize(action, old_position, old_element, new_position, new_element)
    @action = action
    @old_position = old_position
    @old_element = old_element
    @new_position = new_position
    @new_element = new_element
  end

  # Equal when the action, both positions and both elements match.
  def ==(other)
    @action == other.action &&
      @old_position == other.old_position &&
      @new_position == other.new_position &&
      @old_element == other.old_element &&
      @new_element == other.new_element
  end

  def inspect(*args)
    "#<#{self.class.name}:#{__id__} @action=#{action} positions=#{old_position},#{new_position} elements=#{old_element.inspect},#{new_element.inspect}>"
  end

  # Orders by action, old position, new position, old element, new element.
  def <=>(other)
    order = @action <=> other.action
    order = @old_position <=> other.old_position if order.zero?
    order = @new_position <=> other.new_position if order.zero?
    order = @old_element <=> other.old_element if order.zero?
    order = @new_element <=> other.new_element if order.zero?
    order
  end

  # Returns [action, [old_position, old_element], [new_position, new_element]].
  def to_a
    [@action, [@old_position, @old_element], [@new_position, @new_element]]
  end

  # Creates a ContextChange from an array produced by ContextChange#to_a.
  # A flat five-element array (action, old position, old element, new
  # position, new element) is accepted as well.
  def self.from_a(arr)
    if arr.size == 5
      action, old_position, old_element, new_position, new_element = arr
    else
      action = arr[0]
      old_position, old_element = arr[1][0], arr[1][1]
      new_position, new_element = arr[2][0], arr[2][1]
    end
    Diff::LCS::ContextChange.new(action, old_position, old_element, new_position, new_element)
  end

  # Simplifies a context change for use in some diff callbacks. '<' actions
  # are converted to '-' and '>' actions are converted to '+'. Removals get a
  # nil new element and additions a nil old element; any other action is
  # copied unchanged.
  def self.simplify(event)
    ea = event.to_a
    case ea[0]
    when '-', '<'
      ea[0] = '-'
      ea[2][1] = nil
    when '+', '>'
      ea[0] = '+'
      ea[1][1] = nil
    end
    Diff::LCS::ContextChange.from_a(ea)
  end
end

257
lib/diff/lcs/hunk.rb Normal file
View file

@ -0,0 +1,257 @@
#!/usr/bin/env ruby
#--
# Copyright 2004 Austin Ziegler <diff-lcs@halostatue.ca>
# adapted from:
# Algorithm::Diff (Perl) by Ned Konz <perl@bike-nomad.com>
# Smalltalk by Mario I. Wolczko <mario@wolczko.com>
# implements McIlroy-Hunt diff algorithm
#
# This program is free software. It may be redistributed and/or modified under
# the terms of the GPL version 2 (or later), the Perl Artistic licence, or the
# Ruby licence.
#
# $Id: hunk.rb,v 1.2 2004/08/08 20:33:09 austin Exp $
#++
# Contains Diff::LCS::Hunk for bin/ldiff.
require 'diff/lcs/block'
# A Hunk is a group of Blocks which overlap because of the context
# surrounding each block. (So if we're not using context, every hunk will
# contain one block.) Used in the diff program (bin/ldiff).
class Diff::LCS::Hunk
# Creates a hunk from references to the old and new data plus one +piece+ of
# changes, which becomes the hunk's single initial Block.
#
# +file_length_difference+ is the running length difference before this hunk;
# the value after it is stored in @file_length_difference, and the caller must
# read it back manually. +context+ is handed to #flag_context= last.
def initialize(data_old, data_new, piece, context, file_length_difference)
  # At first, a hunk will have just one Block in it
  first_block = Diff::LCS::Block.new(piece)
  @blocks = [first_block]
  @data_old = data_old
  @data_new = data_new

  before = file_length_difference
  after = before + first_block.diff_size
  @file_length_difference = after

  # Save the start & end of each side. If a side has no items (e.g., we're
  # only adding items in this block), work out its position from the other
  # side and the current difference in file lengths.
  removed = first_block.remove
  inserted = first_block.insert
  a1, a2 = removed.empty? ? [nil, nil] : [removed[0].position, removed[-1].position]
  b1, b2 = inserted.empty? ? [nil, nil] : [inserted[0].position, inserted[-1].position]

  @start_old = a1 || (b1 - before)
  @start_new = b1 || (a1 + before)
  @end_old = a2 || (b2 - after)
  @end_new = b2 || (a2 + after)

  self.flag_context = context
end
# The Diff::LCS::Block objects that make up this hunk.
attr_reader :blocks
# First index covered by this hunk in the old and in the new data.
attr_reader :start_old, :start_new
# Last index (inclusive) covered by this hunk in the old and in the new data.
attr_reader :end_old, :end_new
# Running length difference after this hunk, as computed in #initialize.
attr_reader :file_length_difference
# Change the "start" and "end" fields to note that context should be added
# to this hunk
# NOTE(review): the writer generated here is replaced by the explicit
# flag_context= defined right below, and the generated reader returns
# @flag_context, which none of the visible code assigns.
attr_accessor :flag_context
# Widens the hunk by up to +context+ items on each side. The start never
# moves below index 0, and the end grows by at most @data_old.size - @end_old.
# Does nothing when +context+ is nil or zero.
def flag_context=(context) #:nodoc:
  return if context.nil? || context.zero?

  lead = [context, @start_old].min
  @start_old -= lead
  @start_new -= lead

  trail = [context, @data_old.size - @end_old].min
  @end_old += trail
  @end_new += trail
end
# Merges an earlier +hunk+ into this one: takes over its start positions and
# places its blocks in front of this hunk's blocks.
def unshift(hunk)
  @start_old, @start_new = hunk.start_old, hunk.start_new
  blocks.unshift(*hunk.blocks)
end
# Does this hunk overlap the earlier +hunk+? Two hunks count as overlapping
# when, in either the old or the new data, this hunk starts no more than one
# position past the end of the other. Returns nil when no hunk is given.
def overlaps?(hunk = nil)
  return nil if hunk.nil?

  old_touches = (@start_old - hunk.end_old) <= 1
  new_touches = (@start_new - hunk.end_new) <= 1
  old_touches || new_touches
end
# Renders this hunk in the requested +format+ (:old, :unified, :context,
# :reverse_ed or :ed_finish). For :ed the hunk itself is returned.
# Raises a RuntimeError for any other format.
def diff(format)
  case format
  when :old then old_diff
  when :unified then unified_diff
  when :context then context_diff
  when :ed then self
  when :reverse_ed, :ed_finish then ed_diff(format)
  else raise "Unknown diff format #{format}."
  end
end
# Yields each element of the old data covered by this hunk, from @start_old
# through @end_old inclusive.
#
# The positional +block+ argument was never used by the body; it is now
# optional, so the natural call <tt>each_old { |e| ... }</tt> works, while
# existing callers that pass an argument keep working. Its value is ignored.
def each_old(block = nil) # :yields element:
  @data_old[@start_old..@end_old].each { |element| yield element }
end
private
# Renders the hunk in "old" diff format. An old diff can't have any context,
# so the hunk is expected to hold exactly one block; a warning is printed
# otherwise and only the first block is used.
def old_diff
  warn "Expecting only one block in an old diff hunk!" if @blocks.size > 1
  block = @blocks[0]
  command = { "+" => 'a', "-" => 'd', "!" => "c" }[block.op]

  # The header is like a context diff range, except that both ranges sit on
  # one line with the action letter between them.
  out = "#{context_range(:old)}#{command}#{context_range(:new)}\n"

  # If removing anything, print every old line of the hunk.
  unless block.remove.empty?
    @data_old[@start_old..@end_old].each { |line| out << "< #{line}\n" }
  end
  out << "---\n" if block.op == "!"
  # Likewise every new line when inserting anything.
  unless block.insert.empty?
    @data_new[@start_new..@end_new].each { |line| out << "> #{line}\n" }
  end
  out
end
# Renders this hunk in unified-diff format: an "@@ -old +new @@" header
# followed by the hunk's lines joined with newlines. Returns the String.
def unified_diff
# Calculate item number range.
s = "@@ -#{unified_range(:old)} +#{unified_range(:new)} @@\n"
# Outlist starts containing the hunk of the old file. Removing an item
# just means putting a '-' in front of it. Inserting an item requires
# getting it from the new file and splicing it in. We splice in
# +num_added+ items. Remove blocks use +num_added+ because splicing
# changed the length of outlist.
#
# We remove +num_removed+ items. Insert blocks use +num_removed+
# because their item numbers -- corresponding to positions in the NEW
# file -- don't take removed items into account.
lo, hi, num_added, num_removed = @start_old, @end_old, 0, 0
# Every old line starts out prefixed as an unchanged (context) line.
outlist = @data_old[lo .. hi].collect { |e| e.gsub(/^/, ' ') }
@blocks.each do |block|
block.remove.each do |item|
op = item.action.to_s # -
# Index into outlist: position within the hunk, shifted by earlier splices.
offset = item.position - lo + num_added
# Swap the context prefix for the removal marker, in place.
outlist[offset].gsub!(/^ /, op.to_s)
num_removed += 1
end
block.insert.each do |item|
op = item.action.to_s # +
# Position relative to the new-side start, shifted by the removed lines
# that are still present in outlist.
offset = item.position - @start_new + num_removed
# Zero-length slice assignment inserts the new line without replacing any.
outlist[offset, 0] = "#{op}#{@data_new[item.position]}"
num_added += 1
end
end
s << outlist.join("\n")
end
# Renders this hunk in context-diff format: a "***************" separator,
# the old-side range header and lines, then the new-side range header and
# lines. Old lines touched by a removal and new lines touched by an insertion
# get the owning block's op character in place of their leading padding.
# Returns the String.
def context_diff
s = "***************\n"
s << "*** #{context_range(:old)} ****\n"
# The new-side range is computed now and emitted after the old-side lines.
r = context_range(:new)
# Print out file 1 part for each block in context diff format if there
# are any blocks that remove items
lo, hi = @start_old, @end_old
removes = @blocks.select { |e| not e.remove.empty? }
# NOTE(review): select always returns an Array, which is truthy even when
# empty, so this branch runs for every hunk -- confirm whether
# `unless removes.empty?` was intended.
if removes
outlist = @data_old[lo .. hi].collect { |e| e.gsub(/^/, ' ') }
removes.each do |block|
block.remove.each do |item|
outlist[item.position - lo].gsub!(/^ /) { block.op } # - or !
end
end
s << outlist.join("\n")
end
s << "\n--- #{r} ----\n"
lo, hi = @start_new, @end_new
inserts = @blocks.select { |e| not e.insert.empty? }
# NOTE(review): same as above -- `inserts` is always truthy.
if inserts
outlist = @data_new[lo .. hi].collect { |e| e.gsub(/^/, ' ') }
inserts.each do |block|
block.insert.each do |item|
outlist[item.position - lo].gsub!(/^ /) { block.op } # + or !
end
end
s << outlist.join("\n")
end
s
end
# Renders the hunk as an ed-style command. For :reverse_ed the command letter
# comes before the old range; for any other format the range comes first,
# with its comma turned into a space. When the block inserts anything, the
# new lines follow, terminated by a line holding a single ".".
def ed_diff(format)
  warn "Expecting only one block in an old diff hunk!" if @blocks.size > 1
  block = @blocks[0]
  command = { "+" => 'a', "-" => 'd', "!" => "c" }[block.op]
  range = context_range(:old)
  script =
    if format == :reverse_ed
      "#{command}#{range}\n"
    else
      "#{range.tr(',', ' ')}#{command}\n"
    end
  return script if block.insert.empty?

  @data_new[@start_new..@end_new].each { |line| script << "#{line}\n" }
  script << ".\n"
end
# Generates the 1-based range of item numbers to print for the :old or :new
# side: 'start,end' when start is below end, otherwise just the end number.
def context_range(mode)
  first, last =
    case mode
    when :old then [@start_old + 1, @end_old + 1]
    when :new then [@start_new + 1, @end_new + 1]
    end
  first < last ? "#{first},#{last}" : last.to_s
end
# Generates the range to print in a unified diff header for the :old or :new
# side: the 1-based number where the range starts, followed by ',length'
# unless the length is exactly 1.
def unified_range(mode)
  first, last =
    case mode
    when :old then [@start_old + 1, @end_old + 1]
    when :new then [@start_new + 1, @end_new + 1]
    end
  length = last - first + 1
  # For ranges shorter than two items the end number is printed instead of
  # the start -- "strange, but correct".
  anchor = length < 2 ? last : first
  length == 1 ? anchor.to_s : "#{anchor},#{length}"
end
end

226
lib/diff/lcs/ldiff.rb Normal file
View file

@ -0,0 +1,226 @@
#!/usr/bin/env ruby
require 'optparse'
require 'ostruct'
require 'diff/lcs/hunk'
# == ldiff Usage
# ldiff [options] oldfile newfile
#
# -c:: Displays a context diff with 3 lines of context.
# -C [LINES], --context [LINES]:: Displays a context diff with LINES lines of context. Default 3 lines.
# -u:: Displays a unified diff with 3 lines of context.
# -U [LINES], --unified [LINES]:: Displays a unified diff with LINES lines of context. Default 3 lines.
# -e:: Creates an 'ed' script to change oldfile to newfile.
# -f:: Creates an 'ed' script to change oldfile to newfile in reverse order.
# -a, --text:: Treats the files as text and compares them line-by-line, even if they do not seem to be text.
# --binary:: Treats the files as binary.
# -q, --brief:: Reports only whether or not the files differ, not the details.
# --help:: Shows the command-line help.
# --version:: Shows the version of Diff::LCS.
#
# By default, produces an "old-style" diff, with output like UNIX diff.
#
# == Copyright
# Copyright &copy; 2004 Austin Ziegler
#
# Part of Diff::LCS <http://rubyforge.org/projects/ruwiki/>
# Austin Ziegler <diff-lcs@halostatue.ca>
#
# This program is free software. It may be redistributed and/or modified under
# the terms of the GPL version 2 (or later), the Perl Artistic licence, or the
# Ruby licence.
# Command-line driver for ldiff: parses the options, reads the two files and
# writes a diff of them in the requested format.
#
# Option state (@format, @lines, @binary) is stored in instance variables of
# the module itself, so it carries over between successive calls to +run+.
module Diff::LCS::Ldiff
  # Text printed by --version. The heredoc body is kept flush-left exactly as
  # found, because <<- preserves any leading whitespace of the content.
  BANNER = <<-COPYRIGHT
ldiff #{Diff::LCS::VERSION}
Copyright © 2004 Austin Ziegler
Part of Diff::LCS.
http://rubyforge.org/projects/ruwiki/
Austin Ziegler <diff-lcs@halostatue.ca>
This program is free software. It may be redistributed and/or modified under
the terms of the GPL version 2 (or later), the Perl Artistic licence, or the
Ruby licence.
$Id: ldiff.rb,v 1.1 2004/09/26 01:37:49 austin Exp $
  COPYRIGHT

  class << self
    attr_reader :format, :lines #:nodoc:
    attr_reader :file_old, :file_new #:nodoc:
    attr_reader :data_old, :data_new #:nodoc:

    # Runs ldiff.
    #
    # args::   argument list; it must respond to +options+ (OptionParser's
    #          Arguable extension, as ARGV does) and is consumed in place.
    # input::  accepted for symmetry with the other streams; not read here.
    # output:: receives the diff text.
    # error::  receives usage, help and version text.
    #
    # Returns 0 when the files are identical (or after --help / --version),
    # 1 when they differ, and 127 when the two file names are missing.
    def run(args, input = $stdin, output = $stdout, error = $stderr) #:nodoc:
      args.options do |o|
        o.banner = "Usage: #{File.basename($0)} [options] oldfile newfile"
        o.separator ""
        o.on('-c',
             'Displays a context diff with 3 lines of',
             'context.') do |ctx|
          @format = :context
          @lines = 3
        end
        o.on('-C', '--context [LINES]', Numeric,
             'Displays a context diff with LINES lines',
             'of context. Default 3 lines.') do |ctx|
          @format = :context
          @lines = ctx || 3
        end
        o.on('-u',
             'Displays a unified diff with 3 lines of',
             'context.') do |ctx|
          @format = :unified
          @lines = 3
        end
        o.on('-U', '--unified [LINES]', Numeric,
             'Displays a unified diff with LINES lines',
             'of context. Default 3 lines.') do |ctx|
          @format = :unified
          @lines = ctx || 3
        end
        o.on('-e',
             'Creates an \'ed\' script to change',
             'oldfile to newfile.') do |ctx|
          @format = :ed
        end
        o.on('-f',
             'Creates an \'ed\' script to change',
             'oldfile to newfile in reverse order.') do |ctx|
          @format = :reverse_ed
        end
        o.on('-a', '--text',
             'Treat the files as text and compare them',
             'line-by-line, even if they do not seem',
             'to be text.') do |txt|
          @binary = false
        end
        o.on('--binary',
             'Treats the files as binary.') do |bin|
          @binary = true
        end
        o.on('-q', '--brief',
             'Report only whether or not the files',
             'differ, not the details.') do |ctx|
          @format = :report
        end
        o.on_tail('--help', 'Shows this text.') do
          error << o
          return 0
        end
        o.on_tail('--version', 'Shows the version of Diff::LCS.') do
          error << BANNER
          return 0
        end
        o.on_tail ""
        o.on_tail 'By default, runs produces an "old-style" diff, with output like UNIX diff.'
        o.parse!
      end

      unless args.size == 2
        error << args.options
        return 127
      end

      # Defaults are for old-style diff
      @format ||= :old
      @lines ||= 0

      # Use the argument list we were given (already stripped of options by
      # parse!) rather than the global ARGV.
      file_old, file_new = *args

      case @format
      when :context
        char_old = '*' * 3
        char_new = '-' * 3
      when :unified
        char_old = '-' * 3
        char_new = '+' * 3
      end

      # After we've read up to a certain point in each file, the number of
      # items we've read from each file will differ by FLD (could be 0).
      file_length_difference = 0

      if @binary.nil? || @binary
        data_old = IO::read(file_old)
        data_new = IO::read(file_new)

        # Auto-detect: a NUL byte within the first 4096 characters of either
        # file marks the pair as binary.
        if @binary.nil?
          @binary = [data_old, data_new].any? { |data| data[0, 4096].include?("\0") }
        end

        unless @binary
          data_old = data_old.split(/\n/).map! { |e| e.chomp }
          data_new = data_new.split(/\n/).map! { |e| e.chomp }
        end
      else
        data_old = IO::readlines(file_old).map! { |e| e.chomp }
        data_new = IO::readlines(file_new).map! { |e| e.chomp }
      end

      # diff yields lots of pieces, each of which is basically a Block object.
      # For binary data we only record whether the contents differ.
      if @binary
        diffs = (data_old != data_new)
      else
        diffs = Diff::LCS.diff(data_old, data_new)
        diffs = nil if diffs.empty?
      end

      return 0 unless diffs

      if @format == :report
        output << "Files #{file_old} and #{file_new} differ\n"
        return 1
      end

      # Binary contents cannot be split into hunks; just report the difference.
      if @binary
        output << "Binary files #{file_old} and #{file_new} differ\n"
        return 1
      end

      if (@format == :unified) || (@format == :context)
        ft = File.stat(file_old).mtime.localtime.strftime('%Y-%m-%d %H:%M:%S %z')
        output << "#{char_old} #{file_old}\t#{ft}\n"
        ft = File.stat(file_new).mtime.localtime.strftime('%Y-%m-%d %H:%M:%S %z')
        output << "#{char_new} #{file_new}\t#{ft}\n"
      end

      # Loop over hunks. If a hunk overlaps with the last hunk, join them.
      # Otherwise, print out the old one.
      oldhunk = hunk = nil

      # For :ed the pieces are buffered so they can be emitted last-to-first.
      if @format == :ed
        real_output = output
        output = []
      end

      diffs.each do |piece|
        begin
          hunk = Diff::LCS::Hunk.new(data_old, data_new, piece, @lines,
                                     file_length_difference)
          file_length_difference = hunk.file_length_difference

          next unless oldhunk

          if (@lines > 0) && hunk.overlaps?(oldhunk)
            hunk.unshift(oldhunk)
          else
            output << oldhunk.diff(@format)
          end
        ensure
          # Runs on every iteration, including the early +next+ above.
          oldhunk = hunk
          output << "\n"
        end
      end

      output << oldhunk.diff(@format)
      output << "\n"

      # NOTE(review): in :ed mode the buffer also holds the "\n" separators
      # pushed above, and #diff(:ed_finish) is then called on those strings
      # too - confirm against Hunk#diff whether they should be skipped.
      if @format == :ed
        output.reverse_each { |e| real_output << e.diff(:ed_finish) }
      end

      return 1
    end
  end
end

19
lib/diff/lcs/string.rb Normal file
View file

@ -0,0 +1,19 @@
#!/usr/bin/env ruby
#--
# Copyright 2004 Austin Ziegler <diff-lcs@halostatue.ca>
# adapted from:
# Algorithm::Diff (Perl) by Ned Konz <perl@bike-nomad.com>
# Smalltalk by Mario I. Wolczko <mario@wolczko.com>
# implements McIlroy-Hunt diff algorithm
#
# This program is free software. It may be redistributed and/or modified under
# the terms of the GPL version 2 (or later), the Perl Artistic licence, or the
# Ruby licence.
#
# $Id: string.rb,v 1.3 2004/08/08 20:33:09 austin Exp $
#++
# Mixes Diff::LCS into String, so every string gains the instance methods
# that module provides.
String.class_eval do
  include Diff::LCS
end

View file

@ -1,11 +1,9 @@
require 'diff'
require 'xhtmldiff'
# Temporary class containing all rendering stuff from a Revision
# I want to shift all rendering logic to the controller eventually
class PageRenderer
include HTMLDiff
def self.setup_url_generator(url_generator)
@@url_generator = url_generator
end
@ -41,8 +39,22 @@ class PageRenderer
def display_diff
previous_revision = @revision.page.previous_revision(@revision)
if previous_revision
rendered_previous_revision = WikiContent.new(previous_revision, @@url_generator).render!
diff(rendered_previous_revision, display_content)
previous_content = "<div>\n" + WikiContent.new(previous_revision, @@url_generator).render!.to_s + "\n</div>"
current_content = "<div>\n" + display_content.to_s + "\n</div>"
diff_doc = REXML::Document.new
diff_doc << (div = REXML::Element.new 'div')
hd = XHTMLDiff.new(div)
parsed_previous_revision = REXML::HashableElementDelegator.new(
REXML::XPath.first(REXML::Document.new(previous_content), '/div'))
parsed_display_content = REXML::HashableElementDelegator.new(
REXML::XPath.first(REXML::Document.new(current_content), '/div'))
Diff::LCS.traverse_balanced(parsed_previous_revision, parsed_display_content, hd)
diffs = ''
diff_doc.write(diffs, -1, true, true)
diffs
else
display_content
end

179
lib/xhtmldiff.rb Normal file
View file

@ -0,0 +1,179 @@
#!/usr/bin/env ruby
# Author: Aredridel <aredridel@nbtsc.org>
# Website: http://theinternetco.net/projects/ruby/xhtmldiff.html
# Licence: same as Ruby
# Version: 1.22
#
# Tweaks by Jacques Distler <distler@golem.ph.utexas.edu>
# -- add classnames to <del> and <ins> elements added by XHTMLDiff,
# for better CSS styling
require 'diff/lcs'
require 'rexml/document'
require 'delegate'
# Returns the larger of +a+ and +b+ as decided by +>+; when neither is
# greater than the other, +b+ is returned.
def Math.max(a, b)
  return a if a > b
  b
end
module REXML
  class Text
    # Gives Text the same +deep_clone+ entry point the diff code calls on
    # element nodes; here it is simply +clone+.
    def deep_clone
      clone
    end
  end

  # Wraps an Element so that equality and hashing are based on its serialized
  # markup (surrounding whitespace ignored) rather than on object identity.
  class HashableElementDelegator < DelegateClass(Element)
    def initialize(sub)
      super sub
    end

    # Two nodes are equal when their serialized forms match after stripping
    # leading and trailing whitespace.
    def ==(other)
      other.to_s.strip == to_s.strip
    end

    def eql?(other)
      self == other
    end

    # Child lookup. Children of the same class as the wrapped object are
    # wrapped again so they keep the markup-based equality; anything else is
    # returned as is.
    def [](k)
      child = super
      child.kind_of?(__getobj__.class) ? self.class.new(child) : child
    end

    # Hashes the same stripped text that #== compares, so objects that are
    # eql? always produce the same hash.
    def hash
      __getobj__.to_s.strip.hash
    end
  end
end
# Callback object for Diff::LCS.traverse_balanced that assembles a redline
# document: every event appends a node to +output+, with removed material
# wrapped in <del> and added material wrapped in <ins>.
class XHTMLDiff
  include REXML
  attr_accessor :output

  class << self
    BLOCK_CONTAINERS = ['div', 'ul', 'li']

    # Returns a new node describing the difference between +a+ and +b+.
    #
    # Equal arguments yield a deep clone of +a+. Two wrapped elements are
    # diffed child by child into a fresh element carrying +a+'s name and
    # attributes. Two text nodes are split on whitespace and diffed into a
    # <span>. Any other combination raises ArgumentError.
    def diff(a, b)
      return a.deep_clone if a == b

      if [a, b].all? { |node| REXML::HashableElementDelegator === node }
        merged = REXML::Element.new(a.name)
        merged.add_attributes a.attributes
        Diff::LCS.traverse_balanced(a, b, new(merged))
        merged
      elsif [a, b].all? { |node| REXML::Text === node }
        merged = REXML::Element.new('span')
        old_words = a.value.split(/\s/)
        new_words = b.value.split(/\s/)
        Diff::LCS.traverse_balanced(old_words, new_words, XHTMLTextDiff.new(merged))
        merged
      else
        raise ArgumentError, "both arguments must be equal or both be elements. a is #{a.class.name} and b is #{b.class.name}"
      end
    end
  end

  # +output+ is the node that receives everything the callbacks produce.
  def initialize(output)
    @output = output
  end

  # Instance-level shortcut for the class-level diff.
  def diff(a, b)
    self.class.diff(a, b)
  end

  # Called when both elements are the same: copy the old one through.
  def match(event)
    same = event.old_element
    @output << same.deep_clone if same
  end

  # Called when there is an element in A that isn't in B.
  def discard_a(event)
    @output << wrap(event.old_element, 'del', 'diffdel')
  end

  # Called when an element was replaced. A nested diff is attempted first
  # and kept when enough of it survives (see retained_ratio); otherwise the
  # old element is marked deleted and the new one inserted wholesale.
  def change(event)
    before, after = event.old_element, event.new_element
    nested =
      begin
        diff(before, after)
      rescue ArgumentError
        nil
      end

    if nested && retained_ratio(nested, before, after) > 0.5
      @output << nested
    else
      @output << wrap(before, 'del', 'diffmod')
      @output << wrap(after, 'ins', 'diffmod')
    end
  end

  # Called when there is an element in B that isn't in A.
  def discard_b(event)
    @output << wrap(event.new_element, 'ins', 'diffins')
  end

  def choose_event(event, element, tag)
  end

  # Deep-clones +element+; when +tag+ is given the clone is placed inside a
  # new element of that name. +class_name+, if given, becomes the 'class'
  # attribute of the returned node.
  def wrap(element, tag = nil, class_name = nil)
    node =
      if tag
        wrapper = Element.new tag
        wrapper << element.deep_clone
        wrapper
      else
        element.deep_clone
      end
    node.add_attribute('class', class_name) if class_name
    node
  end

  # Size of the nested diff's markup once bare <ins>/<del> runs are removed,
  # relative to the longer of the two serialized inputs.
  #
  # NOTE(review): the pattern only matches attribute-less <ins>/<del> tags,
  # while the wrappers built in this file carry a class attribute - confirm
  # whether those were meant to be excluded as well.
  def retained_ratio(nested, before, after)
    kept = nested.to_s.gsub(%r{<(ins|del)>.*</\1>}, '').size
    longest = Math.max(before.to_s.size, after.to_s.size)
    Float(kept) / longest
  end
  private :retained_ratio

  # Word-level variant used for the contents of text nodes: the items being
  # diffed are plain strings, not REXML nodes.
  #
  # discard_a and discard_b are inherited unchanged; the parent versions
  # already go through #wrap, which is overridden below.
  class XHTMLTextDiff < XHTMLDiff
    # A changed word is always shown as a deletion followed by an insertion.
    def change(event)
      @output << wrap(event.old_element, 'del', 'diffmod')
      @output << wrap(event.new_element, 'ins', 'diffmod')
    end

    # Called when both words are the same: emit the word without a wrapper.
    def match(event)
      @output << wrap(event.old_element, nil, nil) if event.old_element
    end

    # Turns a String into a Text node prefixed with a single space and,
    # when +tag+ is given, encloses it in an element of that name carrying
    # the optional 'class' attribute.
    def wrap(element, tag = nil, class_name = nil)
      text = element.is_a?(String) ? REXML::Text.new(" #{element}") : element
      return text unless tag

      holder = REXML::Element.new(tag)
      holder.add_text text
      holder.add_attribute('class', class_name) if class_name
      holder
    end
  end
end
# When this file is executed directly (not required as a library), report
# that it has no self-test and exit with a failure status.
if __FILE__ == $0
  $stderr.puts "No tests available yet"
  exit 1
end

View file

@ -1,110 +1,94 @@
#!/usr/bin/env ruby
require File.expand_path(File.dirname(__FILE__) + '/../test_helper')
require 'diff'
require 'xhtmldiff'
class DiffTest < Test::Unit::TestCase
include HTMLDiff
def setup
@builder = DiffBuilder.new('old', 'new')
end
def test_start_of_tag
assert @builder.start_of_tag?('<')
assert(!@builder.start_of_tag?('>'))
assert(!@builder.start_of_tag?('a'))
end
def test_end_of_tag
assert @builder.end_of_tag?('>')
assert(!@builder.end_of_tag?('<'))
assert(!@builder.end_of_tag?('a'))
end
def test_whitespace
assert @builder.whitespace?(" ")
assert @builder.whitespace?("\n")
assert @builder.whitespace?("\r")
assert(!@builder.whitespace?("a"))
end
def test_convert_html_to_list_of_words_simple
assert_equal(
['the', ' ', 'original', ' ', 'text'],
@builder.convert_html_to_list_of_words('the original text'))
end
def test_convert_html_to_list_of_words_should_separate_endlines
assert_equal(
['a', "\n", 'b', "\r", 'c'],
@builder.convert_html_to_list_of_words("a\nb\rc"))
end
def test_convert_html_to_list_of_words_should_not_compress_whitespace
assert_equal(
['a', ' ', 'b', ' ', 'c', "\r \n ", 'd'],
@builder.convert_html_to_list_of_words("a b c\r \n d"))
end
def test_convert_html_to_list_of_words_should_handle_tags_well
assert_equal(
['<p>', 'foo', ' ', 'bar', '</p>'],
@builder.convert_html_to_list_of_words("<p>foo bar</p>"))
end
def test_convert_html_to_list_of_words_interesting
assert_equal(
['<p>', 'this', ' ', 'is', '</p>', "\r\n", '<p>', 'the', ' ', 'new', ' ', 'string',
'</p>', "\r\n", '<p>', 'around', ' ', 'the', ' ', 'world', '</p>'],
@builder.convert_html_to_list_of_words(
"<p>this is</p>\r\n<p>the new string</p>\r\n<p>around the world</p>"))
def diff(a,b)
diff_doc = REXML::Document.new
diff_doc << (div = REXML::Element.new 'div' )
hd = XHTMLDiff.new(div)
parsed_a = REXML::HashableElementDelegator.new(
REXML::XPath.first(REXML::Document.new("<div>"+a+"</div>"), '/div'))
parsed_b = REXML::HashableElementDelegator.new(
REXML::XPath.first(REXML::Document.new("<div>"+b+"</div>"), '/div'))
Diff::LCS.traverse_balanced(parsed_a, parsed_b, hd)
diffs = ''
diff_doc.write(diffs, -1, true, true)
diffs
end
def test_html_diff_simple
a = 'this was the original string'
b = 'this is the new string'
assert_equal('this <del class="diffmod">was</del><ins class="diffmod">is</ins> the ' +
'<del class="diffmod">original</del><ins class="diffmod">new</ins> string',
diff(a, b))
assert_equal("<div><span> this<del class='diffmod'> was</del><ins class='diffmod'> is</ins> the" +
"<del class='diffmod'> original</del><ins class='diffmod'> new</ins> string</span></div>",
diff(a, b))
end
def test_html_diff_with_multiple_paragraphs
a = "<p>this was the original string</p>"
b = "<p>this is</p>\r\n<p> the new string</p>\r\n<p>around the world</p>"
# Some of this expected result is accidental to implementation.
# At least it's well-formed and more or less correct.
b = "<p>this is</p>\n<p> the new string</p>\n<p>around the world</p>"
assert_equal(
"<p>this <del class=\"diffmod\">was</del><ins class=\"diffmod\">is</ins></p>"+
"<ins class=\"diffmod\">\r\n</ins><p> the " +
"<del class=\"diffmod\">original</del><ins class=\"diffmod\">new</ins>" +
" string</p><ins class=\"diffins\">\r\n</ins>" +
"<p><ins class=\"diffins\">around the world</ins></p>",
"<div><p><span> this<del class='diffmod'> was</del><ins class='diffmod'> is</ins>" +
"<del class='diffdel'> the</del><del class='diffdel'> original</del><del class='diffdel'> string</del></span></p>" +
"<ins class='diffins'>\n</ins><ins class='diffins'><p> the new string</p></ins>" +
"<ins class='diffins'>\n</ins><ins class='diffins'><p>around the world</p></ins></div>",
diff(a, b))
end
def test_split_paragraph_into_two
a = "<p>foo bar</p>"
b = "<p>foo</p><p>bar</p>"
assert_equal(
"<div><p><span> foo<del class='diffdel'> bar</del></span></p>" +
"<ins class='diffins'><p>bar</p></ins></div>",
diff(a,b))
end
def test_join_two_paragraphs_into_one
a = "<p>foo</p><p>bar</p>"
b = "<p>foo bar</p>"
assert_equal(
"<div><p><span> foo<ins class='diffins'> bar</ins></span></p>" +
"<del class='diffdel'><p>bar</p></del></div>",
diff(a,b))
end
def test_add_inline_element
a = "<p>foo bar</p>"
b = "<p>foo <b>bar</b></p>"
assert_equal(
"<div><p><span> foo<del class='diffdel'> bar</del></span>" +
"<ins class='diffins'><b>bar</b></ins></p></div>",
diff(a,b))
end
# FIXME this test fails (ticket #67, http://dev.instiki.org/ticket/67)
def test_html_diff_preserves_endlines_in_pre
a = "<pre>\na\nb\nc\n</pre>"
b = "<pre>\n</pre>"
a = "<pre>a\nb\nc\n</pre>"
b = "<pre>a\n</pre>"
assert_equal(
"<pre>\n<del class=\"diffdel\">a\nb\nc\n</del></pre>",
"<div><pre><span> a\n<del class='diffdel'>b\nc\n</del></span></pre></div>",
diff(a, b))
end
def test_html_diff_with_tags
a = ""
b = "<div>foo</div>"
assert_equal '<div><ins class="diffins">foo</ins></div>', diff(a, b)
assert_equal "<div><ins class='diffins'><div>foo</div></ins></div>", diff(a, b)
end
def test_diff_for_tag_change
a = "<a>x</a>"
b = "<b>x</b>"
# FIXME sad, but true - this case produces an invalid XML. If handle this you can, strong your foo is.
assert_equal '<a><b>x</a></b>', diff(a, b)
assert_equal "<div><del class='diffdel'><a>x</a></del><ins class='diffins'><b>x</b></ins></div>", diff(a, b)
end
end