Replaced diff.rb with xhtmldiff.rb, which (unlike its predecessor) produces well-formed redline documents.
This commit is contained in:
parent
86e9c70a26
commit
8c52f28864
12 changed files with 2420 additions and 391 deletions
316
lib/diff.rb
316
lib/diff.rb
|
@ -1,316 +0,0 @@
|
||||||
module HTMLDiff
|
|
||||||
|
|
||||||
# A run of +size+ identical words that begins at +start_in_old+ in the old
# word list and at +start_in_new+ in the new word list.
Match = Struct.new(:start_in_old, :start_in_new, :size) do
  # Index just past the last matched word in the old word list.
  def end_in_old
    start_in_old + size
  end

  # Index just past the last matched word in the new word list.
  def end_in_new
    start_in_new + size
  end
end

# One step of the diff: +action+ plus the half-open word ranges it covers
# in the old and the new word lists.
Operation = Struct.new(:action, :start_in_old, :end_in_old, :start_in_new, :end_in_new)
|
|
||||||
|
|
||||||
class DiffBuilder
|
|
||||||
|
|
||||||
# Stores the two versions to compare and starts with an empty output buffer.
def initialize(old_version, new_version)
  @old_version = old_version
  @new_version = new_version
  @content = []
end
|
|
||||||
|
|
||||||
# Runs the whole diff: splits both inputs into words, indexes the new words,
# performs every operation in order and returns the collected output joined
# into a single string.
def build
  split_inputs_to_words
  index_new_words
  operations.each do |step|
    perform_operation(step)
  end
  @content.join
end
|
|
||||||
|
|
||||||
# Turns both stored versions into word lists (@old_words and @new_words).
def split_inputs_to_words
  old_chars = explode(@old_version)
  new_chars = explode(@new_version)
  @old_words = convert_html_to_list_of_words(old_chars)
  @new_words = convert_html_to_list_of_words(new_chars)
end
|
|
||||||
|
|
||||||
# Builds @word_indices: for each word of @new_words, the ascending list of
# positions at which it occurs. Unknown words map to a fresh empty list.
def index_new_words
  @word_indices = Hash.new { |table, word| table[word] = [] }
  @new_words.each_with_index do |word, position|
    @word_indices[word].push(position)
  end
end
|
|
||||||
|
|
||||||
# Converts the matching blocks into an ordered list of Operation structs.
# Words between consecutive matches become :replace, :insert or :delete
# operations; each non-empty match becomes an :equal operation.
def operations
  result = []
  old_cursor = 0
  new_cursor = 0

  blocks = matching_blocks
  # An empty match placed at the very end makes the loop below emit the
  # operation for whatever unmatched tail remains in either version.
  blocks << Match.new(@old_words.length, @new_words.length, 0)

  blocks.each do |match|
    old_gap = old_cursor != match.start_in_old
    new_gap = new_cursor != match.start_in_new

    gap_action =
      if old_gap && new_gap
        :replace
      elsif new_gap
        :insert
      elsif old_gap
        :delete
      else
        # both cursors already sit at the match, e.g. when the first few
        # words are the same in both versions
        :none
      end

    unless gap_action == :none
      result << Operation.new(gap_action,
                              old_cursor, match.start_in_old,
                              new_cursor, match.start_in_new)
    end

    unless match.size == 0
      result << Operation.new(:equal,
                              match.start_in_old, match.end_in_old,
                              match.start_in_new, match.end_in_new)
    end

    old_cursor = match.end_in_old
    new_cursor = match.end_in_new
  end

  result
end
|
|
||||||
|
|
||||||
# Returns the list of matches collected by recursively_find_matching_blocks
# over the full extent of both word lists.
def matching_blocks
  [].tap do |found|
    recursively_find_matching_blocks(0, @old_words.size, 0, @new_words.size, found)
  end
end
|
|
||||||
|
|
||||||
# Finds the best match inside the given old/new ranges, then recurses into
# the region to its left and the region to its right. Matches are appended
# to +matching_blocks+ in left-to-right order.
def recursively_find_matching_blocks(start_in_old, end_in_old, start_in_new, end_in_new, matching_blocks)
  match = find_match(start_in_old, end_in_old, start_in_new, end_in_new)
  return unless match

  # Only recurse to the left when both versions have words before the match.
  words_on_left = start_in_old < match.start_in_old && start_in_new < match.start_in_new
  if words_on_left
    recursively_find_matching_blocks(
      start_in_old, match.start_in_old, start_in_new, match.start_in_new, matching_blocks
    )
  end

  matching_blocks << match

  # Likewise for the words after the match.
  words_on_right = match.end_in_old < end_in_old && match.end_in_new < end_in_new
  return unless words_on_right

  recursively_find_matching_blocks(
    match.end_in_old, end_in_old, match.end_in_new, end_in_new, matching_blocks
  )
end
|
|
||||||
|
|
||||||
# Returns the longest run of consecutive equal words within the given
# old/new ranges as a Match, or nil when the ranges share no word.
# On ties the first run found while scanning the old words wins.
# The match is not widened afterwards: the calls to add_matching_words_left
# and add_matching_words_right are disabled in this version.
def find_match(start_in_old, end_in_old, start_in_new, end_in_new)
  best_old = start_in_old
  best_new = start_in_new
  best_size = 0

  # run_lengths[j] is the length of the common run that ends at the previous
  # old word and at new word j; missing entries count as 0.
  run_lengths = Hash.new { |table, index| table[index] = 0 }

  (start_in_old...end_in_old).each do |index_in_old|
    next_run_lengths = Hash.new { |table, index| table[index] = 0 }

    @word_indices[@old_words[index_in_old]].each do |index_in_new|
      next if index_in_new < start_in_new
      break if index_in_new >= end_in_new

      # Extend the run that ended one word earlier in both lists.
      run = run_lengths[index_in_new - 1] + 1
      next_run_lengths[index_in_new] = run
      next unless run > best_size

      best_old = index_in_old - run + 1
      best_new = index_in_new - run + 1
      best_size = run
    end

    run_lengths = next_run_lengths
  end

  best_size == 0 ? nil : Match.new(best_old, best_new, best_size)
end
|
|
||||||
|
|
||||||
# Extends a match to the left while the words just before it are equal in
# both versions and neither start bound is crossed.
# Returns [match_in_old, match_in_new, match_size] for the extended match.
def add_matching_words_left(match_in_old, match_in_new, match_size, start_in_old, start_in_new)
  loop do
    break unless match_in_old > start_in_old && match_in_new > start_in_new
    break unless @old_words[match_in_old - 1] == @new_words[match_in_new - 1]

    match_in_old -= 1
    match_in_new -= 1
    match_size += 1
  end
  [match_in_old, match_in_new, match_size]
end
|
|
||||||
|
|
||||||
# Extends a match to the right while the words just after it are equal in
# both versions and neither end bound is reached.
# Returns [match_in_old, match_in_new, match_size] for the extended match.
def add_matching_words_right(match_in_old, match_in_new, match_size, end_in_old, end_in_new)
  loop do
    break unless match_in_old + match_size < end_in_old && match_in_new + match_size < end_in_new
    break unless @old_words[match_in_old + match_size] == @new_words[match_in_new + match_size]

    match_size += 1
  end
  [match_in_old, match_in_new, match_size]
end
|
|
||||||
|
|
||||||
# The only actions perform_operation is allowed to dispatch to.
VALID_METHODS = [:replace, :insert, :delete, :equal].freeze

# Records +operation+ in @operation and dispatches to the method named by
# its action (replace, insert, delete or equal), passing the operation.
#
# Raises ArgumentError when the action is not listed in VALID_METHODS, so
# that #send can never be steered to an arbitrary method.
def perform_operation(operation)
  action = operation.action
  unless VALID_METHODS.include?(action)
    raise ArgumentError, "unknown diff operation: #{action.inspect}"
  end

  @operation = operation
  send(action, operation)
end
|
|
||||||
|
|
||||||
# A replacement is rendered as a deletion followed by an insertion, both
# tagged with the 'diffmod' CSS class.
def replace(operation)
  cssclass = 'diffmod'
  delete(operation, cssclass)
  insert(operation, cssclass)
end
|
|
||||||
|
|
||||||
# Emits the new words covered by +operation+ wrapped in <ins> elements
# carrying the CSS class +tagclass+.
def insert(operation, tagclass = 'diffins')
  added_words = @new_words[operation.start_in_new...operation.end_in_new]
  insert_tag('ins', tagclass, added_words)
end
|
|
||||||
|
|
||||||
# Emits the old words covered by +operation+ wrapped in <del> elements
# carrying the CSS class +tagclass+.
def delete(operation, tagclass = 'diffdel')
  removed_words = @old_words[operation.start_in_old...operation.end_in_old]
  insert_tag('del', tagclass, removed_words)
end
|
|
||||||
|
|
||||||
# Unchanged words need no markup: the matching words are copied to the
# output as they appear in the new version.
def equal(operation)
  unchanged_words = @new_words[operation.start_in_new...operation.end_in_new]
  @content = @content + unchanged_words
end
|
|
||||||
|
|
||||||
# Truthy (the match offset) when +item+ is a single <...> tag, optionally
# surrounded by whitespace; nil otherwise. The pattern does not exclude a
# leading slash, so closing tags match as well.
def opening_tag?(item)
  %r!^\s*<[^>]+>\s*$! =~ item
end
|
|
||||||
|
|
||||||
# Truthy (the match offset) when +item+ is a single </...> closing tag,
# optionally surrounded by whitespace; nil otherwise.
def closing_tag?(item)
  %r!^\s*</[^>]+>\s*$! =~ item
end
|
|
||||||
|
|
||||||
# Truthy when +item+ is recognised as either an opening or a closing tag.
def tag?(item)
  opening_tag?(item) || closing_tag?(item)
end
|
|
||||||
|
|
||||||
# Removes and returns the leading words of +words+ for which the block is
# truthy, stopping at the first word for which it is not. When every word
# satisfies the block, all of them are removed and returned.
# +words+ is modified in place.
def extract_consecutive_words(words, &condition)
  first_rejected = nil
  words.each_with_index do |word, position|
    next if condition.call(word)

    first_rejected = position
    break
  end

  cut = first_rejected ? (0...first_rejected) : (0..words.length)
  words.slice!(cut)
end
|
|
||||||
|
|
||||||
# This method encloses words within a specified tag (ins or del), and adds this into @content,
|
|
||||||
# with a twist: if the words contain tags, it actually creates multiple ins or del elements,
|
|
||||||
# so that they don't include any ins or del. This handles cases like
|
|
||||||
# old: '<p>a</p>'
|
|
||||||
# new: '<p>ab</p><p>c</p>'
|
|
||||||
# diff result: '<p>a<ins>b</ins></p><p><ins>c</ins></p>'
|
|
||||||
# this still doesn't guarantee valid HTML (hint: think about diffing a text containing ins or
|
|
||||||
# del tags), but handles correctly more cases than the earlier version.
|
|
||||||
#
|
|
||||||
# P.S.: Spare a thought for people who write HTML browsers. They live in this ... every day.
|
|
||||||
|
|
||||||
# Appends +words+ to @content, wrapping each run of non-tag words in a
# +tagname+ element with class +cssclass+ and copying runs of tag words
# through unwrapped. +words+ is consumed (emptied) in the process.
def insert_tag(tagname, cssclass, words)
  until words.empty?
    text_run = extract_consecutive_words(words) { |word| !tag?(word) }
    @content << wrap_text(text_run.join, tagname, cssclass) unless text_run.empty?

    break if words.empty?

    tag_run = extract_consecutive_words(words) { |word| tag?(word) }
    @content += tag_run
  end
end
|
|
||||||
|
|
||||||
# Returns +text+ enclosed in a +tagname+ element whose class attribute is
# +cssclass+.
def wrap_text(text, tagname, cssclass)
  opening = "<#{tagname} class=\"#{cssclass}\">"
  closing = "</#{tagname}>"
  "#{opening}#{text}#{closing}"
end
|
|
||||||
|
|
||||||
# Splits a String into a list of single characters; anything that is not a
# String is returned unchanged.
def explode(sequence)
  return sequence unless sequence.is_a?(String)

  sequence.split(//)
end
|
|
||||||
|
|
||||||
# True when +char+ is the character that closes a tag.
def end_of_tag?(char)
  '>' == char
end
|
|
||||||
|
|
||||||
# True when +char+ is the character that opens a tag.
def start_of_tag?(char)
  '<' == char
end
|
|
||||||
|
|
||||||
# Truthy (the match offset) when +char+ contains a whitespace character;
# nil otherwise.
def whitespace?(char)
  /\s/ =~ char
end
|
|
||||||
|
|
||||||
# Splits HTML into a flat list of "words". Each word is one of:
#   * a complete tag, from '<' up to and including '>',
#   * a run of consecutive whitespace characters,
#   * a run of consecutive non-whitespace, non-tag characters.
#
# x            - a String, or an already exploded list of characters.
# use_brackets - when true, tags are emitted with '[' and ']' in place of
#                '<' and '>'.
#
# Returns an Array of Strings. The elements of +x+ are never modified:
# every word is built in a fresh String, so a caller-supplied (or frozen)
# character list is left untouched.
def convert_html_to_list_of_words(x, use_brackets = false)
  open_bracket, close_bracket = use_brackets ? ['[', ']'] : ['<', '>']

  mode = :char
  current_word = ''.dup
  words = []

  explode(x).each do |char|
    case mode
    when :tag
      if end_of_tag?(char)
        current_word << close_bracket
        words << current_word
        current_word = ''.dup
        # The character that ended the tag is '>', never whitespace, so text
        # after a tag always starts in :char mode.
        mode = :char
      else
        current_word << char
      end
    when :char, :whitespace
      if start_of_tag?(char)
        words << current_word unless current_word.empty?
        current_word = open_bracket.dup
        mode = :tag
      else
        char_mode = whitespace?(char) ? :whitespace : :char
        if char_mode == mode
          # Same kind of character as the current run: keep growing it.
          current_word << char
        else
          # Switching between text and whitespace closes the current word.
          words << current_word unless current_word.empty?
          current_word = char.dup
          mode = char_mode
        end
      end
    else
      raise "Unknown mode #{mode.inspect}"
    end
  end

  words << current_word unless current_word.empty?
  words
end
|
|
||||||
|
|
||||||
end # of class Diff Builder
|
|
||||||
|
|
||||||
# Convenience entry point: builds the diff of +a+ against +b+ with a
# DiffBuilder and returns the resulting string.
def diff(a, b)
  builder = DiffBuilder.new(a, b)
  builder.build
end
|
|
||||||
|
|
||||||
end
|
|
1105
lib/diff/lcs.rb
Normal file
1105
lib/diff/lcs.rb
Normal file
File diff suppressed because it is too large
Load diff
21
lib/diff/lcs/array.rb
Normal file
21
lib/diff/lcs/array.rb
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
#!/usr/bin/env ruby
|
||||||
|
#--
|
||||||
|
# Copyright 2004 Austin Ziegler <diff-lcs@halostatue.ca>
|
||||||
|
# adapted from:
|
||||||
|
# Algorithm::Diff (Perl) by Ned Konz <perl@bike-nomad.com>
|
||||||
|
# Smalltalk by Mario I. Wolczko <mario@wolczko.com>
|
||||||
|
# implements McIlroy-Hunt diff algorithm
|
||||||
|
#
|
||||||
|
# This program is free software. It may be redistributed and/or modified under
|
||||||
|
# the terms of the GPL version 2 (or later), the Perl Artistic licence, or the
|
||||||
|
# Ruby licence.
|
||||||
|
#
|
||||||
|
# $Id: array.rb,v 1.3 2004/08/08 20:33:09 austin Exp $
|
||||||
|
#++
|
||||||
|
# Includes Diff::LCS into the Array built-in class.
|
||||||
|
|
||||||
|
require 'diff/lcs'
|
||||||
|
|
||||||
|
# Mixes Diff::LCS into the built-in Array class.
Array.send(:include, Diff::LCS)
|
51
lib/diff/lcs/block.rb
Normal file
51
lib/diff/lcs/block.rb
Normal file
|
@ -0,0 +1,51 @@
|
||||||
|
#!/usr/bin/env ruby
|
||||||
|
#--
|
||||||
|
# Copyright 2004 Austin Ziegler <diff-lcs@halostatue.ca>
|
||||||
|
# adapted from:
|
||||||
|
# Algorithm::Diff (Perl) by Ned Konz <perl@bike-nomad.com>
|
||||||
|
# Smalltalk by Mario I. Wolczko <mario@wolczko.com>
|
||||||
|
# implements McIlroy-Hunt diff algorithm
|
||||||
|
#
|
||||||
|
# This program is free software. It may be redistributed and/or modified under
|
||||||
|
# the terms of the GPL version 2 (or later), the Perl Artistic licence, or the
|
||||||
|
# Ruby licence.
|
||||||
|
#
|
||||||
|
# $Id: block.rb,v 1.3 2004/08/08 20:33:09 austin Exp $
|
||||||
|
#++
|
||||||
|
# Contains Diff::LCS::Block for bin/ldiff.
|
||||||
|
|
||||||
|
# A block is an operation removing, adding, or changing a group of items.
|
||||||
|
# Basically, this is just a list of changes, where each change adds or
|
||||||
|
# deletes a single item. Used by bin/ldiff.
|
||||||
|
class Diff::LCS::Block
  # changes - every item of the chunk, in order.
  # insert  - the items that report adding?.
  # remove  - the items that report deleting?.
  attr_reader :changes, :insert, :remove

  # Sorts the items of +chunk+ into the three lists above.
  def initialize(chunk)
    @changes = []
    @insert = []
    @remove = []

    chunk.each do |change|
      @changes.push(change)
      @remove.push(change) if change.deleting?
      @insert.push(change) if change.adding?
    end
  end

  # Number of inserted items minus number of removed items.
  def diff_size
    @insert.size - @remove.size
  end

  # One-character summary of the block: '!' when it both removes and
  # inserts, '-' when it only removes, '+' when it only inserts and '^'
  # when it does neither.
  def op
    removes = !@remove.empty?
    inserts = !@insert.empty?

    if removes && inserts
      '!'
    elsif removes
      '-'
    elsif inserts
      '+'
    else
      '^'
    end
  end
end
|
322
lib/diff/lcs/callbacks.rb
Normal file
322
lib/diff/lcs/callbacks.rb
Normal file
|
@ -0,0 +1,322 @@
|
||||||
|
#!/usr/bin/env ruby
|
||||||
|
#--
|
||||||
|
# Copyright 2004 Austin Ziegler <diff-lcs@halostatue.ca>
|
||||||
|
# adapted from:
|
||||||
|
# Algorithm::Diff (Perl) by Ned Konz <perl@bike-nomad.com>
|
||||||
|
# Smalltalk by Mario I. Wolczko <mario@wolczko.com>
|
||||||
|
# implements McIlroy-Hunt diff algorithm
|
||||||
|
#
|
||||||
|
# This program is free software. It may be redistributed and/or modified under
|
||||||
|
# the terms of the GPL version 2 (or later), the Perl Artistic licence, or the
|
||||||
|
# Ruby licence.
|
||||||
|
#
|
||||||
|
# $Id: callbacks.rb,v 1.4 2004/09/14 18:51:26 austin Exp $
|
||||||
|
#++
|
||||||
|
# Contains definitions for all default callback objects.
|
||||||
|
|
||||||
|
require 'diff/lcs/change'
|
||||||
|
|
||||||
|
module Diff::LCS
  # This callback object implements the default set of callback events, which
  # only returns the event itself. Note that #finished_a and #finished_b are
  # not implemented -- I haven't yet figured out where they would be useful.
  #
  # Note that this is intended to be called as is, e.g.,
  #
  #   Diff::LCS.LCS(seq1, seq2, Diff::LCS::DefaultCallbacks)
  class DefaultCallbacks
    # Called when two items match.
    def self.match(event)
      event
    end

    # Called when the old value is discarded in favour of the new value.
    def self.discard_a(event)
      event
    end

    # Called when the new value is discarded in favour of the old value.
    def self.discard_b(event)
      event
    end

    # Called when both the old and new values have changed.
    def self.change(event)
      event
    end

    # The class itself is the callback object; it is never instantiated.
    private_class_method :new
  end

  # An alias for DefaultCallbacks that is used in Diff::LCS#traverse_sequences.
  #
  #   Diff::LCS.LCS(seq1, seq2, Diff::LCS::SequenceCallbacks)
  SequenceCallbacks = DefaultCallbacks

  # An alias for DefaultCallbacks that is used in Diff::LCS#traverse_balanced.
  #
  #   Diff::LCS.LCS(seq1, seq2, Diff::LCS::BalancedCallbacks)
  BalancedCallbacks = DefaultCallbacks
end
|
||||||
|
|
||||||
|
# This will produce a compound array of simple diff change objects. Each
|
||||||
|
# element in the #diffs array is a +hunk+ or +hunk+ array, where each
|
||||||
|
# element in each +hunk+ array is a single Change object representing the
|
||||||
|
# addition or removal of a single element from one of the two tested
|
||||||
|
# sequences. The +hunk+ provides the full context for the changes.
|
||||||
|
#
|
||||||
|
# diffs = Diff::LCS.diff(seq1, seq2)
|
||||||
|
# # This example shows a simplified array format.
|
||||||
|
# # [ [ [ '-', 0, 'a' ] ], # 1
|
||||||
|
# # [ [ '+', 2, 'd' ] ], # 2
|
||||||
|
# # [ [ '-', 4, 'h' ], # 3
|
||||||
|
# # [ '+', 4, 'f' ] ],
|
||||||
|
# # [ [ '+', 6, 'k' ] ], # 4
|
||||||
|
# # [ [ '-', 8, 'n' ], # 5
|
||||||
|
# # [ '-', 9, 'p' ],
|
||||||
|
# # [ '+', 9, 'r' ],
|
||||||
|
# # [ '+', 10, 's' ],
|
||||||
|
# # [ '+', 11, 't' ] ] ]
|
||||||
|
#
|
||||||
|
# There are five hunks here. The first hunk says that the +a+ at position 0
|
||||||
|
# of the first sequence should be deleted (<tt>'-'</tt>). The second hunk
|
||||||
|
# says that the +d+ at position 2 of the second sequence should be inserted
|
||||||
|
# (<tt>'+'</tt>). The third hunk says that the +h+ at position 4 of the
|
||||||
|
# first sequence should be removed and replaced with the +f+ from position 4
|
||||||
|
# of the second sequence. The other two hunks are described similarly.
|
||||||
|
#
|
||||||
|
# === Use
|
||||||
|
# This callback object must be initialised and is used by the Diff::LCS#diff
|
||||||
|
# method.
|
||||||
|
#
|
||||||
|
# cbo = Diff::LCS::DiffCallbacks.new
|
||||||
|
# Diff::LCS.LCS(seq1, seq2, cbo)
|
||||||
|
# cbo.finish
|
||||||
|
#
|
||||||
|
# Note that the call to #finish is absolutely necessary, or the last set of
|
||||||
|
# changes will not be visible. Alternatively, can be used as:
|
||||||
|
#
|
||||||
|
# cbo = Diff::LCS::DiffCallbacks.new { |tcbo| Diff::LCS.LCS(seq1, seq2, tcbo) }
|
||||||
|
#
|
||||||
|
# The necessary #finish call will be made.
|
||||||
|
#
|
||||||
|
# === Simplified Array Format
|
||||||
|
# The simplified array format used in the example above can be obtained
|
||||||
|
# with:
|
||||||
|
#
|
||||||
|
# require 'pp'
|
||||||
|
# pp diffs.map { |e| e.map { |f| f.to_a } }
|
||||||
|
class Diff::LCS::DiffCallbacks
  # Returns the difference set collected during the diff process.
  attr_reader :diffs

  # Starts with no hunks. When a block is given, the new object is yielded
  # to it and #finish is called afterwards, even if the block raises.
  def initialize # :yields self:
    @hunk = []
    @diffs = []
    return unless block_given?

    begin
      yield self
    ensure
      finish
    end
  end

  # Finalizes the diff process. If an unprocessed hunk still exists, then it
  # is appended to the diff list.
  def finish
    add_nonempty_hunk
  end

  # A matching element ends the current hunk.
  def match(event)
    add_nonempty_hunk
  end

  # Records the removal of the old element as a '-' change.
  def discard_a(event)
    removal = Diff::LCS::Change.new('-', event.old_position, event.old_element)
    @hunk << removal
  end

  # Records the addition of the new element as a '+' change.
  def discard_b(event)
    addition = Diff::LCS::Change.new('+', event.new_position, event.new_element)
    @hunk << addition
  end

  private

  # Moves the current hunk into #diffs when it holds any change, then
  # starts a fresh hunk.
  def add_nonempty_hunk
    @diffs.push(@hunk) unless @hunk.empty?
    @hunk = []
  end
end
|
||||||
|
|
||||||
|
# This will produce a compound array of contextual diff change objects. Each
|
||||||
|
# element in the #diffs array is a "hunk" array, where each element in each
|
||||||
|
# "hunk" array is a single change. Each change is a Diff::LCS::ContextChange
|
||||||
|
# that contains both the old index and new index values for the change. The
|
||||||
|
# "hunk" provides the full context for the changes. Both old and new objects
|
||||||
|
# will be presented for changed objects. +nil+ will be substituted for a
|
||||||
|
# discarded object.
|
||||||
|
#
|
||||||
|
# seq1 = %w(a b c e h j l m n p)
|
||||||
|
# seq2 = %w(b c d e f j k l m r s t)
|
||||||
|
#
|
||||||
|
# diffs = Diff::LCS.diff(seq1, seq2, Diff::LCS::ContextDiffCallbacks)
|
||||||
|
# # This example shows a simplified array format.
|
||||||
|
# # [ [ [ '-', [ 0, 'a' ], [ 0, nil ] ] ], # 1
|
||||||
|
# # [ [ '+', [ 3, nil ], [ 2, 'd' ] ] ], # 2
|
||||||
|
# # [ [ '-', [ 4, 'h' ], [ 4, nil ] ], # 3
|
||||||
|
# # [ '+', [ 5, nil ], [ 4, 'f' ] ] ],
|
||||||
|
# # [ [ '+', [ 6, nil ], [ 6, 'k' ] ] ], # 4
|
||||||
|
# # [ [ '-', [ 8, 'n' ], [ 9, nil ] ], # 5
|
||||||
|
# # [ '+', [ 9, nil ], [ 9, 'r' ] ],
|
||||||
|
# # [ '-', [ 9, 'p' ], [ 10, nil ] ],
|
||||||
|
# # [ '+', [ 10, nil ], [ 10, 's' ] ],
|
||||||
|
# # [ '+', [ 10, nil ], [ 11, 't' ] ] ] ]
|
||||||
|
#
|
||||||
|
# The five hunks shown are comprised of individual changes; if there is a
|
||||||
|
# related set of changes, they are still shown individually.
|
||||||
|
#
|
||||||
|
# This callback can also be used with Diff::LCS#sdiff, which will produce
|
||||||
|
# results like:
|
||||||
|
#
|
||||||
|
# diffs = Diff::LCS.sdiff(seq1, seq2, Diff::LCS::ContextDiffCallbacks)
|
||||||
|
# # This example shows a simplified array format.
|
||||||
|
# # [ [ [ "-", [ 0, "a" ], [ 0, nil ] ] ], # 1
|
||||||
|
# # [ [ "+", [ 3, nil ], [ 2, "d" ] ] ], # 2
|
||||||
|
# # [ [ "!", [ 4, "h" ], [ 4, "f" ] ] ], # 3
|
||||||
|
# # [ [ "+", [ 6, nil ], [ 6, "k" ] ] ], # 4
|
||||||
|
# # [ [ "!", [ 8, "n" ], [ 9, "r" ] ], # 5
|
||||||
|
# # [ "!", [ 9, "p" ], [ 10, "s" ] ],
|
||||||
|
# # [ "+", [ 10, nil ], [ 11, "t" ] ] ] ]
|
||||||
|
#
|
||||||
|
# The five hunks are still present, but are significantly shorter in total
|
||||||
|
# presentation, because changed items are shown as changes ("!") instead of
|
||||||
|
# potentially "mismatched" pairs of additions and deletions.
|
||||||
|
#
|
||||||
|
# The result of this operation is similar to that of
|
||||||
|
# Diff::LCS::SDiffCallbacks. They may be compared as:
|
||||||
|
#
|
||||||
|
# s = Diff::LCS.sdiff(seq1, seq2).reject { |e| e.action == "=" }
|
||||||
|
# c = Diff::LCS.sdiff(seq1, seq2, Diff::LCS::ContextDiffCallbacks).flatten
|
||||||
|
#
|
||||||
|
# s == c # -> true
|
||||||
|
#
|
||||||
|
# === Use
|
||||||
|
# This callback object must be initialised and can be used by the
|
||||||
|
# Diff::LCS#diff or Diff::LCS#sdiff methods.
|
||||||
|
#
|
||||||
|
# cbo = Diff::LCS::ContextDiffCallbacks.new
|
||||||
|
# Diff::LCS.LCS(seq1, seq2, cbo)
|
||||||
|
# cbo.finish
|
||||||
|
#
|
||||||
|
# Note that the call to #finish is absolutely necessary, or the last set of
|
||||||
|
# changes will not be visible. Alternatively, can be used as:
|
||||||
|
#
|
||||||
|
# cbo = Diff::LCS::ContextDiffCallbacks.new { |tcbo| Diff::LCS.LCS(seq1, seq2, tcbo) }
|
||||||
|
#
|
||||||
|
# The necessary #finish call will be made.
|
||||||
|
#
|
||||||
|
# === Simplified Array Format
|
||||||
|
# The simplified array format used in the example above can be obtained
|
||||||
|
# with:
|
||||||
|
#
|
||||||
|
# require 'pp'
|
||||||
|
# pp diffs.map { |e| e.map { |f| f.to_a } }
|
||||||
|
class Diff::LCS::ContextDiffCallbacks < Diff::LCS::DiffCallbacks
  # Appends the simplified ContextChange for +event+ to the current hunk.
  def discard_a(event)
    @hunk << Diff::LCS::ContextChange.simplify(event)
  end

  # Additions and changes are recorded exactly like removals.
  alias_method :discard_b, :discard_a
  alias_method :change, :discard_a
end
|
||||||
|
|
||||||
|
# This will produce a simple array of diff change objects. Each element in
|
||||||
|
# the #diffs array is a single ContextChange. In the set of #diffs provided
|
||||||
|
# by SDiffCallbacks, both old and new objects will be presented for both
|
||||||
|
# changed <strong>and unchanged</strong> objects. +nil+ will be substituted
|
||||||
|
# for a discarded object.
|
||||||
|
#
|
||||||
|
# The diffset produced by this callback, when provided to Diff::LCS#sdiff,
|
||||||
|
# will compute and display the necessary components to show two sequences
|
||||||
|
# and their minimized differences side by side, just like the Unix utility
|
||||||
|
# +sdiff+.
|
||||||
|
#
|
||||||
|
# same same
|
||||||
|
# before | after
|
||||||
|
# old < -
|
||||||
|
# - > new
|
||||||
|
#
|
||||||
|
# seq1 = %w(a b c e h j l m n p)
|
||||||
|
# seq2 = %w(b c d e f j k l m r s t)
|
||||||
|
#
|
||||||
|
# diffs = Diff::LCS.sdiff(seq1, seq2)
|
||||||
|
# # This example shows a simplified array format.
|
||||||
|
# # [ [ "-", [ 0, "a"], [ 0, nil ] ],
|
||||||
|
# # [ "=", [ 1, "b"], [ 0, "b" ] ],
|
||||||
|
# # [ "=", [ 2, "c"], [ 1, "c" ] ],
|
||||||
|
# # [ "+", [ 3, nil], [ 2, "d" ] ],
|
||||||
|
# # [ "=", [ 3, "e"], [ 3, "e" ] ],
|
||||||
|
# # [ "!", [ 4, "h"], [ 4, "f" ] ],
|
||||||
|
# # [ "=", [ 5, "j"], [ 5, "j" ] ],
|
||||||
|
# # [ "+", [ 6, nil], [ 6, "k" ] ],
|
||||||
|
# # [ "=", [ 6, "l"], [ 7, "l" ] ],
|
||||||
|
# # [ "=", [ 7, "m"], [ 8, "m" ] ],
|
||||||
|
# # [ "!", [ 8, "n"], [ 9, "r" ] ],
|
||||||
|
# # [ "!", [ 9, "p"], [ 10, "s" ] ],
|
||||||
|
# # [ "+", [ 10, nil], [ 11, "t" ] ] ]
|
||||||
|
#
|
||||||
|
# The result of this operation is similar to that of
|
||||||
|
# Diff::LCS::ContextDiffCallbacks. They may be compared as:
|
||||||
|
#
|
||||||
|
# s = Diff::LCS.sdiff(seq1, seq2).reject { |e| e.action == "=" }
|
||||||
|
# c = Diff::LCS.sdiff(seq1, seq2, Diff::LCS::ContextDiffCallbacks).flatten
|
||||||
|
#
|
||||||
|
# s == c # -> true
|
||||||
|
#
|
||||||
|
# === Use
|
||||||
|
# This callback object must be initialised and is used by the Diff::LCS#sdiff
|
||||||
|
# method.
|
||||||
|
#
|
||||||
|
# cbo = Diff::LCS::SDiffCallbacks.new
|
||||||
|
# Diff::LCS.LCS(seq1, seq2, cbo)
|
||||||
|
#
|
||||||
|
# As with the other initialisable callback objects, Diff::LCS::SDiffCallbacks
|
||||||
|
# can be initialised with a block. As there is no "finishing" to be done,
|
||||||
|
# this has no effect on the state of the object.
|
||||||
|
#
|
||||||
|
# cbo = Diff::LCS::SDiffCallbacks.new { |tcbo| Diff::LCS.LCS(seq1, seq2, tcbo) }
|
||||||
|
#
|
||||||
|
# === Simplified Array Format
|
||||||
|
# The simplified array format used in the example above can be obtained
|
||||||
|
# with:
|
||||||
|
#
|
||||||
|
# require 'pp'
|
||||||
|
# pp diffs.map { |e| e.to_a }
|
||||||
|
class Diff::LCS::SDiffCallbacks
  # Returns the difference set collected during the diff process.
  attr_reader :diffs

  # Starts with an empty difference set and yields the new object when a
  # block is given.
  def initialize #:yields self:
    @diffs = []
    yield self if block_given?
  end

  # Appends the simplified ContextChange for +event+ to #diffs.
  def match(event)
    @diffs << Diff::LCS::ContextChange.simplify(event)
  end

  # Removals, additions and changes are recorded exactly like matches.
  alias_method :discard_a, :match
  alias_method :discard_b, :match
  alias_method :change, :match
end
|
169
lib/diff/lcs/change.rb
Normal file
169
lib/diff/lcs/change.rb
Normal file
|
@ -0,0 +1,169 @@
|
||||||
|
#!/usr/bin/env ruby
|
||||||
|
#--
|
||||||
|
# Copyright 2004 Austin Ziegler <diff-lcs@halostatue.ca>
|
||||||
|
# adapted from:
|
||||||
|
# Algorithm::Diff (Perl) by Ned Konz <perl@bike-nomad.com>
|
||||||
|
# Smalltalk by Mario I. Wolczko <mario@wolczko.com>
|
||||||
|
# implements McIlroy-Hunt diff algorithm
|
||||||
|
#
|
||||||
|
# This program is free software. It may be redistributed and/or modified under
|
||||||
|
# the terms of the GPL version 2 (or later), the Perl Artistic licence, or the
|
||||||
|
# Ruby licence.
|
||||||
|
#
|
||||||
|
# $Id: change.rb,v 1.4 2004/08/08 20:33:09 austin Exp $
|
||||||
|
#++
|
||||||
|
# Provides Diff::LCS::Change and Diff::LCS::ContextChange.
|
||||||
|
|
||||||
|
# Centralises the change test code in Diff::LCS::Change and
|
||||||
|
# Diff::LCS::ContextChange, since it's the same for both classes.
|
||||||
|
# Predicates over the including object's @action instance variable.
# Every test compares @action with the one-character action code.
module Diff::LCS::ChangeTypeTests
  # True when the action is '-'.
  def deleting?
    @action == '-'
  end

  # True when the action is '+'.
  def adding?
    @action == '+'
  end

  # True when the action is '='.
  def unchanged?
    @action == '='
  end

  # True when the action is '!'.
  # (Previously compared @changed, which is never assigned, so this was
  # always false.)
  def changed?
    @action == '!'
  end

  # True when the action is '>'.
  def finished_a?
    @action == '>'
  end

  # True when the action is '<'.
  def finished_b?
    @action == '<'
  end
end
|
||||||
|
|
||||||
|
# Represents a simplistic (non-contextual) change. Represents the removal or
|
||||||
|
# addition of an element from either the old or the new sequenced enumerable.
|
||||||
|
class Diff::LCS::Change
  # Returns the action this Change represents. Can be '+' (#adding?), '-'
  # (#deleting?), '=' (#unchanged?), or '!' (#changed?). When created by
  # Diff::LCS#diff or Diff::LCS#sdiff, it may also be '>' (#finished_a?) or
  # '<' (#finished_b?).
  attr_reader :action
  # The position given to the constructor.
  attr_reader :position
  # The element given to the constructor.
  attr_reader :element

  include Comparable

  # Two changes are equal when action, position and element are all equal.
  def ==(other)
    (self.action == other.action) and
      (self.position == other.position) and
      (self.element == other.element)
  end

  # Orders changes by action, then by position, then by element.
  def <=>(other)
    r = self.action <=> other.action
    r = self.position <=> other.position if r.zero?
    r = self.element <=> other.element if r.zero?
    r
  end

  def initialize(action, position, element)
    @action = action
    @position = position
    @element = element
  end

  # Returns the change as the array [action, position, element].
  def to_a
    [@action, @position, @element]
  end

  # Creates a Change from an array produced by Change#to_a.
  def self.from_a(arr)
    Diff::LCS::Change.new(arr[0], arr[1], arr[2])
  end

  include Diff::LCS::ChangeTypeTests
end
|
||||||
|
|
||||||
|
# A contextual change: it records the action taken together with the position
# and value of the element on both the old and the new side.
class Diff::LCS::ContextChange
  # The action this change represents: '+' (#adding?), '-' (#deleting?),
  # '=' (#unchanged?) or '!' (#changed?). Changes created by Diff::LCS#diff
  # or Diff::LCS#sdiff may also carry '>' (#finished_a?) or '<'
  # (#finished_b?).
  attr_reader :action
  attr_reader :old_position
  attr_reader :old_element
  attr_reader :new_position
  attr_reader :new_element

  include Comparable

  # Equal when the action, both positions and both elements are equal.
  # Comparison stops at the first field that differs.
  def ==(other)
    (@action == other.action) &&
      (@old_position == other.old_position) &&
      (@new_position == other.new_position) &&
      (@old_element == other.old_element) &&
      (@new_element == other.new_element)
  end

  # Debug representation showing the class, object id, action, positions and
  # elements.
  def inspect(*args)
    "#<#{self.class.name}:#{__id__} @action=#{action} positions=#{old_position},#{new_position} elements=#{old_element.inspect},#{new_element.inspect}>"
  end

  # Orders by action, old position, new position, old element and finally
  # new element; each field only matters when all earlier ones compared as 0.
  def <=>(other)
    order = @action <=> other.action
    return order unless order.zero?

    order = @old_position <=> other.old_position
    return order unless order.zero?

    order = @new_position <=> other.new_position
    return order unless order.zero?

    order = @old_element <=> other.old_element
    return order unless order.zero?

    @new_element <=> other.new_element
  end

  # Stores the five fields exactly as given.
  def initialize(action, old_position, old_element, new_position, new_element)
    @action = action
    @old_position, @old_element = old_position, old_element
    @new_position, @new_element = new_position, new_element
  end

  # Returns [action, [old_position, old_element], [new_position, new_element]].
  def to_a
    old_side = [@old_position, @old_element]
    new_side = [@new_position, @new_element]
    [@action, old_side, new_side]
  end

  # Creates a ContextChange from an array produced by ContextChange#to_a.
  # A flat five-element array (action, old position, old element, new
  # position, new element) is accepted as well.
  def self.from_a(arr)
    if arr.size == 5
      return Diff::LCS::ContextChange.new(arr[0], arr[1], arr[2], arr[3], arr[4])
    end

    Diff::LCS::ContextChange.new(arr[0], arr[1][0], arr[1][1], arr[2][0], arr[2][1])
  end

  # Simplifies a context change for use in some diff callbacks. '<' actions
  # are converted to '-' and '>' actions are converted to '+'. Removals lose
  # their new element, additions lose their old element; every other action
  # passes through untouched.
  def self.simplify(event)
    fields = event.to_a

    case fields[0]
    when '-', '<'
      fields[0] = '-'
      fields[2][1] = nil
    when '+', '>'
      fields[0] = '+'
      fields[1][1] = nil
    end

    Diff::LCS::ContextChange.from_a(fields)
  end

  include Diff::LCS::ChangeTypeTests
end
|
257
lib/diff/lcs/hunk.rb
Normal file
257
lib/diff/lcs/hunk.rb
Normal file
|
@ -0,0 +1,257 @@
|
||||||
|
#!/usr/bin/env ruby
|
||||||
|
#--
|
||||||
|
# Copyright 2004 Austin Ziegler <diff-lcs@halostatue.ca>
|
||||||
|
# adapted from:
|
||||||
|
# Algorithm::Diff (Perl) by Ned Konz <perl@bike-nomad.com>
|
||||||
|
# Smalltalk by Mario I. Wolczko <mario@wolczko.com>
|
||||||
|
# implements McIlroy-Hunt diff algorithm
|
||||||
|
#
|
||||||
|
# This program is free software. It may be redistributed and/or modified under
|
||||||
|
# the terms of the GPL version 2 (or later), the Perl Artistic licence, or the
|
||||||
|
# Ruby licence.
|
||||||
|
#
|
||||||
|
# $Id: hunk.rb,v 1.2 2004/08/08 20:33:09 austin Exp $
|
||||||
|
#++
|
||||||
|
# Contains Diff::LCS::Hunk for bin/ldiff.
|
||||||
|
|
||||||
|
require 'diff/lcs/block'
|
||||||
|
|
||||||
|
# A Hunk is a group of Blocks which overlap because of the context
# surrounding each block. (So if we're not using context, every hunk will
# contain one block.) Used by the ldiff program.
class Diff::LCS::Hunk
  # Create a hunk using references to both the old and new data, as well as
  # the piece of data.
  #
  # data_old, data_new:: the two sequences being compared; they are sliced
  #                      with the hunk's start/end indexes when rendering.
  # piece::              passed to Diff::LCS::Block.new to build the hunk's
  #                      first block.
  # context::            number of context items to add on each side; nil or
  #                      0 adds none.
  # file_length_difference:: the running length difference before this hunk;
  #                      the updated value is exposed through
  #                      #file_length_difference.
  def initialize(data_old, data_new, piece, context, file_length_difference)
    # At first, a hunk will have just one Block in it
    @blocks = [ Diff::LCS::Block.new(piece) ]
    @data_old = data_old
    @data_new = data_new

    before = after = file_length_difference
    after += @blocks[0].diff_size
    @file_length_difference = after # The caller must get this manually

    # Save the start & end of each array. If the array doesn't exist
    # (e.g., we're only adding items in this block), then figure out the
    # line number based on the line number of the other file and the
    # current difference in file lengths.
    if @blocks[0].remove.empty?
      a1 = a2 = nil
    else
      a1 = @blocks[0].remove[0].position
      a2 = @blocks[0].remove[-1].position
    end

    if @blocks[0].insert.empty?
      b1 = b2 = nil
    else
      b1 = @blocks[0].insert[0].position
      b2 = @blocks[0].insert[-1].position
    end

    @start_old = a1 || (b1 - before)
    @start_new = b1 || (a1 + before)
    @end_old = a2 || (b2 - after)
    @end_new = b2 || (a2 + after)

    self.flag_context = context
  end

  attr_reader :blocks
  attr_reader :start_old, :start_new
  attr_reader :end_old, :end_new
  attr_reader :file_length_difference

  # Change the "start" and "end" fields to note that context should be added
  # to this hunk
  attr_accessor :flag_context
  def flag_context=(context) #:nodoc:
    return if context.nil? || context.zero?

    # Never reach back past the first item of the old data.
    add_start = (context > @start_old) ? @start_old : context
    @start_old -= add_start
    @start_new -= add_start

    # Limit the trailing context by the size of the old data.
    if (@end_old + context) > @data_old.size
      add_end = @data_old.size - @end_old
    else
      add_end = context
    end
    @end_old += add_end
    @end_new += add_end
  end

  # Prepends +hunk+ to this one: this hunk takes over the other hunk's start
  # positions and its blocks are placed in front of this hunk's blocks.
  def unshift(hunk)
    @start_old = hunk.start_old
    @start_new = hunk.start_new
    blocks.unshift(*hunk.blocks)
  end

  # Is there an overlap between this hunk and the earlier +hunk+? Note: if
  # the end of the earlier hunk is one less than the beginning of this one,
  # they overlap. Returns nil when no hunk is given.
  def overlaps?(hunk = nil)
    return nil if hunk.nil?

    a = (@start_old - hunk.end_old) <= 1
    b = (@start_new - hunk.end_new) <= 1
    a || b
  end

  # Renders the hunk in the requested format (:old, :unified, :context,
  # :reverse_ed or :ed_finish). For :ed the hunk itself is returned so the
  # caller can render it later with :ed_finish. Raises a RuntimeError for
  # any other format.
  def diff(format)
    case format
    when :old
      old_diff
    when :unified
      unified_diff
    when :context
      context_diff
    when :ed
      self
    when :reverse_ed, :ed_finish
      ed_diff(format)
    else
      raise "Unknown diff format #{format}."
    end
  end

  # Yields each item of the old data covered by this hunk. The positional
  # +block+ argument is not used; pass the code to run as a Ruby block.
  def each_old(block)
    @data_old[@start_old .. @end_old].each { |e| yield e }
  end

  private
  # Note that an old diff can't have any context. Therefore, we know that
  # there's only one block in the hunk.
  def old_diff
    warn "Expecting only one block in an old diff hunk!" if @blocks.size > 1
    op_act = { "+" => 'a', "-" => 'd', "!" => "c" }

    block = @blocks[0]

    # Calculate item number range. Old diff range is just like a context
    # diff range, except the ranges are on one line with the action between
    # them.
    s = "#{context_range(:old)}#{op_act[block.op]}#{context_range(:new)}\n"
    # If removing anything, just print out all the remove lines in the hunk
    # which is just all the remove lines in the block.
    @data_old[@start_old .. @end_old].each { |e| s << "< #{e}\n" } unless block.remove.empty?
    s << "---\n" if block.op == "!"
    @data_new[@start_new .. @end_new].each { |e| s << "> #{e}\n" } unless block.insert.empty?
    s
  end

  # Renders the hunk in unified format: an "@@ -old +new @@" header followed
  # by the old items with removals and insertions marked.
  def unified_diff
    # Calculate item number range.
    s = "@@ -#{unified_range(:old)} +#{unified_range(:new)} @@\n"

    # Outlist starts containing the hunk of the old file. Removing an item
    # just means putting a '-' in front of it. Inserting an item requires
    # getting it from the new file and splicing it in. We splice in
    # +num_added+ items. Remove blocks use +num_added+ because splicing
    # changed the length of outlist.
    #
    # We remove +num_removed+ items. Insert blocks use +num_removed+
    # because their item numbers -- corresponding to positions in the NEW
    # file -- don't take removed items into account.
    lo, hi, num_added, num_removed = @start_old, @end_old, 0, 0

    outlist = @data_old[lo .. hi].collect { |e| e.gsub(/^/, ' ') }

    @blocks.each do |block|
      block.remove.each do |item|
        op = item.action.to_s # -
        offset = item.position - lo + num_added
        outlist[offset].gsub!(/^ /, op.to_s)
        num_removed += 1
      end
      block.insert.each do |item|
        op = item.action.to_s # +
        offset = item.position - @start_new + num_removed
        outlist[offset, 0] = "#{op}#{@data_new[item.position]}"
        num_added += 1
      end
    end

    s << outlist.join("\n")
  end

  # Renders the hunk in context format. The items of a file section are only
  # listed when at least one block changes that file; otherwise just the
  # section's range header is printed.
  def context_diff
    s = "***************\n"
    s << "*** #{context_range(:old)} ****\n"
    r = context_range(:new)

    # Print out file 1 part for each block in context diff format if there
    # are any blocks that remove items. (select/reject always return an
    # Array, which is truthy even when empty, so emptiness must be tested
    # explicitly.)
    lo, hi = @start_old, @end_old
    removes = @blocks.reject { |e| e.remove.empty? }
    unless removes.empty?
      outlist = @data_old[lo .. hi].collect { |e| e.gsub(/^/, ' ') }
      removes.each do |block|
        block.remove.each do |item|
          outlist[item.position - lo].gsub!(/^ /) { block.op } # - or !
        end
      end
      s << outlist.join("\n") << "\n"
    end

    s << "--- #{r} ----\n"
    lo, hi = @start_new, @end_new
    inserts = @blocks.reject { |e| e.insert.empty? }
    unless inserts.empty?
      outlist = @data_new[lo .. hi].collect { |e| e.gsub(/^/, ' ') }
      inserts.each do |block|
        block.insert.each do |item|
          outlist[item.position - lo].gsub!(/^ /) { block.op } # + or !
        end
      end
      s << outlist.join("\n")
    end
    s
  end

  # Renders the hunk as an ed command. For :reverse_ed the command letter
  # precedes the range; otherwise the range comes first and its comma is
  # replaced by a space. Inserted items follow, terminated by a "." line.
  def ed_diff(format)
    op_act = { "+" => 'a', "-" => 'd', "!" => "c" }
    warn "Expecting only one block in an old diff hunk!" if @blocks.size > 1

    if format == :reverse_ed
      s = "#{op_act[@blocks[0].op]}#{context_range(:old)}\n"
    else
      s = "#{context_range(:old).gsub(/,/, ' ')}#{op_act[@blocks[0].op]}\n"
    end

    unless @blocks[0].insert.empty?
      @data_new[@start_new .. @end_new].each { |e| s << "#{e}\n" }
      s << ".\n"
    end
    s
  end

  # Generate a range of item numbers to print. Only print 1 number if the
  # range has only one item in it. Otherwise, it's 'start,end'
  def context_range(mode)
    case mode
    when :old
      s, e = (@start_old + 1), (@end_old + 1)
    when :new
      s, e = (@start_new + 1), (@end_new + 1)
    end

    (s < e) ? "#{s},#{e}" : "#{e}"
  end

  # Generate a range of item numbers to print for unified diff. Print
  # number where block starts, followed by number of lines in the block
  # (don't print number of lines if it's 1)
  def unified_range(mode)
    case mode
    when :old
      s, e = (@start_old + 1), (@end_old + 1)
    when :new
      s, e = (@start_new + 1), (@end_new + 1)
    end

    length = e - s + 1
    first = (length < 2) ? e : s # "strange, but correct"
    (length == 1) ? "#{first}" : "#{first},#{length}"
  end
end
|
226
lib/diff/lcs/ldiff.rb
Normal file
226
lib/diff/lcs/ldiff.rb
Normal file
|
@ -0,0 +1,226 @@
|
||||||
|
#!/usr/bin/env ruby
|
||||||
|
|
||||||
|
require 'optparse'
|
||||||
|
require 'ostruct'
|
||||||
|
require 'diff/lcs/hunk'
|
||||||
|
|
||||||
|
# == ldiff Usage
|
||||||
|
# ldiff [options] oldfile newfile
|
||||||
|
#
|
||||||
|
# -c:: Displays a context diff with 3 lines of context.
|
||||||
|
# -C [LINES], --context [LINES]:: Displays a context diff with LINES lines of context. Default 3 lines.
|
||||||
|
# -u:: Displays a unified diff with 3 lines of context.
|
||||||
|
# -U [LINES], --unified [LINES]:: Displays a unified diff with LINES lines of context. Default 3 lines.
|
||||||
|
# -e:: Creates an 'ed' script to change oldfile to newfile.
|
||||||
|
# -f:: Creates an 'ed' script to change oldfile to newfile in reverse order.
|
||||||
|
# -a, --text:: Treats the files as text and compares them line-by-line, even if they do not seem to be text.
|
||||||
|
# --binary:: Treats the files as binary.
|
||||||
|
# -q, --brief:: Reports only whether or not the files differ, not the details.
|
||||||
|
# --help:: Shows the command-line help.
|
||||||
|
# --version:: Shows the version of Diff::LCS.
|
||||||
|
#
|
||||||
|
# By default, produces an "old-style" diff, with output like UNIX diff.
|
||||||
|
#
|
||||||
|
# == Copyright
|
||||||
|
# Copyright © 2004 Austin Ziegler
|
||||||
|
#
|
||||||
|
# Part of Diff::LCS <http://rubyforge.org/projects/ruwiki/>
|
||||||
|
# Austin Ziegler <diff-lcs@halostatue.ca>
|
||||||
|
#
|
||||||
|
# This program is free software. It may be redistributed and/or modified under
|
||||||
|
# the terms of the GPL version 2 (or later), the Perl Artistic licence, or the
|
||||||
|
# Ruby licence.
|
||||||
|
# Command-line front end that compares two files and prints the differences
# in old-style, context, unified or ed-script form.
module Diff::LCS::Ldiff
  BANNER = <<-COPYRIGHT
ldiff #{Diff::LCS::VERSION}
Copyright © 2004 Austin Ziegler

Part of Diff::LCS.
http://rubyforge.org/projects/ruwiki/

Austin Ziegler <diff-lcs@halostatue.ca>

This program is free software. It may be redistributed and/or modified under
the terms of the GPL version 2 (or later), the Perl Artistic licence, or the
Ruby licence.

$Id: ldiff.rb,v 1.1 2004/09/26 01:37:49 austin Exp $
  COPYRIGHT

  class << self
    attr_reader :format, :lines #:nodoc:
    attr_reader :file_old, :file_new #:nodoc:
    attr_reader :data_old, :data_new #:nodoc:

    # Parses +args+ (an argument list that responds to #options, as ARGV
    # does once optparse is loaded), compares the two named files and writes
    # the result to +output+. Usage/help/version text goes to +error+.
    #
    # Returns 0 when the files do not differ (or after --help/--version),
    # 1 when they differ, and 127 when exactly two file names were not
    # supplied. +input+ is accepted but not used.
    def run(args, input = $stdin, output = $stdout, error = $stderr) #:nodoc:
      args.options do |o|
        o.banner = "Usage: #{File.basename($0)} [options] oldfile newfile"
        o.separator ""
        o.on('-c',
             'Displays a context diff with 3 lines of',
             'context.') do |ctx|
          @format = :context
          @lines = 3
        end
        o.on('-C', '--context [LINES]', Numeric,
             'Displays a context diff with LINES lines',
             'of context. Default 3 lines.') do |ctx|
          @format = :context
          @lines = ctx || 3
        end
        o.on('-u',
             'Displays a unified diff with 3 lines of',
             'context.') do |ctx|
          @format = :unified
          @lines = 3
        end
        o.on('-U', '--unified [LINES]', Numeric,
             'Displays a unified diff with LINES lines',
             'of context. Default 3 lines.') do |ctx|
          @format = :unified
          @lines = ctx || 3
        end
        o.on('-e',
             'Creates an \'ed\' script to change',
             'oldfile to newfile.') do |ctx|
          @format = :ed
        end
        o.on('-f',
             'Creates an \'ed\' script to change',
             'oldfile to newfile in reverse order.') do |ctx|
          @format = :reverse_ed
        end
        o.on('-a', '--text',
             'Treat the files as text and compare them',
             'line-by-line, even if they do not seem',
             'to be text.') do |txt|
          @binary = false
        end
        o.on('--binary',
             'Treats the files as binary.') do |bin|
          @binary = true
        end
        o.on('-q', '--brief',
             'Report only whether or not the files',
             'differ, not the details.') do |ctx|
          @format = :report
        end
        o.on_tail('--help', 'Shows this text.') do
          error << o
          return 0
        end
        o.on_tail('--version', 'Shows the version of Diff::LCS.') do
          error << BANNER
          return 0
        end
        o.on_tail ""
        o.on_tail 'By default, runs produces an "old-style" diff, with output like UNIX diff.'
        o.parse!
      end

      unless args.size == 2
        error << args.options
        return 127
      end

      # Defaults are for old-style diff
      @format ||= :old
      @lines ||= 0

      # Take the file names from the argument list that was just parsed
      # rather than from the global ARGV.
      file_old, file_new = *args

      case @format
      when :context
        char_old = '*' * 3
        char_new = '-' * 3
      when :unified
        char_old = '-' * 3
        char_new = '+' * 3
      end

      # After we've read up to a certain point in each file, the number of
      # items we've read from each file will differ by FLD (could be 0).
      file_length_difference = 0

      if @binary.nil? || @binary
        data_old = IO::read(file_old)
        data_new = IO::read(file_new)

        # Test binary status: a NUL byte within the first 4096 characters of
        # either file marks the pair as binary.
        if @binary.nil?
          @binary = data_old[0...4096].include?("\0") ||
                    data_new[0...4096].include?("\0")
        end

        unless @binary
          data_old = data_old.split(/\n/).map! { |e| e.chomp }
          data_new = data_new.split(/\n/).map! { |e| e.chomp }
        end
      else
        data_old = IO::readlines(file_old).map! { |e| e.chomp }
        data_new = IO::readlines(file_new).map! { |e| e.chomp }
      end

      # diff yields lots of pieces, each of which is basically a Block object
      if @binary
        # For binary data we only learn whether the contents differ.
        diffs = (data_old != data_new)
      else
        diffs = Diff::LCS.diff(data_old, data_new)
        diffs = nil if diffs.empty?
      end

      return 0 unless diffs

      if @format == :report
        output << "Files #{file_old} and #{file_new} differ\n"
        return 1
      end

      if @binary
        # +diffs+ is just +true+ here, so there are no hunks to walk.
        output << "Binary files #{file_old} and #{file_new} differ\n"
        return 1
      end

      if (@format == :unified) || (@format == :context)
        ft = File.stat(file_old).mtime.localtime.strftime('%Y-%m-%d %H:%M:%S %z')
        output << "#{char_old} #{file_old}\t#{ft}\n"
        ft = File.stat(file_new).mtime.localtime.strftime('%Y-%m-%d %H:%M:%S %z')
        output << "#{char_new} #{file_new}\t#{ft}\n"
      end

      # Loop over hunks. If a hunk overlaps with the last hunk, join them.
      # Otherwise, print out the old one.
      oldhunk = hunk = nil

      # In :ed mode the hunks are collected first and rendered in reverse
      # order at the end.
      if @format == :ed
        real_output = output
        output = []
      end

      # NOTE(review): in :ed mode the "\n" separators appended below land in
      # the same array as the hunks, and the final reverse_each calls #diff
      # on every entry -- confirm that is intended.
      diffs.each do |piece|
        begin
          hunk = Diff::LCS::Hunk.new(data_old, data_new, piece, @lines,
                                     file_length_difference)
          file_length_difference = hunk.file_length_difference

          next unless oldhunk

          if (@lines > 0) && hunk.overlaps?(oldhunk)
            hunk.unshift(oldhunk)
          else
            output << oldhunk.diff(@format)
          end
        ensure
          oldhunk = hunk
          output << "\n"
        end
      end

      output << oldhunk.diff(@format)
      output << "\n"

      if @format == :ed
        output.reverse_each { |e| real_output << e.diff(:ed_finish) }
      end

      return 1
    end
  end
end
|
19
lib/diff/lcs/string.rb
Normal file
19
lib/diff/lcs/string.rb
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
#!/usr/bin/env ruby
|
||||||
|
#--
|
||||||
|
# Copyright 2004 Austin Ziegler <diff-lcs@halostatue.ca>
|
||||||
|
# adapted from:
|
||||||
|
# Algorithm::Diff (Perl) by Ned Konz <perl@bike-nomad.com>
|
||||||
|
# Smalltalk by Mario I. Wolczko <mario@wolczko.com>
|
||||||
|
# implements McIlroy-Hunt diff algorithm
|
||||||
|
#
|
||||||
|
# This program is free software. It may be redistributed and/or modified under
|
||||||
|
# the terms of the GPL version 2 (or later), the Perl Artistic licence, or the
|
||||||
|
# Ruby licence.
|
||||||
|
#
|
||||||
|
# $Id: string.rb,v 1.3 2004/08/08 20:33:09 austin Exp $
|
||||||
|
#++
|
||||||
|
# Includes Diff::LCS into String.
|
||||||
|
|
||||||
|
# Mix Diff::LCS into String.
String.send(:include, Diff::LCS)
|
|
@ -1,11 +1,9 @@
|
||||||
require 'diff'
|
require 'xhtmldiff'
|
||||||
# Temporary class containing all rendering stuff from a Revision
|
# Temporary class containing all rendering stuff from a Revision
|
||||||
# I want to shift all rendering loguc to the controller eventually
|
# I want to shift all rendering loguc to the controller eventually
|
||||||
|
|
||||||
class PageRenderer
|
class PageRenderer
|
||||||
|
|
||||||
include HTMLDiff
|
|
||||||
|
|
||||||
def self.setup_url_generator(url_generator)
|
def self.setup_url_generator(url_generator)
|
||||||
@@url_generator = url_generator
|
@@url_generator = url_generator
|
||||||
end
|
end
|
||||||
|
@ -41,8 +39,22 @@ class PageRenderer
|
||||||
def display_diff
|
def display_diff
|
||||||
previous_revision = @revision.page.previous_revision(@revision)
|
previous_revision = @revision.page.previous_revision(@revision)
|
||||||
if previous_revision
|
if previous_revision
|
||||||
rendered_previous_revision = WikiContent.new(previous_revision, @@url_generator).render!
|
|
||||||
diff(rendered_previous_revision, display_content)
|
previous_content = "<div>\n" + WikiContent.new(previous_revision, @@url_generator).render!.to_s + "\n</div>"
|
||||||
|
current_content = "<div>\n" + display_content.to_s + "\n</div>"
|
||||||
|
diff_doc = REXML::Document.new
|
||||||
|
diff_doc << (div = REXML::Element.new 'div')
|
||||||
|
hd = XHTMLDiff.new(div)
|
||||||
|
|
||||||
|
parsed_previous_revision = REXML::HashableElementDelegator.new(
|
||||||
|
REXML::XPath.first(REXML::Document.new(previous_content), '/div'))
|
||||||
|
parsed_display_content = REXML::HashableElementDelegator.new(
|
||||||
|
REXML::XPath.first(REXML::Document.new(current_content), '/div'))
|
||||||
|
Diff::LCS.traverse_balanced(parsed_previous_revision, parsed_display_content, hd)
|
||||||
|
|
||||||
|
diffs = ''
|
||||||
|
diff_doc.write(diffs, -1, true, true)
|
||||||
|
diffs
|
||||||
else
|
else
|
||||||
display_content
|
display_content
|
||||||
end
|
end
|
||||||
|
|
179
lib/xhtmldiff.rb
Normal file
179
lib/xhtmldiff.rb
Normal file
|
@ -0,0 +1,179 @@
|
||||||
|
#!/usr/bin/env ruby
|
||||||
|
# Author: Aredridel <aredridel@nbtsc.org>
|
||||||
|
# Website: http://theinternetco.net/projects/ruby/xhtmldiff.html
|
||||||
|
# Licence: same as Ruby
|
||||||
|
# Version: 1.22
|
||||||
|
#
|
||||||
|
# Tweaks by Jacques Distler <distler@golem.ph.utexas.edu>
|
||||||
|
# -- add classnames to <del> and <ins> elements added by XHTMLDiff,
|
||||||
|
# for better CSS styling
|
||||||
|
|
||||||
|
require 'diff/lcs'
|
||||||
|
require 'rexml/document'
|
||||||
|
require 'delegate'
|
||||||
|
|
||||||
|
# Returns the larger of the two arguments as judged by +>+; when neither is
# greater than the other, +b+ is returned.
def Math.max(a, b)
  return a if a > b

  b
end
|
||||||
|
|
||||||
|
module REXML

  class Text
    # Gives text nodes the same #deep_clone entry point that the diff code
    # calls on elements; for a text node it is simply #clone.
    def deep_clone
      clone
    end
  end

  # Wraps an Element so that equality and hashing are based on the element's
  # serialized form rather than on object identity.
  class HashableElementDelegator < DelegateClass(Element)
    def initialize(sub)
      super(sub)
    end

    # Equal when both sides serialize (#to_s) to the same text once leading
    # and trailing whitespace has been stripped.
    def ==(other)
      other.to_s.strip == self.to_s.strip
    end

    def eql?(other)
      self == other
    end

    # Delegates the lookup to the wrapped element and re-wraps the result
    # when it is itself an instance of the wrapped object's class, so that
    # children get the same equality semantics. Anything else is returned
    # as is.
    def [](k)
      child = super
      child.is_a?(__getobj__.class) ? self.class.new(child) : child
    end

    # Hash of the wrapped element's serialized form.
    def hash
      __getobj__.to_s.hash
    end
  end

end
|
||||||
|
|
||||||
|
# Callback object for Diff::LCS.traverse_balanced that appends a redlined
# copy of the compared nodes to +output+: unchanged nodes are copied,
# removed ones are wrapped in <del>, added ones in <ins>.
class XHTMLDiff
  include REXML
  attr_accessor :output

  class << self
    BLOCK_CONTAINERS = ['div', 'ul', 'li']

    # Builds and returns a node describing the difference between +a+ and
    # +b+.
    #
    # * Equal arguments: a deep clone of +a+.
    # * Two HashableElementDelegators: a new element with +a+'s name and
    #   attributes, filled by diffing the two with a new instance of the
    #   receiving class.
    # * Two REXML::Text nodes: a <span> filled by diffing their values split
    #   on whitespace.
    #
    # Raises ArgumentError for any other combination.
    def diff(a, b)
      return a.deep_clone if a == b

      if REXML::HashableElementDelegator === a && REXML::HashableElementDelegator === b
        container = REXML::Element.new(a.name)
        container.add_attributes a.attributes
        handler = self.new(container)
        Diff::LCS.traverse_balanced(a, b, handler)
        return container
      end

      if REXML::Text === a && REXML::Text === b
        span = REXML::Element.new('span')
        old_words = a.value.split(/\s/)
        new_words = b.value.split(/\s/)
        handler = XHTMLTextDiff.new(span)
        Diff::LCS.traverse_balanced(old_words, new_words, handler)
        return span
      end

      raise ArgumentError, "both arguments must be equal or both be elements. a is #{a.class.name} and b is #{b.class.name}"
    end
  end

  # Instance-level shortcut for the class-level diff.
  def diff(a, b)
    self.class.diff(a,b)
  end

  # +output+ is the node that receives the diff via <<.
  def initialize(output)
    @output = output
  end

  # This will be called when both elements are the same
  def match(event)
    @output << event.old_element.deep_clone if event.old_element
  end

  # This will be called when there is an element in A that isn't in B
  def discard_a(event)
    @output << wrap(event.old_element, 'del', 'diffdel')
  end

  # Called when an element in A was replaced by a different one in B. The
  # nested diff of the two is used when it can be computed and, once its
  # unattributed <ins>/<del> spans are cut out, its size is still more than
  # half the size of the larger original; otherwise the old element is
  # emitted as deleted and the new one as inserted.
  def change(event)
    nested =
      begin
        diff(event.old_element, event.new_element)
      rescue ArgumentError
        nil
      end

    keep_nested = false
    if nested
      remaining = nested.to_s.gsub(%r{<(ins|del)>.*</\1>}, '').size
      largest = Math.max(event.old_element.to_s.size, event.new_element.to_s.size)
      keep_nested = (Float(remaining) / largest) > 0.5
    end

    if keep_nested
      @output << nested
    else
      @output << wrap(event.old_element, 'del', 'diffmod')
      @output << wrap(event.new_element, 'ins', 'diffmod')
    end
  end

  # This will be called when there is an element in B that isn't in A
  def discard_b(event)
    @output << wrap(event.new_element, 'ins', 'diffins')
  end

  # Intentionally empty.
  def choose_event(event, element, tag)
  end

  # Returns a deep clone of +element+, placed inside a new +tag+ element
  # when +tag+ is given. When +class_name+ is given it is set as the class
  # attribute of the returned node.
  def wrap(element, tag = nil, class_name = nil)
    copy = element.deep_clone
    if tag
      node = Element.new(tag)
      node << copy
    else
      node = copy
    end
    node.add_attribute('class', class_name) if class_name
    node
  end

  # Variant used for word-level diffs of text: the compared items are
  # strings rather than nodes.
  class XHTMLTextDiff < XHTMLDiff
    # A replaced word is always shown as the old word deleted followed by
    # the new word inserted.
    def change(event)
      @output << wrap(event.old_element, 'del', 'diffmod')
      @output << wrap(event.new_element, 'ins', 'diffmod')
    end

    # This will be called when both elements are the same
    def match(event)
      @output << wrap(event.old_element, nil, nil) if event.old_element
    end

    # This will be called when there is an element in A that isn't in B
    def discard_a(event)
      @output << wrap(event.old_element, 'del', 'diffdel')
    end

    # This will be called when there is an element in B that isn't in A
    def discard_b(event)
      @output << wrap(event.new_element, 'ins', 'diffins')
    end

    # Strings are turned into a text node with one leading space. Without a
    # +tag+ that node is returned directly; otherwise it becomes the text of
    # a new +tag+ element, which gets +class_name+ as its class attribute
    # when one is given.
    def wrap(element, tag = nil, class_name = nil)
      element = REXML::Text.new(" " + element) if String === element
      return element unless tag

      holder = REXML::Element.new(tag)
      holder.add_text element
      holder.add_attribute('class', class_name) if class_name
      holder
    end
  end

end
|
||||||
|
|
||||||
|
# When this file is executed directly there is nothing to run yet: say so on
# standard error and exit with status 1.
if __FILE__ == $0
  $stderr.puts "No tests available yet"
  exit 1
end
|
|
@ -1,110 +1,94 @@
|
||||||
#!/usr/bin/env ruby
|
#!/usr/bin/env ruby
|
||||||
|
|
||||||
require File.expand_path(File.dirname(__FILE__) + '/../test_helper')
|
require File.expand_path(File.dirname(__FILE__) + '/../test_helper')
|
||||||
require 'diff'
|
require 'xhtmldiff'
|
||||||
|
|
||||||
class DiffTest < Test::Unit::TestCase
|
class DiffTest < Test::Unit::TestCase
|
||||||
|
|
||||||
include HTMLDiff
|
|
||||||
|
|
||||||
def setup
|
def setup
|
||||||
@builder = DiffBuilder.new('old', 'new')
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_start_of_tag
|
def diff(a,b)
|
||||||
assert @builder.start_of_tag?('<')
|
diff_doc = REXML::Document.new
|
||||||
assert(!@builder.start_of_tag?('>'))
|
diff_doc << (div = REXML::Element.new 'div' )
|
||||||
assert(!@builder.start_of_tag?('a'))
|
hd = XHTMLDiff.new(div)
|
||||||
end
|
parsed_a = REXML::HashableElementDelegator.new(
|
||||||
|
REXML::XPath.first(REXML::Document.new("<div>"+a+"</div>"), '/div'))
|
||||||
def test_end_of_tag
|
parsed_b = REXML::HashableElementDelegator.new(
|
||||||
assert @builder.end_of_tag?('>')
|
REXML::XPath.first(REXML::Document.new("<div>"+b+"</div>"), '/div'))
|
||||||
assert(!@builder.end_of_tag?('<'))
|
Diff::LCS.traverse_balanced(parsed_a, parsed_b, hd)
|
||||||
assert(!@builder.end_of_tag?('a'))
|
diffs = ''
|
||||||
end
|
diff_doc.write(diffs, -1, true, true)
|
||||||
|
diffs
|
||||||
def test_whitespace
|
|
||||||
assert @builder.whitespace?(" ")
|
|
||||||
assert @builder.whitespace?("\n")
|
|
||||||
assert @builder.whitespace?("\r")
|
|
||||||
assert(!@builder.whitespace?("a"))
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_convert_html_to_list_of_words_simple
|
|
||||||
assert_equal(
|
|
||||||
['the', ' ', 'original', ' ', 'text'],
|
|
||||||
@builder.convert_html_to_list_of_words('the original text'))
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_convert_html_to_list_of_words_should_separate_endlines
|
|
||||||
assert_equal(
|
|
||||||
['a', "\n", 'b', "\r", 'c'],
|
|
||||||
@builder.convert_html_to_list_of_words("a\nb\rc"))
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_convert_html_to_list_of_words_should_not_compress_whitespace
|
|
||||||
assert_equal(
|
|
||||||
['a', ' ', 'b', ' ', 'c', "\r \n ", 'd'],
|
|
||||||
@builder.convert_html_to_list_of_words("a b c\r \n d"))
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_convert_html_to_list_of_words_should_handle_tags_well
|
|
||||||
assert_equal(
|
|
||||||
['<p>', 'foo', ' ', 'bar', '</p>'],
|
|
||||||
@builder.convert_html_to_list_of_words("<p>foo bar</p>"))
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_convert_html_to_list_of_words_interesting
|
|
||||||
assert_equal(
|
|
||||||
['<p>', 'this', ' ', 'is', '</p>', "\r\n", '<p>', 'the', ' ', 'new', ' ', 'string',
|
|
||||||
'</p>', "\r\n", '<p>', 'around', ' ', 'the', ' ', 'world', '</p>'],
|
|
||||||
@builder.convert_html_to_list_of_words(
|
|
||||||
"<p>this is</p>\r\n<p>the new string</p>\r\n<p>around the world</p>"))
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_html_diff_simple
|
def test_html_diff_simple
|
||||||
a = 'this was the original string'
|
a = 'this was the original string'
|
||||||
b = 'this is the new string'
|
b = 'this is the new string'
|
||||||
assert_equal('this <del class="diffmod">was</del><ins class="diffmod">is</ins> the ' +
|
assert_equal("<div><span> this<del class='diffmod'> was</del><ins class='diffmod'> is</ins> the" +
|
||||||
'<del class="diffmod">original</del><ins class="diffmod">new</ins> string',
|
"<del class='diffmod'> original</del><ins class='diffmod'> new</ins> string</span></div>",
|
||||||
diff(a, b))
|
diff(a, b))
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_html_diff_with_multiple_paragraphs
|
def test_html_diff_with_multiple_paragraphs
|
||||||
a = "<p>this was the original string</p>"
|
a = "<p>this was the original string</p>"
|
||||||
b = "<p>this is</p>\r\n<p> the new string</p>\r\n<p>around the world</p>"
|
b = "<p>this is</p>\n<p> the new string</p>\n<p>around the world</p>"
|
||||||
|
|
||||||
# Some of this expected result is accidental to implementation.
|
|
||||||
# At least it's well-formed and more or less correct.
|
|
||||||
assert_equal(
|
assert_equal(
|
||||||
"<p>this <del class=\"diffmod\">was</del><ins class=\"diffmod\">is</ins></p>"+
|
"<div><p><span> this<del class='diffmod'> was</del><ins class='diffmod'> is</ins>" +
|
||||||
"<ins class=\"diffmod\">\r\n</ins><p> the " +
|
"<del class='diffdel'> the</del><del class='diffdel'> original</del><del class='diffdel'> string</del></span></p>" +
|
||||||
"<del class=\"diffmod\">original</del><ins class=\"diffmod\">new</ins>" +
|
"<ins class='diffins'>\n</ins><ins class='diffins'><p> the new string</p></ins>" +
|
||||||
" string</p><ins class=\"diffins\">\r\n</ins>" +
|
"<ins class='diffins'>\n</ins><ins class='diffins'><p>around the world</p></ins></div>",
|
||||||
"<p><ins class=\"diffins\">around the world</ins></p>",
|
|
||||||
diff(a, b))
|
diff(a, b))
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def test_split_paragraph_into_two
|
||||||
|
a = "<p>foo bar</p>"
|
||||||
|
b = "<p>foo</p><p>bar</p>"
|
||||||
|
assert_equal(
|
||||||
|
"<div><p><span> foo<del class='diffdel'> bar</del></span></p>" +
|
||||||
|
"<ins class='diffins'><p>bar</p></ins></div>",
|
||||||
|
diff(a,b))
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_join_two_paragraphs_into_one
|
||||||
|
a = "<p>foo</p><p>bar</p>"
|
||||||
|
b = "<p>foo bar</p>"
|
||||||
|
assert_equal(
|
||||||
|
"<div><p><span> foo<ins class='diffins'> bar</ins></span></p>" +
|
||||||
|
"<del class='diffdel'><p>bar</p></del></div>",
|
||||||
|
diff(a,b))
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_add_inline_element
|
||||||
|
a = "<p>foo bar</p>"
|
||||||
|
b = "<p>foo <b>bar</b></p>"
|
||||||
|
assert_equal(
|
||||||
|
"<div><p><span> foo<del class='diffdel'> bar</del></span>" +
|
||||||
|
"<ins class='diffins'><b>bar</b></ins></p></div>",
|
||||||
|
diff(a,b))
|
||||||
|
end
|
||||||
|
|
||||||
# FIXME this test fails (ticket #67, http://dev.instiki.org/ticket/67)
|
# FIXME this test fails (ticket #67, http://dev.instiki.org/ticket/67)
|
||||||
def test_html_diff_preserves_endlines_in_pre
|
def test_html_diff_preserves_endlines_in_pre
|
||||||
a = "<pre>\na\nb\nc\n</pre>"
|
a = "<pre>a\nb\nc\n</pre>"
|
||||||
b = "<pre>\n</pre>"
|
b = "<pre>a\n</pre>"
|
||||||
assert_equal(
|
assert_equal(
|
||||||
"<pre>\n<del class=\"diffdel\">a\nb\nc\n</del></pre>",
|
"<div><pre><span> a\n<del class='diffdel'>b\nc\n</del></span></pre></div>",
|
||||||
diff(a, b))
|
diff(a, b))
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_html_diff_with_tags
|
def test_html_diff_with_tags
|
||||||
a = ""
|
a = ""
|
||||||
b = "<div>foo</div>"
|
b = "<div>foo</div>"
|
||||||
assert_equal '<div><ins class="diffins">foo</ins></div>', diff(a, b)
|
assert_equal "<div><ins class='diffins'><div>foo</div></ins></div>", diff(a, b)
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_diff_for_tag_change
|
def test_diff_for_tag_change
|
||||||
a = "<a>x</a>"
|
a = "<a>x</a>"
|
||||||
b = "<b>x</b>"
|
b = "<b>x</b>"
|
||||||
# FIXME sad, but true - this case produces an invalid XML. If handle this you can, strong your foo is.
|
# FIXME sad, but true - this case produces an invalid XML. If handle this you can, strong your foo is.
|
||||||
assert_equal '<a><b>x</a></b>', diff(a, b)
|
assert_equal "<div><del class='diffdel'><a>x</a></del><ins class='diffins'><b>x</b></ins></div>", diff(a, b)
|
||||||
end
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
Loading…
Add table
Reference in a new issue