diff --git a/lib/chunks/engines.rb b/lib/chunks/engines.rb index 61b3d4ca..d4f583d2 100644 --- a/lib/chunks/engines.rb +++ b/lib/chunks/engines.rb @@ -44,7 +44,7 @@ module Engines require 'maruku/ext/math' html = sanitize_rexml(Maruku.new(@content.delete("\r\x01-\x08\x0B\x0C\x0E-\x1F"), {:math_enabled => false}).to_html_tree) - html.gsub(/\A
(.*)<\/div>\z/, '\1') + html.gsub(/\A
\n?(.*?)\n?<\/div>\Z/m, '\1') end end @@ -56,7 +56,7 @@ module Engines require 'maruku/ext/math' html = sanitize_rexml(Maruku.new(@content.delete("\r\x01-\x08\x0B\x0C\x0E-\x1F"), {:math_enabled => true, :math_numbered => ['\\[','\\begin{equation}']}).to_html_tree) - html.gsub(/\A
(.*)<\/div>\z/, '\1') + html.gsub(/\A
\n?(.*?)\n?<\/div>\Z/m, '\1') end end diff --git a/lib/page_renderer.rb b/lib/page_renderer.rb index 2432fc40..d1d44ffa 100644 --- a/lib/page_renderer.rb +++ b/lib/page_renderer.rb @@ -1,4 +1,5 @@ require 'xhtmldiff' + # Temporary class containing all rendering stuff from a Revision # I want to shift all rendering loguc to the controller eventually @@ -43,7 +44,9 @@ class PageRenderer previous_content = "
" + WikiContent.new(previous_revision, @@url_generator).render!.to_s + "
" current_content = "
" + display_content.to_s + "
" diff_doc = REXML::Document.new - diff_doc << (div = REXML::Element.new 'div') + div = REXML::Element.new('div', nil, {:respect_whitespace =>:all}) + div.attributes['class'] = 'xhtmldiff_wrapper' + diff_doc << div hd = XHTMLDiff.new(div) parsed_previous_revision = REXML::HashableElementDelegator.new( @@ -54,7 +57,7 @@ class PageRenderer diffs = '' diff_doc.write(diffs, -1, true, true) - diffs.gsub(/^
(.*)<\/div>$/, '\1') + diffs.gsub(/\A
(.*)<\/div>\Z/m, '\1') else display_content end diff --git a/lib/wiki_content.rb b/lib/wiki_content.rb index 3e348837..1a0f5ba4 100644 --- a/lib/wiki_content.rb +++ b/lib/wiki_content.rb @@ -1,11 +1,11 @@ require 'cgi' -require_dependency 'chunks/engines' -require_dependency 'chunks/category' +require 'chunks/engines' +require 'chunks/category' require_dependency 'chunks/include' require_dependency 'chunks/wiki' require_dependency 'chunks/literal' require_dependency 'chunks/uri' -require_dependency 'chunks/nowiki' +require 'chunks/nowiki' # Wiki content is just a string that can process itself with a chain of # actions. The actions can modify wiki content so that certain parts of diff --git a/test/unit/diff_test.rb b/test/unit/diff_test.rb index c452486b..5dc204af 100755 --- a/test/unit/diff_test.rb +++ b/test/unit/diff_test.rb @@ -11,7 +11,9 @@ class DiffTest < Test::Unit::TestCase def diff(a,b) diff_doc = REXML::Document.new - diff_doc << (div = REXML::Element.new 'div' ) + div = REXML::Element.new('div', nil, {:respect_whitespace =>:all}) + div.attributes['class'] = 'xhtmldiff_wrapper' + diff_doc << div hd = XHTMLDiff.new(div) parsed_a = REXML::HashableElementDelegator.new( REXML::XPath.first(REXML::Document.new("
"+a+"
"), '/div')) @@ -20,14 +22,14 @@ class DiffTest < Test::Unit::TestCase Diff::LCS.traverse_balanced(parsed_a, parsed_b, hd) diffs = '' diff_doc.write(diffs, -1, true, true) - diffs + diffs.gsub(/\A
(.*)<\/div>\Z/m, '\1') end def test_html_diff_simple a = 'this was the original string' b = 'this is the new string' - assert_equal("
this was is the" + - " original new string
", + assert_equal(" this was is the" + + " original new string", diff(a, b)) end @@ -35,10 +37,10 @@ class DiffTest < Test::Unit::TestCase a = "

this was the original string

" b = "

this is

\n

the new string

\n

around the world

" assert_equal( - "

this was is" + + "

this was is" + " the original string

" + "\n

the new string

" + - "\n

around the world

", + "\n

around the world

", diff(a, b)) end @@ -46,8 +48,8 @@ class DiffTest < Test::Unit::TestCase a = "

this is a paragraph

\n

this is a second paragraph

\n

this is a third paragraph

" b = "

this is a paragraph

\n

this is a third paragraph

" assert_equal( - "

this is a paragraph

\n

this is a second paragraph

" + - "\n

this is a third paragraph

", + "

this is a paragraph

\n

this is a second paragraph

" + + "\n

this is a third paragraph

", diff(a, b)) end @@ -55,8 +57,8 @@ class DiffTest < Test::Unit::TestCase a = "

foo bar

" b = "

foo

bar

" assert_equal( - "

foo bar

" + - "

bar

", + "

foo bar

" + + "

bar

", diff(a,b)) end @@ -64,8 +66,8 @@ class DiffTest < Test::Unit::TestCase a = "

foo

bar

" b = "

foo bar

" assert_equal( - "

foo bar

" + - "

bar

", + "

foo bar

" + + "

bar

", diff(a,b)) end @@ -73,31 +75,31 @@ class DiffTest < Test::Unit::TestCase a = "

foo bar

" b = "

foo bar

" assert_equal( - "

foo bar" + - "bar

", + "

foo bar" + + "bar

", diff(a,b)) end + def test_html_diff_with_tags + a = "" + b = "
foo
" + assert_equal "
foo
", diff(a, b) + end + # FIXME this test fails (ticket #67, http://dev.instiki.org/ticket/67) def test_html_diff_preserves_endlines_in_pre a = "
a\nb\nc\n
" b = "
a\n
" assert_equal( - "
 a\nb\nc\n
", + "
 a\nb\nc\n
", diff(a, b)) end - def test_html_diff_with_tags - a = "" - b = "
foo
" - assert_equal "
foo
", diff(a, b) - end - + # FIXME. xhtmldiff fails to detect any change here def test_diff_for_tag_change a = "x" b = "x" - # FIXME. xhtmldiff fails to detect any change here - assert_equal "
xx
", diff(a, b) + assert_equal "xx", diff(a, b) end end diff --git a/test/unit/maruku_tex.rb b/test/unit/maruku_tex.rb new file mode 100755 index 00000000..5757b632 --- /dev/null +++ b/test/unit/maruku_tex.rb @@ -0,0 +1,68 @@ +#!/usr/bin/env ruby + +require File.dirname(__FILE__) + '/../test_helper' + +class RedClothForTexTest < Test::Unit::TestCase + def test_basics + assert_equal '{\bf First Page}', Maruku.new('*First Page*').to_latex + assert_equal '{\em First Page}', Maruku.new('_First Page_').to_latex + assert_equal "\\begin{itemize}\n\t\\item A\n\t\t\\item B\n\t\t\\item C\n\t\\end{itemize}", Maruku.new('* A\n* B\n* C').to_latex + end + + def test_blocks + assert_equal '\section*{hello}', Maruku.new('#hello#').to_latex + assert_equal '\subsection*{hello}', Maruku.new('##hello##').to_latex + end + + def test_table_of_contents + +source = < 'Abe', 'B' => 'Babe')) + end + + def test_entities + assert_equal "Beck \\& Fowler are 100\\% cool", RedClothForTex.new("Beck & Fowler are 100% cool").to_tex + end + + def test_bracket_links + assert_equal "such a Horrible Day, but I won't be Made Useless", RedClothForTex.new("such a [[Horrible Day]], but I won't be [[Made Useless]]").to_tex + end + + def test_footnotes_on_abbreviations + assert_equal( + "such a Horrible Day\\footnote{1}, but I won't be Made Useless", + RedClothForTex.new("such a [[Horrible Day]][1], but I won't be [[Made Useless]]").to_tex + ) + end + + def test_subsection_depth + assert_equal "\\subsubsection*{Hello}", RedClothForTex.new("h4. Hello").to_tex + end +end diff --git a/test/unit/page_renderer_test.rb b/test/unit/page_renderer_test.rb index 10ec208d..94544d72 100644 --- a/test/unit/page_renderer_test.rb +++ b/test/unit/page_renderer_test.rb @@ -57,12 +57,12 @@ class PageRendererTest < Test::Unit::TestCase set_web_property :markup, :markdown assert_markup_parsed_as( - %{

My Headline

\n\n

that } + + %{

My Headline

\n\n

that } + %{Smart Engine GUI?

}, "My Headline\n===========\n\nthat SmartEngineGUI") assert_markup_parsed_as( - %{

My Headline

\n\n

that } + + %{

My Headline

\n\n

that } + %{Smart Engine GUI?

}, "#My Headline#\n\nthat SmartEngineGUI") @@ -77,7 +77,7 @@ class PageRendererTest < Test::Unit::TestCase assert_markup_parsed_as( %{

This is a code block:

\n\n
def a_method(arg)\n} +
-        %{return ThatWay\n
\n\n

Nice!

}, + %{return ThatWay\n\n

Nice!

}, code_block) end @@ -105,15 +105,15 @@ class PageRendererTest < Test::Unit::TestCase set_web_property :markup, :markdown assert_markup_parsed_as( - "

Markdown heading

\n\n" + + "

Markdown heading

\n\n" + "

h2. Textile heading

\n\n" + "

some text with -styles-

\n\n" + - "
    \n
  • list 1
  • \n
  • list 2
  • \n
", + "
    \n
  • list 1
  • \n\n
  • list 2
  • \n
", textile_and_markdown) set_web_property :markup, :textile assert_markup_parsed_as( - "

Markdown heading
================

\n\n\n\t

Textile heading

" + + "

Markdown heading
================

\n\n\n\t

Textile heading

" + "\n\n\n\t

some text with styles

" + "\n\n\n\t
    \n\t
  • list 1
  • \n\t\t
  • list 2
  • \n\t
", textile_and_markdown) diff --git a/vendor/plugins/HTML5lib/lib/html5lib/constants.rb b/vendor/plugins/HTML5lib/lib/html5lib/constants.rb index b28a6f01..8144c93f 100755 --- a/vendor/plugins/HTML5lib/lib/html5lib/constants.rb +++ b/vendor/plugins/HTML5lib/lib/html5lib/constants.rb @@ -148,6 +148,18 @@ module HTML5lib input ] + CDATA_ELEMENTS = %w[title textarea] + + RCDATA_ELEMENTS = %w[ + style + script + xmp + iframe + noembed + noframes + noscript + ] + BOOLEAN_ATTRIBUTES = { :global => %w[irrelevant], 'style' => %w[scoped], diff --git a/vendor/plugins/HTML5lib/lib/html5lib/filters.rb b/vendor/plugins/HTML5lib/lib/html5lib/filters.rb new file mode 100644 index 00000000..05c3edd4 --- /dev/null +++ b/vendor/plugins/HTML5lib/lib/html5lib/filters.rb @@ -0,0 +1 @@ +require 'html5lib/filters/optionaltags' diff --git a/vendor/plugins/HTML5lib/lib/html5lib/filters/base.rb b/vendor/plugins/HTML5lib/lib/html5lib/filters/base.rb new file mode 100644 index 00000000..c1a5c660 --- /dev/null +++ b/vendor/plugins/HTML5lib/lib/html5lib/filters/base.rb @@ -0,0 +1,10 @@ +require 'delegate' +require 'enumerator' + +module HTML5lib + module Filters + class Base < SimpleDelegator + include Enumerable + end + end +end diff --git a/vendor/plugins/HTML5lib/lib/html5lib/filters/inject_meta_charset.rb b/vendor/plugins/HTML5lib/lib/html5lib/filters/inject_meta_charset.rb new file mode 100644 index 00000000..294796e2 --- /dev/null +++ b/vendor/plugins/HTML5lib/lib/html5lib/filters/inject_meta_charset.rb @@ -0,0 +1,62 @@ +require 'html5lib/filters/base' + +module HTML5lib + module Filters + class InjectMetaCharset < Base + def initialize(source, encoding) + super(source) + @encoding = encoding + end + + def each + state = :pre_head + meta_found = @encoding.nil? + pending = [] + + __getobj__.each do |token| + case token[:type] + when :StartTag + state = :in_head if token[:name].downcase == "head" + + when :EmptyTag + if token[:name].downcase == "meta" + if token[:data].any? {|name,value| name=='charset'} + # replace charset with actual encoding + attrs=Hash[*token[:data].flatten] + attrs['charset'] = @encoding + token[:data] = attrs.to_a.sort + meta_found = true + end + + elsif token[:name].downcase == "head" and not meta_found + # insert meta into empty head + yield({:type => :StartTag, :name => "head", :data => {}}) + yield({:type => :EmptyTag, :name => "meta", + :data => {"charset" => @encoding}}) + yield({:type => :EndTag, :name => "head"}) + meta_found = true + next + end + + when :EndTag + if token[:name].downcase == "head" and pending.any? + # insert meta into head (if necessary) and flush pending queue + yield pending.shift + yield({:type => :EmptyTag, :name => "meta", + :data => {"charset" => @encoding}}) if not meta_found + yield pending.shift while pending.any? + meta_found = true + state = :post_head + end + end + + if state == :in_head + pending << token + else + yield token + end + end + end + end + end +end diff --git a/vendor/plugins/HTML5lib/lib/html5lib/filters/optionaltags.rb b/vendor/plugins/HTML5lib/lib/html5lib/filters/optionaltags.rb new file mode 100644 index 00000000..aacf3b73 --- /dev/null +++ b/vendor/plugins/HTML5lib/lib/html5lib/filters/optionaltags.rb @@ -0,0 +1,199 @@ +require 'html5lib/constants' +require 'html5lib/filters/base' + +module HTML5lib + module Filters + + class OptionalTagFilter < Base + def slider + previous1 = previous2 = nil + __getobj__.each do |token| + yield previous2, previous1, token if previous1 != nil + previous2 = previous1 + previous1 = token + end + yield previous2, previous1, nil + end + + def each + slider do |previous, token, nexttok| + type = token[:type] + if type == :StartTag + yield token unless token[:data].empty? and is_optional_start(token[:name], previous, nexttok) + elsif type == :EndTag + yield token unless is_optional_end(token[:name], nexttok) + else + yield token + end + end + end + + def is_optional_start(tagname, previous, nexttok) + type = nexttok ? nexttok[:type] : nil + if tagname == 'html' + # An html element's start tag may be omitted if the first thing + # inside the html element is not a space character or a comment. + return ![:Comment, :SpaceCharacters].include?(type) + elsif tagname == 'head' + # A head element's start tag may be omitted if the first thing + # inside the head element is an element. + return type == :StartTag + elsif tagname == 'body' + # A body element's start tag may be omitted if the first thing + # inside the body element is not a space character or a comment, + # except if the first thing inside the body element is a script + # or style element and the node immediately preceding the body + # element is a head element whose end tag has been omitted. + if [:Comment, :SpaceCharacters].include?(type) + return false + elsif type == :StartTag + # XXX: we do not look at the preceding event, so we never omit + # the body element's start tag if it's followed by a script or + # a style element. + return !%w[script style].include?(nexttok[:name]) + else + return true + end + elsif tagname == 'colgroup' + # A colgroup element's start tag may be omitted if the first thing + # inside the colgroup element is a col element, and if the element + # is not immediately preceeded by another colgroup element whose + # end tag has been omitted. + if type == :StartTag + # XXX: we do not look at the preceding event, so instead we never + # omit the colgroup element's end tag when it is immediately + # followed by another colgroup element. See is_optional_end. + return nexttok[:name] == "col" + else + return false + end + elsif tagname == 'tbody' + # A tbody element's start tag may be omitted if the first thing + # inside the tbody element is a tr element, and if the element is + # not immediately preceeded by a tbody, thead, or tfoot element + # whose end tag has been omitted. + if type == :StartTag + # omit the thead and tfoot elements' end tag when they are + # immediately followed by a tbody element. See is_optional_end. + if previous and previous[:type] == :EndTag and \ + %w(tbody thead tfoot).include?(previous[:name]) + return false + end + + return nexttok[:name] == 'tr' + else + return false + end + end + return false + end + + def is_optional_end(tagname, nexttok) + type = nexttok ? nexttok[:type] : nil + if %w[html head body].include?(tagname) + # An html element's end tag may be omitted if the html element + # is not immediately followed by a space character or a comment. + return ![:Comment, :SpaceCharacters].include?(type) + elsif %w[li optgroup option tr].include?(tagname) + # A li element's end tag may be omitted if the li element is + # immediately followed by another li element or if there is + # no more content in the parent element. + # An optgroup element's end tag may be omitted if the optgroup + # element is immediately followed by another optgroup element, + # or if there is no more content in the parent element. + # An option element's end tag may be omitted if the option + # element is immediately followed by another option element, + # or if there is no more content in the parent element. + # A tr element's end tag may be omitted if the tr element is + # immediately followed by another tr element, or if there is + # no more content in the parent element. + if type == :StartTag + return nexttok[:name] == tagname + else + return type == :EndTag || type == nil + end + elsif %w(dt dd).include?(tagname) + # A dt element's end tag may be omitted if the dt element is + # immediately followed by another dt element or a dd element. + # A dd element's end tag may be omitted if the dd element is + # immediately followed by another dd element or a dt element, + # or if there is no more content in the parent element. + if type == :StartTag + return %w(dt dd).include?(nexttok[:name]) + elsif tagname == 'dd' + return type == :EndTag || type == nil + else + return false + end + elsif tagname == 'p' + # A p element's end tag may be omitted if the p element is + # immediately followed by an address, blockquote, dl, fieldset, + # form, h1, h2, h3, h4, h5, h6, hr, menu, ol, p, pre, table, + # or ul element, or if there is no more content in the parent + # element. + if type == :StartTag + return %w(address blockquote dl fieldset form h1 h2 h3 h4 h5 + h6 hr menu ol p pre table ul).include?(nexttok[:name]) + else + return type == :EndTag || type == nil + end + elsif tagname == 'colgroup' + # A colgroup element's end tag may be omitted if the colgroup + # element is not immediately followed by a space character or + # a comment. + if [:Comment, :SpaceCharacters].include?(type) + return false + elsif type == :StartTag + # XXX: we also look for an immediately following colgroup + # element. See is_optional_start. + return nexttok[:name] != 'colgroup' + else + return true + end + elsif %w(thead tbody).include? tagname + # A thead element's end tag may be omitted if the thead element + # is immediately followed by a tbody or tfoot element. + # A tbody element's end tag may be omitted if the tbody element + # is immediately followed by a tbody or tfoot element, or if + # there is no more content in the parent element. + # A tfoot element's end tag may be omitted if the tfoot element + # is immediately followed by a tbody element, or if there is no + # more content in the parent element. + # XXX: we never omit the end tag when the following element is + # a tbody. See is_optional_start. + if type == :StartTag + return %w(tbody tfoot).include?(nexttok[:name]) + elsif tagname == 'tbody' + return (type == :EndTag or type == nil) + else + return false + end + elsif tagname == 'tfoot' + # A tfoot element's end tag may be omitted if the tfoot element + # is immediately followed by a tbody element, or if there is no + # more content in the parent element. + # XXX: we never omit the end tag when the following element is + # a tbody. See is_optional_start. + if type == :StartTag + return nexttok[:name] == 'tbody' + else + return type == :EndTag || type == nil + end + elsif %w(td th).include? tagname + # A td element's end tag may be omitted if the td element is + # immediately followed by a td or th element, or if there is + # no more content in the parent element. + # A th element's end tag may be omitted if the th element is + # immediately followed by a td or th element, or if there is + # no more content in the parent element. + if type == :StartTag + return %w(td th).include?(nexttok[:name]) + else + return type == :EndTag || type == nil + end + end + return false + end + end + end +end diff --git a/vendor/plugins/HTML5lib/lib/html5lib/filters/sanitizer.rb b/vendor/plugins/HTML5lib/lib/html5lib/filters/sanitizer.rb new file mode 100644 index 00000000..db9a12e0 --- /dev/null +++ b/vendor/plugins/HTML5lib/lib/html5lib/filters/sanitizer.rb @@ -0,0 +1,15 @@ +require 'html5lib/filters/base' +require 'html5lib/sanitizer' + +module HTML5lib + module Filters + class HTMLSanitizeFilter < Base + include HTMLSanitizeModule + def each + __getobj__.each do |token| + yield(sanitize_token(token)) + end + end + end + end +end diff --git a/vendor/plugins/HTML5lib/lib/html5lib/filters/whitespace.rb b/vendor/plugins/HTML5lib/lib/html5lib/filters/whitespace.rb new file mode 100644 index 00000000..3b85fd7b --- /dev/null +++ b/vendor/plugins/HTML5lib/lib/html5lib/filters/whitespace.rb @@ -0,0 +1,36 @@ +require 'html5lib/constants' +require 'html5lib/filters/base' + +module HTML5lib + module Filters + class WhitespaceFilter < Base + + SPACE_PRESERVE_ELEMENTS = %w[pre textarea] + RCDATA_ELEMENTS + SPACES = /[#{SPACE_CHARACTERS.join('')}]+/m + + def each + preserve = 0 + __getobj__.each do |token| + case token[:type] + when :StartTag + if preserve > 0 or SPACE_PRESERVE_ELEMENTS.include?(token[:name]) + preserve += 1 + end + + when :EndTag + preserve -= 1 if preserve > 0 + + when :SpaceCharacters + next if preserve == 0 + + when :Characters + token[:data] = token[:data].sub(SPACES,' ') if preserve == 0 + end + + yield token + end + end + end + end +end + diff --git a/vendor/plugins/HTML5lib/lib/html5lib/sanitizer.rb b/vendor/plugins/HTML5lib/lib/html5lib/sanitizer.rb index 89c13187..5af9cf51 100644 --- a/vendor/plugins/HTML5lib/lib/html5lib/sanitizer.rb +++ b/vendor/plugins/HTML5lib/lib/html5lib/sanitizer.rb @@ -1,5 +1,4 @@ require 'cgi' -require 'html5lib/filters' module HTML5lib @@ -176,15 +175,6 @@ module HTML5lib end end - class HTMLSanitizeFilter < Filters::Base - include HTMLSanitizeModule - def each - __getobj__.each do |token| - yield(sanitize_token(token)) - end - end - end - class HTMLSanitizer < HTMLTokenizer include HTMLSanitizeModule def each diff --git a/vendor/plugins/HTML5lib/lib/html5lib/serializer.rb b/vendor/plugins/HTML5lib/lib/html5lib/serializer.rb index cc21c7fa..8fe95ed2 100644 --- a/vendor/plugins/HTML5lib/lib/html5lib/serializer.rb +++ b/vendor/plugins/HTML5lib/lib/html5lib/serializer.rb @@ -1,5 +1,4 @@ require 'html5lib/constants' -require 'html5lib/filters' module HTML5lib @@ -7,7 +6,7 @@ module HTML5lib CDATA_ELEMENTS = %w[style script xmp iframe noembed noframes noscript] def self.serialize(stream, options = {}) - new(options).serialize(stream) + new(options).serialize(stream, options[:encoding]) end def initialize(options={}) @@ -40,20 +39,25 @@ module HTML5lib def serialize(treewalker, encoding=nil) in_cdata = false + @errors = [] - -@errors = [] if encoding and @inject_meta_charset - treewalker = filter_inject_meta_charset(treewalker, encoding) + require 'html5lib/filters/inject_meta_charset' + treewalker = Filters::InjectMetaCharset.new(treewalker, encoding) end + if @strip_whitespace - treewalker = filter_whitespace(treewalker) + require 'html5lib/filters/whitespace' + treewalker = Filters::WhitespaceFilter.new(treewalker) end + if @sanitize - require 'html5lib/sanitizer' - treewalker = HTMLSanitizeFilter.new(treewalker) + require 'html5lib/filters/sanitizer' + treewalker = Filters::HTMLSanitizeFilter.new(treewalker) end + if @omit_optional_tags + require 'html5lib/filters/optionaltags' treewalker = Filters::OptionalTagFilter.new(treewalker) end @@ -62,25 +66,14 @@ module HTML5lib type = token[:type] if type == :Doctype doctype = "" % token[:name] - if encoding - result << doctype.encode(encoding) - else - result << doctype - end + result << doctype elsif [:Characters, :SpaceCharacters].include? type if type == :SpaceCharacters or in_cdata if in_cdata and token[:data].include?(" " ')).any? {|c| v.include?(c)} end v = v.gsub("&", "&") - if encoding - v = v.encode(encoding, unicode_encode_errors) - end if quote_attr quote_char = @quote_char if @use_best_quote_char @@ -141,11 +130,7 @@ module HTML5lib attributes << "/" end end - if encoding - result << "<%s%s>" % [name.encode(encoding), attributes.join('')] - else - result << "<%s%s>" % [name, attributes.join('')] - end + result << "<%s%s>" % [name, attributes.join('')] elsif type == :EndTag name = token[:name] @@ -155,33 +140,29 @@ module HTML5lib serializeError(_("Unexpected child element of a CDATA element")) end end_tag = "" - end_tag = end_tag.encode(encoding) if encoding result << end_tag elsif type == :Comment data = token[:data] serializeError(_("Comment contains --")) if data.index("--") comment = "" % token[:data] - if encoding - comment = comment.encode(encoding, unicode_encode_errors) - end result << comment else serializeError(token[:data]) end end - result.join('') - end - def render(treewalker, encoding=nil) - if encoding - return "".join(list(serialize(treewalker, encoding))) + if encoding and encoding != 'utf-8' + require 'iconv' + Iconv.iconv(encoding, 'utf-8', result.join('')).first else - return "".join(list(serialize(treewalker))) + result.join('') end end + alias :render :serialize + def serializeError(data="XXX ERROR MESSAGE NEEDED") # XXX The idea is to make data mandatory. @errors.push(data) @@ -189,22 +170,6 @@ module HTML5lib raise SerializeError end end - - def filter_inject_meta_charset(treewalker, encoding) - done = false - for token in treewalker - if not done and token[:type] == :StartTag \ - and token[:name].lower() == "head" - yield({:type => :EmptyTag, :name => "meta", \ - :data => {"charset" => encoding}}) - end - yield token - end - end - - def filter_whitespace(treewalker) - raise NotImplementedError - end end # Error in serialized tree diff --git a/vendor/plugins/HTML5lib/lib/html5lib/treewalkers/base.rb b/vendor/plugins/HTML5lib/lib/html5lib/treewalkers/base.rb index 64c280df..21d4d3f7 100644 --- a/vendor/plugins/HTML5lib/lib/html5lib/treewalkers/base.rb +++ b/vendor/plugins/HTML5lib/lib/html5lib/treewalkers/base.rb @@ -27,13 +27,13 @@ module TokenConstructor end def text(data) - if data =~ /^([#{SPACE_CHARACTERS.join('')}]+)/ + if data =~ /\A([#{SPACE_CHARACTERS.join('')}]+)/m yield({:type => :SpaceCharacters, :data => $1}) data = data[$1.length .. -1] return if data.empty? end - if data =~ /([#{SPACE_CHARACTERS.join('')}]+)$/ + if data =~ /([#{SPACE_CHARACTERS.join('')}]+)\Z/m yield({:type => :Characters, :data => data[0 ... -$1.length]}) yield({:type => :SpaceCharacters, :data => $1}) else diff --git a/vendor/plugins/HTML5lib/parse.rb b/vendor/plugins/HTML5lib/parse.rb index 2675e884..c6baaeb3 100755 --- a/vendor/plugins/HTML5lib/parse.rb +++ b/vendor/plugins/HTML5lib/parse.rb @@ -59,7 +59,7 @@ def printOutput(parser, document, opts) require 'html5lib/treewalkers' tokens = HTML5lib::TreeWalkers[opts.treebuilder].new(document) require 'html5lib/serializer' - print HTML5lib::HTMLSerializer.serialize(tokens, :encoding=>'utf-8') + print HTML5lib::HTMLSerializer.serialize(tokens, opts.serializer) when :hilite print document.hilite when :tree @@ -80,11 +80,16 @@ require 'ostruct' options = OpenStruct.new options.profile = false options.time = false -options.output = :tree +options.output = :html options.treebuilder = 'simpletree' options.error = false options.encoding = false options.parsemethod = :parse +options.serializer = { + :encoding => 'utf-8', + :omit_optional_tags => false, + :inject_meta_charset => false +} require 'optparse' opts = OptionParser.new do |opts| @@ -96,14 +101,6 @@ opts = OptionParser.new do |opts| options.time = time end - opts.on("--[no-]tree", "Do not print output tree") do |tree| - if tree - options.output = :tree - else - options.output = nil - end - end - opts.on("-b", "--treebuilder NAME") do |treebuilder| options.treebuilder = treebuilder end @@ -116,13 +113,17 @@ opts = OptionParser.new do |opts| options.parsemethod = :parseFragment end + opts.on("--tree", "output as debug tree") do |tree| + options.output = :tree + end + opts.on("-x", "--xml", "output as xml") do |xml| options.output = :xml options.treebuilder = "rexml" end - opts.on("--html", "Output as html") do |html| - options.output = :html + opts.on("--[no-]html", "Output as html") do |html| + options.output = (html ? :html : nil) end opts.on("--hilite", "Output as formatted highlighted code.") do |hilite| @@ -133,6 +134,22 @@ opts = OptionParser.new do |opts| options.encoding = encoding end + opts.on("--[no-]inject-meta-charset", "inject ") do |inject| + options.serializer[:inject_meta_charset] = inject + end + + opts.on("--[no-]strip-whitespace", "strip unnecessary whitespace") do |strip| + options.serializer[:strip_whitespace] = strip + end + + opts.on("--[no-]sanitize", "escape unsafe tags") do |sanitize| + options.serializer[:sanitize] = sanitize + end + + opts.on("--[no-]omit-optional-tags", "Omit optional tags") do |omit| + options.serializer[:omit_optional_tags] = omit + end + opts.on_tail("-h", "--help", "Show this message") do puts opts exit diff --git a/vendor/plugins/HTML5lib/testdata/serializer/injectmeta.test b/vendor/plugins/HTML5lib/testdata/serializer/injectmeta.test new file mode 100644 index 00000000..a59f0c7a --- /dev/null +++ b/vendor/plugins/HTML5lib/testdata/serializer/injectmeta.test @@ -0,0 +1,39 @@ +{"tests": [ + +{"description": "no encoding", + "options": {"inject_meta_charset": true}, + "input": [["EmptyTag", "head", {}]], + "expected": [""] +}, + +{"description": "empytag head", + "options": {"inject_meta_charset": true, "encoding":"utf-8"}, + "input": [["EmptyTag", "head", {}]], + "expected": [""] +}, + +{"description": "head w/title", + "options": {"inject_meta_charset": true, "encoding":"utf-8"}, + "input": [["StartTag", "head", {}], ["StartTag","title",{}], ["Characters", "foo"],["EndTag", "title"], ["EndTag", "head"]], + "expected": ["foo"] +}, + +{"description": "head w/meta-charset", + "options": {"inject_meta_charset": true, "encoding":"utf-8"}, + "input": [["StartTag", "head", {}], ["EmptyTag","meta",{"charset":"ascii"}], ["EndTag", "head"]], + "expected": [""] +}, + +{"description": "head w/robots", + "options": {"inject_meta_charset": true, "encoding":"utf-8"}, + "input": [["StartTag", "head", {}], ["EmptyTag","meta",{"name":"robots","content":"noindex"}], ["EndTag", "head"]], + "expected": [""] +}, + +{"description": "head w/robots & charset", + "options": {"inject_meta_charset": true, "encoding":"utf-8"}, + "input": [["StartTag", "head", {}], ["EmptyTag","meta",{"name":"robots","content":"noindex"}], ["EmptyTag","meta",{"charset":"ascii"}], ["EndTag", "head"]], + "expected": [""] +} + +]} diff --git a/vendor/plugins/HTML5lib/tests/test_serializer.rb b/vendor/plugins/HTML5lib/tests/test_serializer.rb index 31777240..7b8eaee0 100644 --- a/vendor/plugins/HTML5lib/tests/test_serializer.rb +++ b/vendor/plugins/HTML5lib/tests/test_serializer.rb @@ -24,7 +24,7 @@ class JsonWalker < HTML5lib::TreeWalkers::Base when 'Doctype' yield doctype(token[1]) else - raise ValueError("Unknown token type: " + type) + raise "Unknown token type: " + token[0] end end end @@ -37,7 +37,10 @@ class Html5SerializeTestcase < Test::Unit::TestCase tests['tests'].each_with_index do |test, index| define_method "test_#{test_name}_#{index+1}" do - next if test_name == 'whitespace' #TODO + if test["options"] and test["options"]["encoding"] + test["options"][:encoding] = test["options"]["encoding"] + end + result = HTML5lib::HTMLSerializer. serialize(JsonWalker.new(test["input"]), (test["options"] || {})) expected = test["expected"] diff --git a/vendor/plugins/maruku/lib/maruku/output/to_html.rb b/vendor/plugins/maruku/lib/maruku/output/to_html.rb index 82fa3bbb..7e053cd6 100644 --- a/vendor/plugins/maruku/lib/maruku/output/to_html.rb +++ b/vendor/plugins/maruku/lib/maruku/output/to_html.rb @@ -157,6 +157,7 @@ Example: # Render to an HTML fragment (returns a REXML document tree) def to_html_tree div = Element.new 'div' + div.attributes['class'] = 'maruku_wrapper_div' children_to_html.each do |e| div << e end