More fixes, sync with HTML5lib
Do a better job with the wrapper <div>s added by xhtmldiff and Maruku's to_html_tree method. More tests fixed.
This commit is contained in:
parent
3ca33e52b5
commit
3de374d6c1
|
@ -44,7 +44,7 @@ module Engines
|
||||||
require 'maruku/ext/math'
|
require 'maruku/ext/math'
|
||||||
html = sanitize_rexml(Maruku.new(@content.delete("\r\x01-\x08\x0B\x0C\x0E-\x1F"),
|
html = sanitize_rexml(Maruku.new(@content.delete("\r\x01-\x08\x0B\x0C\x0E-\x1F"),
|
||||||
{:math_enabled => false}).to_html_tree)
|
{:math_enabled => false}).to_html_tree)
|
||||||
html.gsub(/\A<div>(.*)<\/div>\z/, '\1')
|
html.gsub(/\A<div class="maruku_wrapper_div">\n?(.*?)\n?<\/div>\Z/m, '\1')
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -56,7 +56,7 @@ module Engines
|
||||||
require 'maruku/ext/math'
|
require 'maruku/ext/math'
|
||||||
html = sanitize_rexml(Maruku.new(@content.delete("\r\x01-\x08\x0B\x0C\x0E-\x1F"),
|
html = sanitize_rexml(Maruku.new(@content.delete("\r\x01-\x08\x0B\x0C\x0E-\x1F"),
|
||||||
{:math_enabled => true, :math_numbered => ['\\[','\\begin{equation}']}).to_html_tree)
|
{:math_enabled => true, :math_numbered => ['\\[','\\begin{equation}']}).to_html_tree)
|
||||||
html.gsub(/\A<div>(.*)<\/div>\z/, '\1')
|
html.gsub(/\A<div class="maruku_wrapper_div">\n?(.*?)\n?<\/div>\Z/m, '\1')
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
require 'xhtmldiff'
|
require 'xhtmldiff'
|
||||||
|
|
||||||
# Temporary class containing all rendering stuff from a Revision
|
# Temporary class containing all rendering stuff from a Revision
|
||||||
# I want to shift all rendering logic to the controller eventually
|
# I want to shift all rendering logic to the controller eventually
|
||||||
|
|
||||||
|
@ -43,7 +44,9 @@ class PageRenderer
|
||||||
previous_content = "<div>" + WikiContent.new(previous_revision, @@url_generator).render!.to_s + "</div>"
|
previous_content = "<div>" + WikiContent.new(previous_revision, @@url_generator).render!.to_s + "</div>"
|
||||||
current_content = "<div>" + display_content.to_s + "</div>"
|
current_content = "<div>" + display_content.to_s + "</div>"
|
||||||
diff_doc = REXML::Document.new
|
diff_doc = REXML::Document.new
|
||||||
diff_doc << (div = REXML::Element.new 'div')
|
div = REXML::Element.new('div', nil, {:respect_whitespace =>:all})
|
||||||
|
div.attributes['class'] = 'xhtmldiff_wrapper'
|
||||||
|
diff_doc << div
|
||||||
hd = XHTMLDiff.new(div)
|
hd = XHTMLDiff.new(div)
|
||||||
|
|
||||||
parsed_previous_revision = REXML::HashableElementDelegator.new(
|
parsed_previous_revision = REXML::HashableElementDelegator.new(
|
||||||
|
@ -54,7 +57,7 @@ class PageRenderer
|
||||||
|
|
||||||
diffs = ''
|
diffs = ''
|
||||||
diff_doc.write(diffs, -1, true, true)
|
diff_doc.write(diffs, -1, true, true)
|
||||||
diffs.gsub(/^<div>(.*)<\/div>$/, '\1')
|
diffs.gsub(/\A<div class='xhtmldiff_wrapper'>(.*)<\/div>\Z/m, '\1')
|
||||||
else
|
else
|
||||||
display_content
|
display_content
|
||||||
end
|
end
|
||||||
|
|
|
@ -1,11 +1,11 @@
|
||||||
require 'cgi'
|
require 'cgi'
|
||||||
require_dependency 'chunks/engines'
|
require 'chunks/engines'
|
||||||
require_dependency 'chunks/category'
|
require 'chunks/category'
|
||||||
require_dependency 'chunks/include'
|
require_dependency 'chunks/include'
|
||||||
require_dependency 'chunks/wiki'
|
require_dependency 'chunks/wiki'
|
||||||
require_dependency 'chunks/literal'
|
require_dependency 'chunks/literal'
|
||||||
require_dependency 'chunks/uri'
|
require_dependency 'chunks/uri'
|
||||||
require_dependency 'chunks/nowiki'
|
require 'chunks/nowiki'
|
||||||
|
|
||||||
# Wiki content is just a string that can process itself with a chain of
|
# Wiki content is just a string that can process itself with a chain of
|
||||||
# actions. The actions can modify wiki content so that certain parts of
|
# actions. The actions can modify wiki content so that certain parts of
|
||||||
|
|
|
@ -11,7 +11,9 @@ class DiffTest < Test::Unit::TestCase
|
||||||
|
|
||||||
def diff(a,b)
|
def diff(a,b)
|
||||||
diff_doc = REXML::Document.new
|
diff_doc = REXML::Document.new
|
||||||
diff_doc << (div = REXML::Element.new 'div' )
|
div = REXML::Element.new('div', nil, {:respect_whitespace =>:all})
|
||||||
|
div.attributes['class'] = 'xhtmldiff_wrapper'
|
||||||
|
diff_doc << div
|
||||||
hd = XHTMLDiff.new(div)
|
hd = XHTMLDiff.new(div)
|
||||||
parsed_a = REXML::HashableElementDelegator.new(
|
parsed_a = REXML::HashableElementDelegator.new(
|
||||||
REXML::XPath.first(REXML::Document.new("<div>"+a+"</div>"), '/div'))
|
REXML::XPath.first(REXML::Document.new("<div>"+a+"</div>"), '/div'))
|
||||||
|
@ -20,14 +22,14 @@ class DiffTest < Test::Unit::TestCase
|
||||||
Diff::LCS.traverse_balanced(parsed_a, parsed_b, hd)
|
Diff::LCS.traverse_balanced(parsed_a, parsed_b, hd)
|
||||||
diffs = ''
|
diffs = ''
|
||||||
diff_doc.write(diffs, -1, true, true)
|
diff_doc.write(diffs, -1, true, true)
|
||||||
diffs
|
diffs.gsub(/\A<div class='xhtmldiff_wrapper'>(.*)<\/div>\Z/m, '\1')
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_html_diff_simple
|
def test_html_diff_simple
|
||||||
a = 'this was the original string'
|
a = 'this was the original string'
|
||||||
b = 'this is the new string'
|
b = 'this is the new string'
|
||||||
assert_equal("<div><span> this<del class='diffmod'> was</del><ins class='diffmod'> is</ins> the" +
|
assert_equal("<span> this<del class='diffmod'> was</del><ins class='diffmod'> is</ins> the" +
|
||||||
"<del class='diffmod'> original</del><ins class='diffmod'> new</ins> string</span></div>",
|
"<del class='diffmod'> original</del><ins class='diffmod'> new</ins> string</span>",
|
||||||
diff(a, b))
|
diff(a, b))
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -35,10 +37,10 @@ class DiffTest < Test::Unit::TestCase
|
||||||
a = "<p>this was the original string</p>"
|
a = "<p>this was the original string</p>"
|
||||||
b = "<p>this is</p>\n<p> the new string</p>\n<p>around the world</p>"
|
b = "<p>this is</p>\n<p> the new string</p>\n<p>around the world</p>"
|
||||||
assert_equal(
|
assert_equal(
|
||||||
"<div><p><span> this<del class='diffmod'> was</del><ins class='diffmod'> is</ins>" +
|
"<p><span> this<del class='diffmod'> was</del><ins class='diffmod'> is</ins>" +
|
||||||
"<del class='diffdel'> the</del><del class='diffdel'> original</del><del class='diffdel'> string</del></span></p>" +
|
"<del class='diffdel'> the</del><del class='diffdel'> original</del><del class='diffdel'> string</del></span></p>" +
|
||||||
"<ins class='diffins'>\n</ins><ins class='diffins'><p> the new string</p></ins>" +
|
"<ins class='diffins'>\n</ins><ins class='diffins'><p> the new string</p></ins>" +
|
||||||
"<ins class='diffins'>\n</ins><ins class='diffins'><p>around the world</p></ins></div>",
|
"<ins class='diffins'>\n</ins><ins class='diffins'><p>around the world</p></ins>",
|
||||||
diff(a, b))
|
diff(a, b))
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -46,8 +48,8 @@ class DiffTest < Test::Unit::TestCase
|
||||||
a = "<p>this is a paragraph</p>\n<p>this is a second paragraph</p>\n<p>this is a third paragraph</p>"
|
a = "<p>this is a paragraph</p>\n<p>this is a second paragraph</p>\n<p>this is a third paragraph</p>"
|
||||||
b = "<p>this is a paragraph</p>\n<p>this is a third paragraph</p>"
|
b = "<p>this is a paragraph</p>\n<p>this is a third paragraph</p>"
|
||||||
assert_equal(
|
assert_equal(
|
||||||
"<div><p>this is a paragraph</p>\n<del class='diffdel'><p>this is a second paragraph</p></del>" +
|
"<p>this is a paragraph</p>\n<del class='diffdel'><p>this is a second paragraph</p></del>" +
|
||||||
"<del class='diffdel'>\n</del><p>this is a third paragraph</p></div>",
|
"<del class='diffdel'>\n</del><p>this is a third paragraph</p>",
|
||||||
diff(a, b))
|
diff(a, b))
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -55,8 +57,8 @@ class DiffTest < Test::Unit::TestCase
|
||||||
a = "<p>foo bar</p>"
|
a = "<p>foo bar</p>"
|
||||||
b = "<p>foo</p><p>bar</p>"
|
b = "<p>foo</p><p>bar</p>"
|
||||||
assert_equal(
|
assert_equal(
|
||||||
"<div><p><span> foo<del class='diffdel'> bar</del></span></p>" +
|
"<p><span> foo<del class='diffdel'> bar</del></span></p>" +
|
||||||
"<ins class='diffins'><p>bar</p></ins></div>",
|
"<ins class='diffins'><p>bar</p></ins>",
|
||||||
diff(a,b))
|
diff(a,b))
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -64,8 +66,8 @@ class DiffTest < Test::Unit::TestCase
|
||||||
a = "<p>foo</p><p>bar</p>"
|
a = "<p>foo</p><p>bar</p>"
|
||||||
b = "<p>foo bar</p>"
|
b = "<p>foo bar</p>"
|
||||||
assert_equal(
|
assert_equal(
|
||||||
"<div><p><span> foo<ins class='diffins'> bar</ins></span></p>" +
|
"<p><span> foo<ins class='diffins'> bar</ins></span></p>" +
|
||||||
"<del class='diffdel'><p>bar</p></del></div>",
|
"<del class='diffdel'><p>bar</p></del>",
|
||||||
diff(a,b))
|
diff(a,b))
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -73,31 +75,31 @@ class DiffTest < Test::Unit::TestCase
|
||||||
a = "<p>foo bar</p>"
|
a = "<p>foo bar</p>"
|
||||||
b = "<p>foo <b>bar</b></p>"
|
b = "<p>foo <b>bar</b></p>"
|
||||||
assert_equal(
|
assert_equal(
|
||||||
"<div><p><span> foo<del class='diffdel'> bar</del></span>" +
|
"<p><span> foo<del class='diffdel'> bar</del></span>" +
|
||||||
"<ins class='diffins'><b>bar</b></ins></p></div>",
|
"<ins class='diffins'><b>bar</b></ins></p>",
|
||||||
diff(a,b))
|
diff(a,b))
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def test_html_diff_with_tags
|
||||||
|
a = ""
|
||||||
|
b = "<div>foo</div>"
|
||||||
|
assert_equal "<ins class='diffins'><div>foo</div></ins>", diff(a, b)
|
||||||
|
end
|
||||||
|
|
||||||
# FIXME this test fails (ticket #67, http://dev.instiki.org/ticket/67)
|
# FIXME this test fails (ticket #67, http://dev.instiki.org/ticket/67)
|
||||||
def test_html_diff_preserves_endlines_in_pre
|
def test_html_diff_preserves_endlines_in_pre
|
||||||
a = "<pre>a\nb\nc\n</pre>"
|
a = "<pre>a\nb\nc\n</pre>"
|
||||||
b = "<pre>a\n</pre>"
|
b = "<pre>a\n</pre>"
|
||||||
assert_equal(
|
assert_equal(
|
||||||
"<div><pre><span> a\n<del class='diffdel'>b\nc\n</del></span></pre></div>",
|
"<pre><span> a\n<del class='diffdel'>b\nc\n</del></span></pre>",
|
||||||
diff(a, b))
|
diff(a, b))
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_html_diff_with_tags
|
# FIXME. xhtmldiff fails to detect any change here
|
||||||
a = ""
|
|
||||||
b = "<div>foo</div>"
|
|
||||||
assert_equal "<div><ins class='diffins'><div>foo</div></ins></div>", diff(a, b)
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_diff_for_tag_change
|
def test_diff_for_tag_change
|
||||||
a = "<a>x</a>"
|
a = "<a>x</a>"
|
||||||
b = "<b>x</b>"
|
b = "<b>x</b>"
|
||||||
# FIXME. xhtmldiff fails to detect any change here
|
assert_equal "<del class='diffdel'><a>x</a></del><ins class='diffins'><b>x</b></ins>", diff(a, b)
|
||||||
assert_equal "<div><del class='diffdel'><a>x</a></del><ins class='diffins'><b>x</b></ins></div>", diff(a, b)
|
|
||||||
end
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
68
test/unit/maruku_tex.rb
Executable file
68
test/unit/maruku_tex.rb
Executable file
|
@ -0,0 +1,68 @@
|
||||||
|
#!/usr/bin/env ruby
|
||||||
|
|
||||||
|
require File.dirname(__FILE__) + '/../test_helper'
|
||||||
|
|
||||||
|
# Unit tests for LaTeX output.
#
# test_basics and test_blocks render Markdown through Maruku#to_latex; the
# remaining tests still call RedClothForTex#to_tex and a table_of_contents
# helper.
# NOTE(review): neither RedClothForTex nor table_of_contents is defined in
# this file -- confirm that test_helper (or the application) provides them.
class RedClothForTexTest < Test::Unit::TestCase
  # Inline emphasis and a flat bullet list.
  def test_basics
    assert_equal '{\bf First Page}', Maruku.new('*First Page*').to_latex
    assert_equal '{\em First Page}', Maruku.new('_First Page_').to_latex
    # The source must be double-quoted: in a single-quoted literal "\n" is a
    # backslash followed by "n", so the input would be one line, not a
    # three-item list.
    assert_equal "\\begin{itemize}\n\t\\item A\n\t\t\\item B\n\t\t\\item C\n\t\\end{itemize}",
                 Maruku.new("* A\n* B\n* C").to_latex
  end

  # Headings map to unnumbered sectioning commands.
  def test_blocks
    assert_equal '\section*{hello}', Maruku.new('#hello#').to_latex
    assert_equal '\subsection*{hello}', Maruku.new('##hello##').to_latex
  end

  # A nested list of wiki links becomes a section hierarchy, with page bodies
  # substituted for the pages passed in the hash.
  def test_table_of_contents

    source = <<EOL
* [[A]]
** [[B]]
** [[C]]
* D
** [[E]]
*** F
EOL

    expected_result = <<EOL
\\pagebreak

\\section{A}
Abe

\\subsection{B}
Babe

\\subsection{C}
\\pagebreak

\\section{D}

\\subsection{E}

\\subsubsection{F}
EOL
    # The heredoc ends with a newline that the expected output does not have.
    expected_result.chop!
    assert_equal(expected_result, table_of_contents(source, 'A' => 'Abe', 'B' => 'Babe'))
  end

  # Characters that are special in TeX get escaped.
  def test_entities
    assert_equal "Beck \\& Fowler are 100\\% cool", RedClothForTex.new("Beck & Fowler are 100% cool").to_tex
  end

  # Double-bracket wiki links are reduced to their text.
  def test_bracket_links
    assert_equal "such a Horrible Day, but I won't be Made Useless", RedClothForTex.new("such a [[Horrible Day]], but I won't be [[Made Useless]]").to_tex
  end

  # A bracketed number directly after a wiki link becomes a footnote.
  def test_footnotes_on_abbreviations
    assert_equal(
      "such a Horrible Day\\footnote{1}, but I won't be Made Useless",
      RedClothForTex.new("such a [[Horrible Day]][1], but I won't be [[Made Useless]]").to_tex
    )
  end

  # Textile h4 is rendered as an unnumbered subsubsection.
  def test_subsection_depth
    assert_equal "\\subsubsection*{Hello}", RedClothForTex.new("h4. Hello").to_tex
  end
end
|
|
@ -57,12 +57,12 @@ class PageRendererTest < Test::Unit::TestCase
|
||||||
set_web_property :markup, :markdown
|
set_web_property :markup, :markdown
|
||||||
|
|
||||||
assert_markup_parsed_as(
|
assert_markup_parsed_as(
|
||||||
%{<h1>My Headline</h1>\n\n<p>that <span class="newWikiWord">} +
|
%{<h1 id="my_headline">My Headline</h1>\n\n<p>that <span class="newWikiWord">} +
|
||||||
%{Smart Engine GUI<a href="../show/SmartEngineGUI">?</a></span></p>},
|
%{Smart Engine GUI<a href="../show/SmartEngineGUI">?</a></span></p>},
|
||||||
"My Headline\n===========\n\nthat SmartEngineGUI")
|
"My Headline\n===========\n\nthat SmartEngineGUI")
|
||||||
|
|
||||||
assert_markup_parsed_as(
|
assert_markup_parsed_as(
|
||||||
%{<h1>My Headline</h1>\n\n<p>that <span class="newWikiWord">} +
|
%{<h1 id="my_headline">My Headline</h1>\n\n<p>that <span class="newWikiWord">} +
|
||||||
%{Smart Engine GUI<a href="../show/SmartEngineGUI">?</a></span></p>},
|
%{Smart Engine GUI<a href="../show/SmartEngineGUI">?</a></span></p>},
|
||||||
"#My Headline#\n\nthat SmartEngineGUI")
|
"#My Headline#\n\nthat SmartEngineGUI")
|
||||||
|
|
||||||
|
@ -77,7 +77,7 @@ class PageRendererTest < Test::Unit::TestCase
|
||||||
|
|
||||||
assert_markup_parsed_as(
|
assert_markup_parsed_as(
|
||||||
%{<p>This is a code block:</p>\n\n<pre><code>def a_method(arg)\n} +
|
%{<p>This is a code block:</p>\n\n<pre><code>def a_method(arg)\n} +
|
||||||
%{return ThatWay\n</code></pre>\n\n<p>Nice!</p>},
|
%{return ThatWay</code></pre>\n\n<p>Nice!</p>},
|
||||||
code_block)
|
code_block)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -105,15 +105,15 @@ class PageRendererTest < Test::Unit::TestCase
|
||||||
|
|
||||||
set_web_property :markup, :markdown
|
set_web_property :markup, :markdown
|
||||||
assert_markup_parsed_as(
|
assert_markup_parsed_as(
|
||||||
"<h1>Markdown heading</h1>\n\n" +
|
"<h1 id=\"markdown_heading\">Markdown heading</h1>\n\n" +
|
||||||
"<p>h2. Textile heading</p>\n\n" +
|
"<p>h2. Textile heading</p>\n\n" +
|
||||||
"<p><em>some</em> <strong>text</strong> <em>with</em> -styles-</p>\n\n" +
|
"<p><em>some</em> <strong>text</strong> <em>with</em> -styles-</p>\n\n" +
|
||||||
"<ul>\n<li>list 1</li>\n<li>list 2</li>\n</ul>",
|
"<ul>\n<li>list 1</li>\n\n<li>list 2</li>\n</ul>",
|
||||||
textile_and_markdown)
|
textile_and_markdown)
|
||||||
|
|
||||||
set_web_property :markup, :textile
|
set_web_property :markup, :textile
|
||||||
assert_markup_parsed_as(
|
assert_markup_parsed_as(
|
||||||
"<p>Markdown heading<br />================</p>\n\n\n\t<h2>Textile heading</h2>" +
|
"<p>Markdown heading<br/>================</p>\n\n\n\t<h2>Textile heading</h2>" +
|
||||||
"\n\n\n\t<p><strong>some</strong> <b>text</b> <em>with</em> <del>styles</del></p>" +
|
"\n\n\n\t<p><strong>some</strong> <b>text</b> <em>with</em> <del>styles</del></p>" +
|
||||||
"\n\n\n\t<ul>\n\t<li>list 1</li>\n\t\t<li>list 2</li>\n\t</ul>",
|
"\n\n\n\t<ul>\n\t<li>list 1</li>\n\t\t<li>list 2</li>\n\t</ul>",
|
||||||
textile_and_markdown)
|
textile_and_markdown)
|
||||||
|
|
|
@ -148,6 +148,18 @@ module HTML5lib
|
||||||
input
|
input
|
||||||
]
|
]
|
||||||
|
|
||||||
|
CDATA_ELEMENTS = %w[title textarea]
|
||||||
|
|
||||||
|
RCDATA_ELEMENTS = %w[
|
||||||
|
style
|
||||||
|
script
|
||||||
|
xmp
|
||||||
|
iframe
|
||||||
|
noembed
|
||||||
|
noframes
|
||||||
|
noscript
|
||||||
|
]
|
||||||
|
|
||||||
BOOLEAN_ATTRIBUTES = {
|
BOOLEAN_ATTRIBUTES = {
|
||||||
:global => %w[irrelevant],
|
:global => %w[irrelevant],
|
||||||
'style' => %w[scoped],
|
'style' => %w[scoped],
|
||||||
|
|
1
vendor/plugins/HTML5lib/lib/html5lib/filters.rb
vendored
Normal file
1
vendor/plugins/HTML5lib/lib/html5lib/filters.rb
vendored
Normal file
|
@ -0,0 +1 @@
|
||||||
|
require 'html5lib/filters/optionaltags'
|
10
vendor/plugins/HTML5lib/lib/html5lib/filters/base.rb
vendored
Normal file
10
vendor/plugins/HTML5lib/lib/html5lib/filters/base.rb
vendored
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
require 'delegate'
|
||||||
|
require 'enumerator'
|
||||||
|
|
||||||
|
module HTML5lib
  module Filters
    # Shared superclass for token-stream filters.
    #
    # A filter wraps another token source (anything responding to #each) via
    # SimpleDelegator. Mixing in Enumerable gives every filter the usual
    # collection methods (map, to_a, ...) on top of whichever #each applies:
    # the subclass's own, or the wrapped object's when none is defined.
    class Base < ::SimpleDelegator
      include ::Enumerable
    end
  end
end
|
62
vendor/plugins/HTML5lib/lib/html5lib/filters/inject_meta_charset.rb
vendored
Normal file
62
vendor/plugins/HTML5lib/lib/html5lib/filters/inject_meta_charset.rb
vendored
Normal file
|
@ -0,0 +1,62 @@
|
||||||
|
require 'html5lib/filters/base'
|
||||||
|
|
||||||
|
module HTML5lib
  module Filters
    # Token-stream filter that makes the document head announce +encoding+
    # through a <meta charset="..."> element.
    #
    # * An existing <meta> carrying a charset attribute has that attribute
    #   overwritten with the encoding.
    # * An empty <head/> (EmptyTag) is expanded into start tag, meta, end tag.
    # * Otherwise the tokens of the head are buffered until </head> arrives;
    #   if no charset meta was seen by then, one is emitted directly after the
    #   <head> start tag.
    #
    # When +encoding+ is nil no meta element is ever inserted.
    class InjectMetaCharset < Base
      # source::   the wrapped token stream (must respond to #each)
      # encoding:: value to write into the charset attribute, or nil
      def initialize(source, encoding)
        super(source)
        @encoding = encoding
      end

      # Yields the wrapped stream's tokens with the charset meta applied.
      def each
        # Builds a fresh meta token on every call so no hash is shared
        # between yielded tokens.
        new_meta = lambda do
          {:type => :EmptyTag, :name => "meta",
           :data => {"charset" => @encoding}}
        end

        phase = :pre_head
        have_meta = @encoding.nil?   # nothing to inject without an encoding
        buffered = []                # tokens held back while inside <head>

        __getobj__.each do |token|
          kind = token[:type]

          if kind == :StartTag
            phase = :in_head if token[:name].downcase == "head"

          elsif kind == :EmptyTag
            lowered = token[:name].downcase
            if lowered == "meta"
              if token[:data].any? { |name, value| name == 'charset' }
                # replace charset with actual encoding
                attrs = Hash[*token[:data].flatten]
                attrs['charset'] = @encoding
                token[:data] = attrs.to_a.sort
                have_meta = true
              end
            elsif lowered == "head" && !have_meta
              # insert meta into empty head; the original <head/> token is
              # replaced by the three tokens below
              yield({:type => :StartTag, :name => "head", :data => {}})
              yield new_meta.call
              yield({:type => :EndTag, :name => "head"})
              have_meta = true
              next
            end

          elsif kind == :EndTag
            if token[:name].downcase == "head" && buffered.any?
              # first buffered token is the <head> start tag itself
              yield buffered.shift
              yield new_meta.call unless have_meta
              yield buffered.shift while buffered.any?
              have_meta = true
              phase = :post_head
            end
          end

          if phase == :in_head
            buffered << token
          else
            yield token
          end
        end
      end
    end
  end
end
|
199
vendor/plugins/HTML5lib/lib/html5lib/filters/optionaltags.rb
vendored
Normal file
199
vendor/plugins/HTML5lib/lib/html5lib/filters/optionaltags.rb
vendored
Normal file
|
@ -0,0 +1,199 @@
|
||||||
|
require 'html5lib/constants'
|
||||||
|
require 'html5lib/filters/base'
|
||||||
|
|
||||||
|
module HTML5lib
  module Filters

    # Token-stream filter that drops the start and end tags which the rules
    # quoted in the comments below allow to be omitted.
    #
    # Each decision looks at a window of three tokens from the wrapped stream:
    # the previous token, the current one and the next one.
    class OptionalTagFilter < Base
      # Token types that, when they directly follow a tag, forbid omitting it
      # for html/head/body/colgroup.
      SPACE_OR_COMMENT = [:Comment, :SpaceCharacters].freeze

      # Elements whose start tag lets a preceding </p> be omitted.
      P_END_FOLLOWERS = %w(address blockquote dl fieldset form h1 h2 h3 h4 h5
                           h6 hr menu ol p pre table ul).freeze

      # Walks the wrapped stream and yields (previous, current, next) for
      # every token; +previous+ is nil for the first token and +next+ is nil
      # for the last one. Yields nothing for an empty stream.
      def slider
        previous1 = previous2 = nil
        __getobj__.each do |token|
          yield previous2, previous1, token unless previous1.nil?
          previous2 = previous1
          previous1 = token
        end
        # Flush the last token. An empty stream has no last token; yielding
        # a nil "current" token would make #each fail on nil[:type].
        yield previous2, previous1, nil unless previous1.nil?
      end

      # Yields every token of the wrapped stream except omissible tags.
      # A start tag is only ever dropped when it carries no attributes.
      def each
        slider do |previous, token, nexttok|
          case token[:type]
          when :StartTag
            yield token unless token[:data].empty? && is_optional_start(token[:name], previous, nexttok)
          when :EndTag
            yield token unless is_optional_end(token[:name], nexttok)
          else
            yield token
          end
        end
      end

      # Returns true when the start tag +tagname+ may be omitted.
      #
      # tagname::  name of the element whose start tag is being considered
      # previous:: token preceding the start tag, or nil
      # nexttok::  token following the start tag, or nil at end of stream
      def is_optional_start(tagname, previous, nexttok)
        type = nexttok ? nexttok[:type] : nil
        case tagname
        when 'html'
          # An html element's start tag may be omitted if the first thing
          # inside the html element is not a space character or a comment.
          !SPACE_OR_COMMENT.include?(type)
        when 'head'
          # A head element's start tag may be omitted if the first thing
          # inside the head element is an element.
          type == :StartTag
        when 'body'
          # A body element's start tag may be omitted if the first thing
          # inside the body element is not a space character or a comment,
          # except if the first thing inside the body element is a script
          # or style element and the node immediately preceding the body
          # element is a head element whose end tag has been omitted.
          if SPACE_OR_COMMENT.include?(type)
            false
          elsif type == :StartTag
            # XXX: we do not look at the preceding event, so we never omit
            # the body element's start tag if it's followed by a script or
            # a style element.
            !%w[script style].include?(nexttok[:name])
          else
            true
          end
        when 'colgroup'
          # A colgroup element's start tag may be omitted if the first thing
          # inside the colgroup element is a col element, and if the element
          # is not immediately preceded by another colgroup element whose
          # end tag has been omitted.
          # XXX: we do not look at the preceding event, so instead we never
          # omit the colgroup element's end tag when it is immediately
          # followed by another colgroup element. See is_optional_end.
          type == :StartTag && nexttok[:name] == "col"
        when 'tbody'
          # A tbody element's start tag may be omitted if the first thing
          # inside the tbody element is a tr element, and if the element is
          # not immediately preceded by a tbody, thead, or tfoot element
          # whose end tag has been omitted.
          return false unless type == :StartTag
          # The end tag of a preceding tbody/thead/tfoot may be dropped by
          # is_optional_end, so this start tag has to stay in that case.
          if previous && previous[:type] == :EndTag &&
             %w(tbody thead tfoot).include?(previous[:name])
            return false
          end
          nexttok[:name] == 'tr'
        else
          false
        end
      end

      # Returns true when the end tag +tagname+ may be omitted.
      #
      # tagname:: name of the element whose end tag is being considered
      # nexttok:: token following the end tag, or nil at end of stream
      def is_optional_end(tagname, nexttok)
        type = nexttok ? nexttok[:type] : nil
        case tagname
        when 'html', 'head', 'body'
          # An html element's end tag may be omitted if the html element
          # is not immediately followed by a space character or a comment.
          # The same test is applied to head and body.
          !SPACE_OR_COMMENT.include?(type)
        when 'li', 'optgroup', 'option', 'tr'
          # A li element's end tag may be omitted if the li element is
          # immediately followed by another li element or if there is
          # no more content in the parent element.
          # An optgroup element's end tag may be omitted if the optgroup
          # element is immediately followed by another optgroup element,
          # or if there is no more content in the parent element.
          # An option element's end tag may be omitted if the option
          # element is immediately followed by another option element,
          # or if there is no more content in the parent element.
          # A tr element's end tag may be omitted if the tr element is
          # immediately followed by another tr element, or if there is
          # no more content in the parent element.
          if type == :StartTag
            nexttok[:name] == tagname
          else
            parent_exhausted?(type)
          end
        when 'dt', 'dd'
          # A dt element's end tag may be omitted if the dt element is
          # immediately followed by another dt element or a dd element.
          # A dd element's end tag may be omitted if the dd element is
          # immediately followed by another dd element or a dt element,
          # or if there is no more content in the parent element.
          if type == :StartTag
            %w(dt dd).include?(nexttok[:name])
          elsif tagname == 'dd'
            parent_exhausted?(type)
          else
            false
          end
        when 'p'
          # A p element's end tag may be omitted if the p element is
          # immediately followed by an address, blockquote, dl, fieldset,
          # form, h1, h2, h3, h4, h5, h6, hr, menu, ol, p, pre, table,
          # or ul element, or if there is no more content in the parent
          # element.
          if type == :StartTag
            P_END_FOLLOWERS.include?(nexttok[:name])
          else
            parent_exhausted?(type)
          end
        when 'colgroup'
          # A colgroup element's end tag may be omitted if the colgroup
          # element is not immediately followed by a space character or
          # a comment.
          if SPACE_OR_COMMENT.include?(type)
            false
          elsif type == :StartTag
            # XXX: we also look for an immediately following colgroup
            # element. See is_optional_start.
            nexttok[:name] != 'colgroup'
          else
            true
          end
        when 'thead', 'tbody'
          # A thead element's end tag may be omitted if the thead element
          # is immediately followed by a tbody or tfoot element.
          # A tbody element's end tag may be omitted if the tbody element
          # is immediately followed by a tbody or tfoot element, or if
          # there is no more content in the parent element.
          # A following tbody keeps its start tag in this situation; see
          # is_optional_start.
          if type == :StartTag
            %w(tbody tfoot).include?(nexttok[:name])
          elsif tagname == 'tbody'
            parent_exhausted?(type)
          else
            false
          end
        when 'tfoot'
          # A tfoot element's end tag may be omitted if the tfoot element
          # is immediately followed by a tbody element, or if there is no
          # more content in the parent element.
          # A following tbody keeps its start tag in this situation; see
          # is_optional_start.
          if type == :StartTag
            nexttok[:name] == 'tbody'
          else
            parent_exhausted?(type)
          end
        when 'td', 'th'
          # A td element's end tag may be omitted if the td element is
          # immediately followed by a td or th element, or if there is
          # no more content in the parent element.
          # A th element's end tag may be omitted if the th element is
          # immediately followed by a td or th element, or if there is
          # no more content in the parent element.
          if type == :StartTag
            %w(td th).include?(nexttok[:name])
          else
            parent_exhausted?(type)
          end
        else
          false
        end
      end

      private

      # "No more content in the parent element": the next token closes an
      # element, or the stream has ended (+type+ is nil).
      def parent_exhausted?(type)
        type == :EndTag || type.nil?
      end
    end
  end
end
|
15
vendor/plugins/HTML5lib/lib/html5lib/filters/sanitizer.rb
vendored
Normal file
15
vendor/plugins/HTML5lib/lib/html5lib/filters/sanitizer.rb
vendored
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
require 'html5lib/filters/base'
|
||||||
|
require 'html5lib/sanitizer'
|
||||||
|
|
||||||
|
module HTML5lib
  module Filters
    # Token-stream filter that hands every token of the wrapped stream to
    # sanitize_token (mixed in via HTMLSanitizeModule) and yields whatever
    # that call returns.
    class HTMLSanitizeFilter < Base
      include HTMLSanitizeModule

      # Yields the sanitized counterpart of each wrapped token, in order.
      def each
        __getobj__.each { |token| yield(sanitize_token(token)) }
      end
    end
  end
end
|
36
vendor/plugins/HTML5lib/lib/html5lib/filters/whitespace.rb
vendored
Normal file
36
vendor/plugins/HTML5lib/lib/html5lib/filters/whitespace.rb
vendored
Normal file
|
@ -0,0 +1,36 @@
|
||||||
|
require 'html5lib/constants'
|
||||||
|
require 'html5lib/filters/base'
|
||||||
|
|
||||||
|
module HTML5lib
  module Filters
    # Token-stream filter that strips insignificant whitespace.
    #
    # Outside whitespace-preserving elements (see SPACE_PRESERVE_ELEMENTS)
    # pure-whitespace tokens are dropped and every run of space characters
    # inside a text token is collapsed to a single space. Inside such an
    # element, and inside anything nested in it, tokens pass through
    # untouched.
    class WhitespaceFilter < Base

      # Elements whose content must keep its whitespace verbatim.
      SPACE_PRESERVE_ELEMENTS = %w[pre textarea] + RCDATA_ELEMENTS
      # One or more consecutive space characters.
      SPACES = /[#{SPACE_CHARACTERS.join('')}]+/m

      # Yields the wrapped stream's tokens with whitespace stripped.
      def each
        # Nesting depth inside a whitespace-preserving element; 0 = outside.
        preserve = 0
        __getobj__.each do |token|
          case token[:type]
          when :StartTag
            # Once inside a preserving element, count every nested start
            # tag so the matching end tags unwind the depth correctly.
            if preserve > 0 || SPACE_PRESERVE_ELEMENTS.include?(token[:name])
              preserve += 1
            end

          when :EndTag
            preserve -= 1 if preserve > 0

          when :SpaceCharacters
            # Whitespace-only tokens are dropped entirely when not preserving.
            next if preserve == 0

          when :Characters
            # gsub, not sub: every whitespace run in the token has to be
            # collapsed, not only the first one.
            token[:data] = token[:data].gsub(SPACES, ' ') if preserve == 0
          end

          yield token
        end
      end
    end
  end
end
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
require 'cgi'
|
require 'cgi'
|
||||||
require 'html5lib/filters'
|
|
||||||
|
|
||||||
module HTML5lib
|
module HTML5lib
|
||||||
|
|
||||||
|
@ -176,15 +175,6 @@ module HTML5lib
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
class HTMLSanitizeFilter < Filters::Base
|
|
||||||
include HTMLSanitizeModule
|
|
||||||
def each
|
|
||||||
__getobj__.each do |token|
|
|
||||||
yield(sanitize_token(token))
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
class HTMLSanitizer < HTMLTokenizer
|
class HTMLSanitizer < HTMLTokenizer
|
||||||
include HTMLSanitizeModule
|
include HTMLSanitizeModule
|
||||||
def each
|
def each
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
require 'html5lib/constants'
|
require 'html5lib/constants'
|
||||||
require 'html5lib/filters'
|
|
||||||
|
|
||||||
module HTML5lib
|
module HTML5lib
|
||||||
|
|
||||||
|
@ -7,7 +6,7 @@ module HTML5lib
|
||||||
CDATA_ELEMENTS = %w[style script xmp iframe noembed noframes noscript]
|
CDATA_ELEMENTS = %w[style script xmp iframe noembed noframes noscript]
|
||||||
|
|
||||||
def self.serialize(stream, options = {})
|
def self.serialize(stream, options = {})
|
||||||
new(options).serialize(stream)
|
new(options).serialize(stream, options[:encoding])
|
||||||
end
|
end
|
||||||
|
|
||||||
def initialize(options={})
|
def initialize(options={})
|
||||||
|
@ -40,20 +39,25 @@ module HTML5lib
|
||||||
|
|
||||||
def serialize(treewalker, encoding=nil)
|
def serialize(treewalker, encoding=nil)
|
||||||
in_cdata = false
|
in_cdata = false
|
||||||
|
@errors = []
|
||||||
|
|
||||||
|
|
||||||
@errors = []
|
|
||||||
if encoding and @inject_meta_charset
|
if encoding and @inject_meta_charset
|
||||||
treewalker = filter_inject_meta_charset(treewalker, encoding)
|
require 'html5lib/filters/inject_meta_charset'
|
||||||
|
treewalker = Filters::InjectMetaCharset.new(treewalker, encoding)
|
||||||
end
|
end
|
||||||
|
|
||||||
if @strip_whitespace
|
if @strip_whitespace
|
||||||
treewalker = filter_whitespace(treewalker)
|
require 'html5lib/filters/whitespace'
|
||||||
|
treewalker = Filters::WhitespaceFilter.new(treewalker)
|
||||||
end
|
end
|
||||||
|
|
||||||
if @sanitize
|
if @sanitize
|
||||||
require 'html5lib/sanitizer'
|
require 'html5lib/filters/sanitizer'
|
||||||
treewalker = HTMLSanitizeFilter.new(treewalker)
|
treewalker = Filters::HTMLSanitizeFilter.new(treewalker)
|
||||||
end
|
end
|
||||||
|
|
||||||
if @omit_optional_tags
|
if @omit_optional_tags
|
||||||
|
require 'html5lib/filters/optionaltags'
|
||||||
treewalker = Filters::OptionalTagFilter.new(treewalker)
|
treewalker = Filters::OptionalTagFilter.new(treewalker)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -62,25 +66,14 @@ module HTML5lib
|
||||||
type = token[:type]
|
type = token[:type]
|
||||||
if type == :Doctype
|
if type == :Doctype
|
||||||
doctype = "<!DOCTYPE %s>" % token[:name]
|
doctype = "<!DOCTYPE %s>" % token[:name]
|
||||||
if encoding
|
result << doctype
|
||||||
result << doctype.encode(encoding)
|
|
||||||
else
|
|
||||||
result << doctype
|
|
||||||
end
|
|
||||||
|
|
||||||
elsif [:Characters, :SpaceCharacters].include? type
|
elsif [:Characters, :SpaceCharacters].include? type
|
||||||
if type == :SpaceCharacters or in_cdata
|
if type == :SpaceCharacters or in_cdata
|
||||||
if in_cdata and token[:data].include?("</")
|
if in_cdata and token[:data].include?("</")
|
||||||
serializeError(_("Unexpected </ in CDATA"))
|
serializeError(_("Unexpected </ in CDATA"))
|
||||||
end
|
end
|
||||||
if encoding
|
result << token[:data]
|
||||||
result << token[:data].encode(encoding, errors || "strict")
|
|
||||||
else
|
|
||||||
result << token[:data]
|
|
||||||
end
|
|
||||||
elsif encoding
|
|
||||||
result << token[:data].replace("&", "&").
|
|
||||||
encode(encoding, unicode_encode_errors)
|
|
||||||
else
|
else
|
||||||
result << token[:data].
|
result << token[:data].
|
||||||
gsub("&", "&").
|
gsub("&", "&").
|
||||||
|
@ -97,7 +90,6 @@ module HTML5lib
|
||||||
end
|
end
|
||||||
attributes = []
|
attributes = []
|
||||||
for k,v in attrs = token[:data].to_a.sort
|
for k,v in attrs = token[:data].to_a.sort
|
||||||
k = k.encode(encoding) if encoding
|
|
||||||
attributes << ' '
|
attributes << ' '
|
||||||
|
|
||||||
attributes << k
|
attributes << k
|
||||||
|
@ -111,9 +103,6 @@ module HTML5lib
|
||||||
quote_attr = (SPACE_CHARACTERS + %w(< > " ')).any? {|c| v.include?(c)}
|
quote_attr = (SPACE_CHARACTERS + %w(< > " ')).any? {|c| v.include?(c)}
|
||||||
end
|
end
|
||||||
v = v.gsub("&", "&")
|
v = v.gsub("&", "&")
|
||||||
if encoding
|
|
||||||
v = v.encode(encoding, unicode_encode_errors)
|
|
||||||
end
|
|
||||||
if quote_attr
|
if quote_attr
|
||||||
quote_char = @quote_char
|
quote_char = @quote_char
|
||||||
if @use_best_quote_char
|
if @use_best_quote_char
|
||||||
|
@ -141,11 +130,7 @@ module HTML5lib
|
||||||
attributes << "/"
|
attributes << "/"
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
if encoding
|
result << "<%s%s>" % [name, attributes.join('')]
|
||||||
result << "<%s%s>" % [name.encode(encoding), attributes.join('')]
|
|
||||||
else
|
|
||||||
result << "<%s%s>" % [name, attributes.join('')]
|
|
||||||
end
|
|
||||||
|
|
||||||
elsif type == :EndTag
|
elsif type == :EndTag
|
||||||
name = token[:name]
|
name = token[:name]
|
||||||
|
@ -155,33 +140,29 @@ module HTML5lib
|
||||||
serializeError(_("Unexpected child element of a CDATA element"))
|
serializeError(_("Unexpected child element of a CDATA element"))
|
||||||
end
|
end
|
||||||
end_tag = "</#{name}>"
|
end_tag = "</#{name}>"
|
||||||
end_tag = end_tag.encode(encoding) if encoding
|
|
||||||
result << end_tag
|
result << end_tag
|
||||||
|
|
||||||
elsif type == :Comment
|
elsif type == :Comment
|
||||||
data = token[:data]
|
data = token[:data]
|
||||||
serializeError(_("Comment contains --")) if data.index("--")
|
serializeError(_("Comment contains --")) if data.index("--")
|
||||||
comment = "<!--%s-->" % token[:data]
|
comment = "<!--%s-->" % token[:data]
|
||||||
if encoding
|
|
||||||
comment = comment.encode(encoding, unicode_encode_errors)
|
|
||||||
end
|
|
||||||
result << comment
|
result << comment
|
||||||
|
|
||||||
else
|
else
|
||||||
serializeError(token[:data])
|
serializeError(token[:data])
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
result.join('')
|
|
||||||
end
|
|
||||||
|
|
||||||
def render(treewalker, encoding=nil)
|
if encoding and encoding != 'utf-8'
|
||||||
if encoding
|
require 'iconv'
|
||||||
return "".join(list(serialize(treewalker, encoding)))
|
Iconv.iconv(encoding, 'utf-8', result.join('')).first
|
||||||
else
|
else
|
||||||
return "".join(list(serialize(treewalker)))
|
result.join('')
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
alias :render :serialize
|
||||||
|
|
||||||
def serializeError(data="XXX ERROR MESSAGE NEEDED")
|
def serializeError(data="XXX ERROR MESSAGE NEEDED")
|
||||||
# XXX The idea is to make data mandatory.
|
# XXX The idea is to make data mandatory.
|
||||||
@errors.push(data)
|
@errors.push(data)
|
||||||
|
@ -189,22 +170,6 @@ module HTML5lib
|
||||||
raise SerializeError
|
raise SerializeError
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def filter_inject_meta_charset(treewalker, encoding)
|
|
||||||
done = false
|
|
||||||
for token in treewalker
|
|
||||||
if not done and token[:type] == :StartTag \
|
|
||||||
and token[:name].lower() == "head"
|
|
||||||
yield({:type => :EmptyTag, :name => "meta", \
|
|
||||||
:data => {"charset" => encoding}})
|
|
||||||
end
|
|
||||||
yield token
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def filter_whitespace(treewalker)
|
|
||||||
raise NotImplementedError
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
# Error in serialized tree
|
# Error in serialized tree
|
||||||
|
|
|
@ -27,13 +27,13 @@ module TokenConstructor
|
||||||
end
|
end
|
||||||
|
|
||||||
def text(data)
|
def text(data)
|
||||||
if data =~ /^([#{SPACE_CHARACTERS.join('')}]+)/
|
if data =~ /\A([#{SPACE_CHARACTERS.join('')}]+)/m
|
||||||
yield({:type => :SpaceCharacters, :data => $1})
|
yield({:type => :SpaceCharacters, :data => $1})
|
||||||
data = data[$1.length .. -1]
|
data = data[$1.length .. -1]
|
||||||
return if data.empty?
|
return if data.empty?
|
||||||
end
|
end
|
||||||
|
|
||||||
if data =~ /([#{SPACE_CHARACTERS.join('')}]+)$/
|
if data =~ /([#{SPACE_CHARACTERS.join('')}]+)\Z/m
|
||||||
yield({:type => :Characters, :data => data[0 ... -$1.length]})
|
yield({:type => :Characters, :data => data[0 ... -$1.length]})
|
||||||
yield({:type => :SpaceCharacters, :data => $1})
|
yield({:type => :SpaceCharacters, :data => $1})
|
||||||
else
|
else
|
||||||
|
|
41
vendor/plugins/HTML5lib/parse.rb
vendored
41
vendor/plugins/HTML5lib/parse.rb
vendored
|
@ -59,7 +59,7 @@ def printOutput(parser, document, opts)
|
||||||
require 'html5lib/treewalkers'
|
require 'html5lib/treewalkers'
|
||||||
tokens = HTML5lib::TreeWalkers[opts.treebuilder].new(document)
|
tokens = HTML5lib::TreeWalkers[opts.treebuilder].new(document)
|
||||||
require 'html5lib/serializer'
|
require 'html5lib/serializer'
|
||||||
print HTML5lib::HTMLSerializer.serialize(tokens, :encoding=>'utf-8')
|
print HTML5lib::HTMLSerializer.serialize(tokens, opts.serializer)
|
||||||
when :hilite
|
when :hilite
|
||||||
print document.hilite
|
print document.hilite
|
||||||
when :tree
|
when :tree
|
||||||
|
@ -80,11 +80,16 @@ require 'ostruct'
|
||||||
options = OpenStruct.new
|
options = OpenStruct.new
|
||||||
options.profile = false
|
options.profile = false
|
||||||
options.time = false
|
options.time = false
|
||||||
options.output = :tree
|
options.output = :html
|
||||||
options.treebuilder = 'simpletree'
|
options.treebuilder = 'simpletree'
|
||||||
options.error = false
|
options.error = false
|
||||||
options.encoding = false
|
options.encoding = false
|
||||||
options.parsemethod = :parse
|
options.parsemethod = :parse
|
||||||
|
options.serializer = {
|
||||||
|
:encoding => 'utf-8',
|
||||||
|
:omit_optional_tags => false,
|
||||||
|
:inject_meta_charset => false
|
||||||
|
}
|
||||||
|
|
||||||
require 'optparse'
|
require 'optparse'
|
||||||
opts = OptionParser.new do |opts|
|
opts = OptionParser.new do |opts|
|
||||||
|
@ -96,14 +101,6 @@ opts = OptionParser.new do |opts|
|
||||||
options.time = time
|
options.time = time
|
||||||
end
|
end
|
||||||
|
|
||||||
opts.on("--[no-]tree", "Do not print output tree") do |tree|
|
|
||||||
if tree
|
|
||||||
options.output = :tree
|
|
||||||
else
|
|
||||||
options.output = nil
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
opts.on("-b", "--treebuilder NAME") do |treebuilder|
|
opts.on("-b", "--treebuilder NAME") do |treebuilder|
|
||||||
options.treebuilder = treebuilder
|
options.treebuilder = treebuilder
|
||||||
end
|
end
|
||||||
|
@ -116,13 +113,17 @@ opts = OptionParser.new do |opts|
|
||||||
options.parsemethod = :parseFragment
|
options.parsemethod = :parseFragment
|
||||||
end
|
end
|
||||||
|
|
||||||
|
opts.on("--tree", "output as debug tree") do |tree|
|
||||||
|
options.output = :tree
|
||||||
|
end
|
||||||
|
|
||||||
opts.on("-x", "--xml", "output as xml") do |xml|
|
opts.on("-x", "--xml", "output as xml") do |xml|
|
||||||
options.output = :xml
|
options.output = :xml
|
||||||
options.treebuilder = "rexml"
|
options.treebuilder = "rexml"
|
||||||
end
|
end
|
||||||
|
|
||||||
opts.on("--html", "Output as html") do |html|
|
opts.on("--[no-]html", "Output as html") do |html|
|
||||||
options.output = :html
|
options.output = (html ? :html : nil)
|
||||||
end
|
end
|
||||||
|
|
||||||
opts.on("--hilite", "Output as formatted highlighted code.") do |hilite|
|
opts.on("--hilite", "Output as formatted highlighted code.") do |hilite|
|
||||||
|
@ -133,6 +134,22 @@ opts = OptionParser.new do |opts|
|
||||||
options.encoding = encoding
|
options.encoding = encoding
|
||||||
end
|
end
|
||||||
|
|
||||||
|
opts.on("--[no-]inject-meta-charset", "inject <meta charset>") do |inject|
|
||||||
|
options.serializer[:inject_meta_charset] = inject
|
||||||
|
end
|
||||||
|
|
||||||
|
opts.on("--[no-]strip-whitespace", "strip unnecessary whitespace") do |strip|
|
||||||
|
options.serializer[:strip_whitespace] = strip
|
||||||
|
end
|
||||||
|
|
||||||
|
opts.on("--[no-]sanitize", "escape unsafe tags") do |sanitize|
|
||||||
|
options.serializer[:sanitize] = sanitize
|
||||||
|
end
|
||||||
|
|
||||||
|
opts.on("--[no-]omit-optional-tags", "Omit optional tags") do |omit|
|
||||||
|
options.serializer[:omit_optional_tags] = omit
|
||||||
|
end
|
||||||
|
|
||||||
opts.on_tail("-h", "--help", "Show this message") do
|
opts.on_tail("-h", "--help", "Show this message") do
|
||||||
puts opts
|
puts opts
|
||||||
exit
|
exit
|
||||||
|
|
39
vendor/plugins/HTML5lib/testdata/serializer/injectmeta.test
vendored
Normal file
39
vendor/plugins/HTML5lib/testdata/serializer/injectmeta.test
vendored
Normal file
|
@ -0,0 +1,39 @@
|
||||||
|
{"tests": [
|
||||||
|
|
||||||
|
{"description": "no encoding",
|
||||||
|
"options": {"inject_meta_charset": true},
|
||||||
|
"input": [["EmptyTag", "head", {}]],
|
||||||
|
"expected": ["<head>"]
|
||||||
|
},
|
||||||
|
|
||||||
|
{"description": "emptytag head",
|
||||||
|
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
||||||
|
"input": [["EmptyTag", "head", {}]],
|
||||||
|
"expected": ["<head><meta charset=utf-8>"]
|
||||||
|
},
|
||||||
|
|
||||||
|
{"description": "head w/title",
|
||||||
|
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
||||||
|
"input": [["StartTag", "head", {}], ["StartTag","title",{}], ["Characters", "foo"],["EndTag", "title"], ["EndTag", "head"]],
|
||||||
|
"expected": ["<head><meta charset=utf-8><title>foo</title>"]
|
||||||
|
},
|
||||||
|
|
||||||
|
{"description": "head w/meta-charset",
|
||||||
|
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
||||||
|
"input": [["StartTag", "head", {}], ["EmptyTag","meta",{"charset":"ascii"}], ["EndTag", "head"]],
|
||||||
|
"expected": ["<head><meta charset=utf-8>"]
|
||||||
|
},
|
||||||
|
|
||||||
|
{"description": "head w/robots",
|
||||||
|
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
||||||
|
"input": [["StartTag", "head", {}], ["EmptyTag","meta",{"name":"robots","content":"noindex"}], ["EndTag", "head"]],
|
||||||
|
"expected": ["<head><meta charset=utf-8><meta content=noindex name=robots>"]
|
||||||
|
},
|
||||||
|
|
||||||
|
{"description": "head w/robots & charset",
|
||||||
|
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
||||||
|
"input": [["StartTag", "head", {}], ["EmptyTag","meta",{"name":"robots","content":"noindex"}], ["EmptyTag","meta",{"charset":"ascii"}], ["EndTag", "head"]],
|
||||||
|
"expected": ["<head><meta content=noindex name=robots><meta charset=utf-8>"]
|
||||||
|
}
|
||||||
|
|
||||||
|
]}
|
|
@ -24,7 +24,7 @@ class JsonWalker < HTML5lib::TreeWalkers::Base
|
||||||
when 'Doctype'
|
when 'Doctype'
|
||||||
yield doctype(token[1])
|
yield doctype(token[1])
|
||||||
else
|
else
|
||||||
raise ValueError("Unknown token type: " + type)
|
raise "Unknown token type: " + token[0]
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -37,7 +37,10 @@ class Html5SerializeTestcase < Test::Unit::TestCase
|
||||||
tests['tests'].each_with_index do |test, index|
|
tests['tests'].each_with_index do |test, index|
|
||||||
|
|
||||||
define_method "test_#{test_name}_#{index+1}" do
|
define_method "test_#{test_name}_#{index+1}" do
|
||||||
next if test_name == 'whitespace' #TODO
|
if test["options"] and test["options"]["encoding"]
|
||||||
|
test["options"][:encoding] = test["options"]["encoding"]
|
||||||
|
end
|
||||||
|
|
||||||
result = HTML5lib::HTMLSerializer.
|
result = HTML5lib::HTMLSerializer.
|
||||||
serialize(JsonWalker.new(test["input"]), (test["options"] || {}))
|
serialize(JsonWalker.new(test["input"]), (test["options"] || {}))
|
||||||
expected = test["expected"]
|
expected = test["expected"]
|
||||||
|
|
|
@ -157,6 +157,7 @@ Example:
|
||||||
# Render to an HTML fragment (returns a REXML document tree)
|
# Render to an HTML fragment (returns a REXML document tree)
|
||||||
def to_html_tree
|
def to_html_tree
|
||||||
div = Element.new 'div'
|
div = Element.new 'div'
|
||||||
|
div.attributes['class'] = 'maruku_wrapper_div'
|
||||||
children_to_html.each do |e|
|
children_to_html.each do |e|
|
||||||
div << e
|
div << e
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in a new issue