Performance
My implementations of REXML::Element#to_ncr (and REXML::Element#to_utf8) are horribly slow. For long documents, it is more efficient to serialize to a string, apply String#to_ncr (or String#to_utf8), and then sanitize the string.
This commit is contained in:
parent
0eb1ab56b0
commit
198d7847bd
5 changed files with 121 additions and 16 deletions
|
@ -680,10 +680,25 @@ class WikiControllerTest < Test::Unit::TestCase
|
|||
%
|
||||
% Unresolved issues:
|
||||
%
|
||||
% \binom{}{}
|
||||
%
|
||||
% \righttoleftarrow
|
||||
% \lefttorightarrow
|
||||
%
|
||||
% \color{} with HTML colorspec
|
||||
% \bgcolor
|
||||
% \array
|
||||
|
||||
% Of the standard HTML named colors, white, black, red, green, blue and yellow
|
||||
% are predefined in the color package. Here are the rest.
|
||||
\definecolor{aqua}{rgb}{0, 1.0, 1.0}
|
||||
\definecolor{fuschia}{rgb}{1.0, 0, 1.0}
|
||||
\definecolor{gray}{rgb}{0.502, 0.502, 0.502}
|
||||
\definecolor{lime}{rgb}{0, 1.0, 0}
|
||||
\definecolor{maroon}{rgb}{0.502, 0, 0}
|
||||
\definecolor{navy}{rgb}{0, 0, 0.502}
|
||||
\definecolor{olive}{rgb}{0.502, 0.502, 0}
|
||||
\definecolor{purple}{rgb}{0.502, 0, 0.502}
|
||||
\definecolor{silver}{rgb}{0.753, 0.753, 0.753}
|
||||
\definecolor{teal}{rgb}{0, 0.502, 0.502}
|
||||
|
||||
% Because of conflicts, \space and \mathop are converted to
|
||||
% \itexspace and \operatorname during preprocessing.
|
||||
|
@ -842,6 +857,8 @@ class WikiControllerTest < Test::Unit::TestCase
|
|||
\renewcommand{\scriptsize}{\scriptstyle}
|
||||
\newcommand{\scriptscriptsize}{\scriptscriptstyle}
|
||||
\newcommand{\mathfr}{\mathfrak}
|
||||
\newcommand{\statusline}[2]{#2}
|
||||
\newcommand{\toggle}[2]{#1}
|
||||
|
||||
%-------------------------------------------------------------------
|
||||
|
||||
|
|
|
@ -164,14 +164,14 @@ class PageRendererTest < Test::Unit::TestCase
|
|||
# wikiwords are invalid as styles, must be in "name: value" form
|
||||
def test_content_with_wikiword_in_style_tag
|
||||
assert_markup_parsed_as(
|
||||
"<p>That is some <em style=\"\">Stylish Emphasis</em></p>",
|
||||
"<p>That is some <em style=''>Stylish Emphasis</em></p>",
|
||||
'That is some <em style="WikiWord">Stylish Emphasis</em>')
|
||||
end
|
||||
|
||||
# validates format of style..
|
||||
def test_content_with_valid_style_in_style_tag
|
||||
assert_markup_parsed_as(
|
||||
"<p>That is some <em style=\"text-align: right;\">Stylish Emphasis</em></p>",
|
||||
"<p>That is some <em style='text-align: right;'>Stylish Emphasis</em></p>",
|
||||
'That is some <em style="text-align: right">Stylish Emphasis</em>')
|
||||
end
|
||||
|
||||
|
@ -199,24 +199,24 @@ class PageRendererTest < Test::Unit::TestCase
|
|||
|
||||
def test_content_with_link_in_parentheses
|
||||
assert_markup_parsed_as(
|
||||
"<p>(<a href=\"http://wiki.org/wiki.cgi?WhatIsWiki\">What is a wiki?</a>)</p>",
|
||||
"<p>(<a href='http://wiki.org/wiki.cgi?WhatIsWiki'>What is a wiki?</a>)</p>",
|
||||
'([What is a wiki?](http://wiki.org/wiki.cgi?WhatIsWiki))')
|
||||
end
|
||||
|
||||
def test_content_with_image_link
|
||||
assert_markup_parsed_as(
|
||||
"<p>This <img alt=\"\" src=\"http://hobix.com/sample.jpg\" /> is a Markdown image link.</p>",
|
||||
"<p>This <img src='http://hobix.com/sample.jpg' alt=''/> is a Markdown image link.</p>",
|
||||
'This  is a Markdown image link.')
|
||||
end
|
||||
|
||||
def test_content_with_inlined_img_tag
|
||||
assert_markup_parsed_as(
|
||||
"<p>This <img alt=\"\" src=\"http://hobix.com/sample.jpg\" /> is an inline image link.</p>",
|
||||
"<p>This <img src='http://hobix.com/sample.jpg' alt=''/> is an inline image link.</p>",
|
||||
'This <img src="http://hobix.com/sample.jpg" alt="" /> is an inline image link.')
|
||||
|
||||
# currently, upper case HTML elements are not allowed
|
||||
assert_markup_parsed_as(
|
||||
'<p>This <IMG SRC="http://hobix.com/sample.jpg" alt=""></IMG> is an inline image link.</p>',
|
||||
'<p>This <IMG SRC="http://hobix.com/sample.jpg" alt=""/> is an inline image link.</p>',
|
||||
'This <IMG SRC="http://hobix.com/sample.jpg" alt="" /> is an inline image link.')
|
||||
end
|
||||
|
||||
|
@ -361,7 +361,7 @@ class PageRendererTest < Test::Unit::TestCase
|
|||
EOL
|
||||
|
||||
assert_markup_parsed_as(
|
||||
"<ul>\n<li><a href=\"~b\">a</a></li>\n\n<li>c~ d</li>\n</ul>",
|
||||
"<ul>\n<li><a href='~b'>a</a></li>\n\n<li>c~ d</li>\n</ul>",
|
||||
list_with_tildas)
|
||||
end
|
||||
|
||||
|
|
32
test/unit/sanitize_test.rb
Normal file
32
test/unit/sanitize_test.rb
Normal file
|
@ -0,0 +1,32 @@
|
|||
#!/usr/bin/env ruby
|
||||
|
||||
require File.expand_path(File.dirname(__FILE__) + '/../test_helper')
|
||||
require 'sanitize'
|
||||
|
||||
# Unit tests for the entity handling of the sanitizer helpers
# (sanitize_xhtml, sanitize_html, sanitize_rexml) and of String#to_utf8.
class SanitizeTest < Test::Unit::TestCase

  def setup
  end

  # Parses +string+ as the content of a wrapper <div> in the XHTML namespace
  # and returns the resulting REXML::Document.
  def rexml_doc(string)
    REXML::Document.new(
      "<div xmlns='http://www.w3.org/1999/xhtml'>#{string}</div>")
  end

  # Runs sanitize_rexml over the wrapped fragment and strips the wrapper
  # <div> again, so the result can be compared with the string-based
  # sanitizers. If the wrapper is not found, the string is returned as is.
  def my_rex(string)
    sanitize_rexml(rexml_doc(string)).gsub(%r{\A<div xmlns="http://www\.w3\.org/1999/xhtml">(.*)</div>\Z}m, '\1')
  end

  # Every sanitize_* path is expected to turn both named and numeric
  # character references into raw UTF-8 bytes, while String#to_utf8 is
  # expected to convert only the named ones (see +output2+).
  #
  # NOTE(review): the entity references in +input+ and +output2+ were
  # restored from the decoded characters shown in a page rendering of this
  # file (&phi;, &Aopf;, &#x1D538;, &#x2057;) -- confirm against the
  # repository copy.
  def test_sanitize_named_entities
    input = '<p>Greek &phi;, double-struck &Aopf;, numeric &#x1D538; &#x2057;</p>'
    output = "<p>Greek \317\225, double-struck \360\235\224\270, numeric \360\235\224\270 \342\201\227</p>"
    output2 = "<p>Greek \317\225, double-struck \360\235\224\270, numeric &#x1D538; &#x2057;</p>"
    assert_equal(output, sanitize_xhtml(input))
    assert_equal(output, sanitize_html(input))
    assert_equal(output, my_rex(input))
    assert_equal(output2, input.to_utf8)
  end

end
|
Loading…
Add table
Add a link
Reference in a new issue