How did a well-formedness bug creep into
the code? I *swear* this used to work.
This commit is contained in:
Jacques Distler 2011-02-18 12:39:19 -06:00
parent 1bef71bbf1
commit 3843fa608d
2 changed files with 8 additions and 16 deletions

View file

@@ -2329,24 +2329,16 @@ end
when /\Aquot\z/ni then '"'
when /\Aapos\z/ni then "'"
when /\A#0*(\d+)\z/n then
if Integer($1) < 256
Integer($1).chr
if Integer($1) < 1114111
[Integer($1)].pack("U")
else
if Integer($1) < 1114111
[Integer($1)].pack("U")
else
"&##{$1};"
end
"&##{$1};"
end
when /\A#x([0-9a-f]+)\z/ni then
if $1.hex < 256
if $1.hex < 1114111
[$1.hex].pack("U")
else
if $1.hex < 1114111
[$1.hex].pack("U")
else
"&#x#{$1};"
end
"&#x#{$1};"
end
else
"&#{match};"

View file

@@ -23,9 +23,9 @@ class SanitizerTest < Test::Unit::TestCase
end
# Feeds a paragraph containing named and numeric character references through
# check_sanitization and through String#to_utf8, comparing against byte-escaped
# expected strings.
#
# NOTE(review): input/output/output2 are each assigned twice below. This text
# looks like a diff hunk rendered without +/- markers, so presumably the first
# assignment of each pair is the pre-change line and the second is its
# replacement — confirm against the repository. Read as plain Ruby, only the
# second assignment of each variable reaches the assertions.
def test_sanitize_named_entities
# First fixture set: numeric references &#x1D538; and &#8279; only.
input = '<p>Greek &phis; &phi;, double-struck &Aopf;, numeric &#x1D538; &#8279;, uppercase &TRADE; &LT;</p>'
output = "<p>Greek \317\225 \317\206, double-struck \360\235\224\270, numeric \360\235\224\270 \342\201\227, uppercase \342\204\242 &lt;</p>"
output2 = "<p>Greek \317\225 \317\206, double-struck \360\235\224\270, numeric &#x1D538; &#8279;, uppercase \342\204\242 &lt;</p>"
# Second fixture set: additionally contains &nbsp; and &#xA0;, i.e. a code
# point below 256 written both as a named and as a numeric reference.
input = '<p>Greek &phis; &phi;, double-struck &Aopf;, numeric &nbsp; &#xA0; &#x1D538; &#8279;, uppercase &TRADE; &LT;</p>'
# Expected sanitizer result: every reference, named or numeric, appears as
# UTF-8 bytes (\302\240 for both &nbsp; and &#xA0;); &LT; appears as &lt;.
output = "<p>Greek \317\225 \317\206, double-struck \360\235\224\270, numeric \302\240 \302\240 \360\235\224\270 \342\201\227, uppercase \342\204\242 &lt;</p>"
# Expected to_utf8 result: named references appear as UTF-8 bytes, while the
# numeric references (&#xA0;, &#x1D538;, &#8279;) are left verbatim.
output2 = "<p>Greek \317\225 \317\206, double-struck \360\235\224\270, numeric \302\240 &#xA0; &#x1D538; &#8279;, uppercase \342\204\242 &lt;</p>"
# The same expected string is passed for all three expectation arguments.
check_sanitization(input, output, output, output)
assert_equal(output2, input.to_utf8.as_bytes)
end