Nasty!
How did a well-formedness bug creep into the code? I *swear* this used to work.
This commit is contained in:
parent
1bef71bbf1
commit
3843fa608d
|
@ -2329,24 +2329,16 @@ end
|
||||||
when /\Aquot\z/ni then '"'
|
when /\Aquot\z/ni then '"'
|
||||||
when /\Aapos\z/ni then "'"
|
when /\Aapos\z/ni then "'"
|
||||||
when /\A#0*(\d+)\z/n then
|
when /\A#0*(\d+)\z/n then
|
||||||
if Integer($1) < 256
|
if Integer($1) < 1114111
|
||||||
Integer($1).chr
|
[Integer($1)].pack("U")
|
||||||
else
|
else
|
||||||
if Integer($1) < 1114111
|
"&##{$1};"
|
||||||
[Integer($1)].pack("U")
|
|
||||||
else
|
|
||||||
"&##{$1};"
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
when /\A#x([0-9a-f]+)\z/ni then
|
when /\A#x([0-9a-f]+)\z/ni then
|
||||||
if $1.hex < 256
|
if $1.hex < 1114111
|
||||||
[$1.hex].pack("U")
|
[$1.hex].pack("U")
|
||||||
else
|
else
|
||||||
if $1.hex < 1114111
|
"&#x#{$1};"
|
||||||
[$1.hex].pack("U")
|
|
||||||
else
|
|
||||||
"&#x#{$1};"
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
"&#{match};"
|
"&#{match};"
|
||||||
|
|
|
@ -23,9 +23,9 @@ class SanitizerTest < Test::Unit::TestCase
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_sanitize_named_entities
|
def test_sanitize_named_entities
|
||||||
input = '<p>Greek &phis; φ, double-struck 𝔸, numeric 𝔸 ⁗, uppercase ™ <</p>'
|
input = '<p>Greek &phis; φ, double-struck 𝔸, numeric   𝔸 ⁗, uppercase ™ <</p>'
|
||||||
output = "<p>Greek \317\225 \317\206, double-struck \360\235\224\270, numeric \360\235\224\270 \342\201\227, uppercase \342\204\242 <</p>"
|
output = "<p>Greek \317\225 \317\206, double-struck \360\235\224\270, numeric \302\240 \302\240 \360\235\224\270 \342\201\227, uppercase \342\204\242 <</p>"
|
||||||
output2 = "<p>Greek \317\225 \317\206, double-struck \360\235\224\270, numeric 𝔸 ⁗, uppercase \342\204\242 <</p>"
|
output2 = "<p>Greek \317\225 \317\206, double-struck \360\235\224\270, numeric \302\240   𝔸 ⁗, uppercase \342\204\242 <</p>"
|
||||||
check_sanitization(input, output, output, output)
|
check_sanitization(input, output, output, output)
|
||||||
assert_equal(output2, input.to_utf8.as_bytes)
|
assert_equal(output2, input.to_utf8.as_bytes)
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in a new issue