Nasty!
How did a well-formedness bug creep into the code? I *swear* this used to work.
This commit is contained in:
parent
1bef71bbf1
commit
3843fa608d
|
@ -2329,25 +2329,17 @@ end
|
|||
when /\Aquot\z/ni then '"'
|
||||
when /\Aapos\z/ni then "'"
|
||||
when /\A#0*(\d+)\z/n then
|
||||
if Integer($1) < 256
|
||||
Integer($1).chr
|
||||
else
|
||||
if Integer($1) < 1114111
|
||||
[Integer($1)].pack("U")
|
||||
else
|
||||
"&##{$1};"
|
||||
end
|
||||
end
|
||||
when /\A#x([0-9a-f]+)\z/ni then
|
||||
if $1.hex < 256
|
||||
[$1.hex].pack("U")
|
||||
else
|
||||
if $1.hex < 1114111
|
||||
[$1.hex].pack("U")
|
||||
else
|
||||
"&#x#{$1};"
|
||||
end
|
||||
end
|
||||
else
|
||||
"&#{match};"
|
||||
end
|
||||
|
|
|
@ -23,9 +23,9 @@ class SanitizerTest < Test::Unit::TestCase
|
|||
end
|
||||
|
||||
def test_sanitize_named_entities
|
||||
input = '<p>Greek &phis; φ, double-struck 𝔸, numeric 𝔸 ⁗, uppercase ™ <</p>'
|
||||
output = "<p>Greek \317\225 \317\206, double-struck \360\235\224\270, numeric \360\235\224\270 \342\201\227, uppercase \342\204\242 <</p>"
|
||||
output2 = "<p>Greek \317\225 \317\206, double-struck \360\235\224\270, numeric 𝔸 ⁗, uppercase \342\204\242 <</p>"
|
||||
input = '<p>Greek &phis; φ, double-struck 𝔸, numeric   𝔸 ⁗, uppercase ™ <</p>'
|
||||
output = "<p>Greek \317\225 \317\206, double-struck \360\235\224\270, numeric \302\240 \302\240 \360\235\224\270 \342\201\227, uppercase \342\204\242 <</p>"
|
||||
output2 = "<p>Greek \317\225 \317\206, double-struck \360\235\224\270, numeric \302\240   𝔸 ⁗, uppercase \342\204\242 <</p>"
|
||||
check_sanitization(input, output, output, output)
|
||||
assert_equal(output2, input.to_utf8.as_bytes)
|
||||
end
|
||||
|
|
Loading…
Reference in a new issue