diff --git a/lib/instiki_stringsupport.rb b/lib/instiki_stringsupport.rb index 5ea52a55..0c25bcd3 100644 --- a/lib/instiki_stringsupport.rb +++ b/lib/instiki_stringsupport.rb @@ -2329,24 +2329,16 @@ end when /\Aquot\z/ni then '"' when /\Aapos\z/ni then "'" when /\A#0*(\d+)\z/n then - if Integer($1) < 256 - Integer($1).chr + if Integer($1) < 1114111 + [Integer($1)].pack("U") else - if Integer($1) < 1114111 - [Integer($1)].pack("U") - else - "&##{$1};" - end + "&##{$1};" end when /\A#x([0-9a-f]+)\z/ni then - if $1.hex < 256 + if $1.hex < 1114111 [$1.hex].pack("U") else - if $1.hex < 1114111 - [$1.hex].pack("U") - else - "&#x#{$1};" - end + "&#x#{$1};" end else "&#{match};" diff --git a/test/unit/sanitizer_test.rb b/test/unit/sanitizer_test.rb index 7579674a..a5cea1bf 100644 --- a/test/unit/sanitizer_test.rb +++ b/test/unit/sanitizer_test.rb @@ -23,9 +23,9 @@ class SanitizerTest < Test::Unit::TestCase end def test_sanitize_named_entities - input = '

Greek &phis; φ, double-struck 𝔸, numeric 𝔸 ⁗, uppercase ™ <

' - output = "

Greek \317\225 \317\206, double-struck \360\235\224\270, numeric \360\235\224\270 \342\201\227, uppercase \342\204\242 <

" - output2 = "

Greek \317\225 \317\206, double-struck \360\235\224\270, numeric 𝔸 ⁗, uppercase \342\204\242 <

" + input = '

Greek &phis; φ, double-struck 𝔸, numeric     𝔸 ⁗, uppercase ™ <

' + output = "

Greek \317\225 \317\206, double-struck \360\235\224\270, numeric \302\240 \302\240 \360\235\224\270 \342\201\227, uppercase \342\204\242 <

" + output2 = "

Greek \317\225 \317\206, double-struck \360\235\224\270, numeric \302\240   𝔸 ⁗, uppercase \342\204\242 <

" check_sanitization(input, output, output, output) assert_equal(output2, input.to_utf8.as_bytes) end