Merge branch 'bzr/golem' of /Users/distler/Sites/code/instiki
This commit is contained in:
commit
a35921a90d
4 changed files with 68 additions and 41 deletions
|
@ -436,7 +436,7 @@ class String
|
|||
'plussim' => '⨦',
|
||||
'plustwo' => '⨧',
|
||||
'prod' => '∏',
|
||||
'race' => '⧚',
|
||||
'race' => '∽̱',
|
||||
'roplus' => '⨮',
|
||||
'rotimes' => '⨵',
|
||||
'rthree' => '⋌',
|
||||
|
@ -624,7 +624,7 @@ class String
|
|||
'iiota' => '℩',
|
||||
'image' => 'ℑ',
|
||||
'imath' => 'ı',
|
||||
'jmath' => 'j',
|
||||
'jmath' => 'ȷ',
|
||||
'laemptyv' => '⦴',
|
||||
'lltri' => '◺',
|
||||
'lrtri' => '⊿',
|
||||
|
@ -638,7 +638,7 @@ class String
|
|||
'range' => '⦥',
|
||||
'real' => 'ℜ',
|
||||
'tbrk' => '⎴',
|
||||
'trpezium' => '�',
|
||||
'trpezium' => '⏢',
|
||||
'ultri' => '◸',
|
||||
'urtri' => '◹',
|
||||
'vzigzag' => '⦚',
|
||||
|
@ -844,8 +844,10 @@ class String
|
|||
'nu' => 'ν',
|
||||
'Omega' => 'Ω',
|
||||
'omega' => 'ω',
|
||||
'phgr' => 'φ',
|
||||
'Phi' => 'Φ',
|
||||
'phi' => 'ϕ',
|
||||
'phi' => 'φ',
|
||||
'phis' => 'ϕ',
|
||||
'phiv' => 'φ',
|
||||
'Pi' => 'Π',
|
||||
'pi' => 'π',
|
||||
|
@ -1006,7 +1008,7 @@ class String
|
|||
'andv' => '⩚',
|
||||
'angrt' => '∟',
|
||||
'angsph' => '∢',
|
||||
'angst' => 'Å',
|
||||
'angst' => 'Å',
|
||||
'ap' => '≈',
|
||||
'apacir' => '⩯',
|
||||
'awconint' => '∳',
|
||||
|
@ -1036,7 +1038,7 @@ class String
|
|||
'dsol' => '⧶',
|
||||
'dtdot' => '⋱',
|
||||
'dwangle' => '⦦',
|
||||
'elinters' => '�',
|
||||
'elinters' => '⏧',
|
||||
'epar' => '⋕',
|
||||
'eparsl' => '⧣',
|
||||
'equiv' => '≡',
|
||||
|
@ -1063,13 +1065,13 @@ class String
|
|||
'isinsv' => '⋳',
|
||||
'isinv' => '∈',
|
||||
'lagran' => 'ℒ',
|
||||
'Lang' => '《',
|
||||
'lang' => '〈',
|
||||
'Lang' => '⟪',
|
||||
'lang' => '⟨',
|
||||
'lArr' => '⇐',
|
||||
'lbbrk' => '〔',
|
||||
'lbbrk' => '❲',
|
||||
'le' => '≤',
|
||||
'loang' => '〘',
|
||||
'lobrk' => '〚',
|
||||
'loang' => '⟬',
|
||||
'lobrk' => '⟦',
|
||||
'lopar' => '⦅',
|
||||
'lowast' => '∗',
|
||||
'minus' => '−',
|
||||
|
@ -1123,12 +1125,12 @@ class String
|
|||
'qprime' => '⁗',
|
||||
'quatint' => '⨖',
|
||||
'radic' => '√',
|
||||
'Rang' => '》',
|
||||
'rang' => '〉',
|
||||
'Rang' => '⟫',
|
||||
'rang' => '⟩',
|
||||
'rArr' => '⇒',
|
||||
'rbbrk' => '〕',
|
||||
'roang' => '〙',
|
||||
'robrk' => '〛',
|
||||
'rbbrk' => '❳',
|
||||
'roang' => '⟭',
|
||||
'robrk' => '⟧',
|
||||
'ropar' => '⦆',
|
||||
'rppolint' => '⨒',
|
||||
'scpolint' => '⨓',
|
||||
|
@ -1529,7 +1531,7 @@ class String
|
|||
'nbsp' => ' ',
|
||||
'not' => '¬',
|
||||
'num' => '#',
|
||||
'ohm' => 'Ω',
|
||||
'ohm' => 'Ω',
|
||||
'ordf' => 'ª',
|
||||
'ordm' => 'º',
|
||||
'para' => '¶',
|
||||
|
@ -1590,6 +1592,7 @@ class String
|
|||
'fflig' => 'ff',
|
||||
'ffllig' => 'ffl',
|
||||
'filig' => 'fi',
|
||||
'fjlig' => 'fj',
|
||||
'flat' => '♭',
|
||||
'fllig' => 'fl',
|
||||
'frac13' => '⅓',
|
||||
|
@ -1714,9 +1717,9 @@ class String
|
|||
'NotSucceedsTilde' => '≿̸',
|
||||
'oopf' => '𝕠',
|
||||
'OverBar' => '¯',
|
||||
'OverBrace' => '︷',
|
||||
'OverBrace' => '⏞',
|
||||
'OverBracket' => '⎴',
|
||||
'OverParenthesis' => '︵',
|
||||
'OverParenthesis' => '⏜',
|
||||
'planckh' => 'ℎ',
|
||||
'popf' => '𝕡',
|
||||
'Product' => '∏',
|
||||
|
@ -1738,9 +1741,9 @@ class String
|
|||
'ThickSpace' => '   ',
|
||||
'topf' => '𝕥',
|
||||
'UnderBar' => '̲',
|
||||
'UnderBrace' => '︸',
|
||||
'UnderBrace' => '⏟',
|
||||
'UnderBracket' => '⎵',
|
||||
'UnderParenthesis' => '︶',
|
||||
'UnderParenthesis' => '⏝',
|
||||
'uopf' => '𝕦',
|
||||
'UpArrowBar' => '⤒',
|
||||
'Upsilon' => 'Υ',
|
||||
|
@ -1944,11 +1947,11 @@ class String
|
|||
'intprod' => '⨼',
|
||||
'InvisibleComma' => '⁣',
|
||||
'InvisibleTimes' => '⁢',
|
||||
'langle' => '〈',
|
||||
'langle' => '⟨',
|
||||
'Laplacetrf' => 'ℒ',
|
||||
'lbrace' => '{',
|
||||
'lbrack' => '[',
|
||||
'LeftAngleBracket' => '〈',
|
||||
'LeftAngleBracket' => '⟨',
|
||||
'LeftArrow' => '←',
|
||||
'Leftarrow' => '⇐',
|
||||
'leftarrow' => '←',
|
||||
|
@ -1956,7 +1959,7 @@ class String
|
|||
'LeftArrowRightArrow' => '⇆',
|
||||
'leftarrowtail' => '↢',
|
||||
'LeftCeiling' => '⌈',
|
||||
'LeftDoubleBracket' => '〚',
|
||||
'LeftDoubleBracket' => '⟦',
|
||||
'LeftDownVector' => '⇃',
|
||||
'LeftFloor' => '⌊',
|
||||
'leftharpoondown' => '↽',
|
||||
|
@ -2136,7 +2139,7 @@ class String
|
|||
'propto' => '∝',
|
||||
'quaternions' => 'ℍ',
|
||||
'questeq' => '≟',
|
||||
'rangle' => '〉',
|
||||
'rangle' => '⟩',
|
||||
'rationals' => 'ℚ',
|
||||
'rbrace' => '}',
|
||||
'rbrack' => ']',
|
||||
|
@ -2147,7 +2150,7 @@ class String
|
|||
'ReverseElement' => '∋',
|
||||
'ReverseEquilibrium' => '⇋',
|
||||
'ReverseUpEquilibrium' => '⥯',
|
||||
'RightAngleBracket' => '〉',
|
||||
'RightAngleBracket' => '⟩',
|
||||
'RightArrow' => '→',
|
||||
'Rightarrow' => '⇒',
|
||||
'rightarrow' => '→',
|
||||
|
@ -2155,7 +2158,7 @@ class String
|
|||
'RightArrowLeftArrow' => '⇄',
|
||||
'rightarrowtail' => '↣',
|
||||
'RightCeiling' => '⌉',
|
||||
'RightDoubleBracket' => '〛',
|
||||
'RightDoubleBracket' => '⟧',
|
||||
'RightDownVector' => '⇂',
|
||||
'RightFloor' => '⌋',
|
||||
'rightharpoondown' => '⇁',
|
||||
|
@ -2299,7 +2302,14 @@ class String
|
|||
'wedge' => '∧',
|
||||
'wp' => '℘',
|
||||
'wr' => '≀',
|
||||
'zeetrf' => 'ℨ'
|
||||
'zeetrf' => 'ℨ',
|
||||
'AMP' => '&',
|
||||
'COPY' => '©',
|
||||
'GT' => '>',
|
||||
'LT' => '<',
|
||||
'QUOT' => '"',
|
||||
'REG' => '®',
|
||||
'TRADE' => '™'
|
||||
} unless const_defined? "MATHML_ENTITIES"
|
||||
#:startdoc:
|
||||
|
||||
|
@ -2363,8 +2373,8 @@ class String
|
|||
end
|
||||
|
||||
def convert_to_utf8 #:nodoc:
|
||||
if self =~ /^(lt|gt|amp|quot|apos)$/
|
||||
self.replace "&" + self + ";"
|
||||
if self =~ /^(lt|gt|amp|quot|apos)$/i
|
||||
self.replace "&" + self.downcase + ";"
|
||||
elsif MATHML_ENTITIES.has_key?(self)
|
||||
self.replace MATHML_ENTITIES[self].split(';').collect {|s| s.gsub(/^&#x([A-F0-9]+)$/, '\1').hex }.pack('U*')
|
||||
else
|
||||
|
|
|
@ -313,7 +313,7 @@ class String
|
|||
'plussim' => '⨦',
|
||||
'plustwo' => '⨧',
|
||||
'prod' => '∏',
|
||||
'race' => '⧚',
|
||||
'race' => '∽̱',
|
||||
'roplus' => '⨮',
|
||||
'rotimes' => '⨵',
|
||||
'rthree' => '⋌',
|
||||
|
@ -721,8 +721,10 @@ class String
|
|||
'nu' => 'ν',
|
||||
'Omega' => 'Ω',
|
||||
'omega' => 'ω',
|
||||
'phgr' => 'φ',
|
||||
'Phi' => 'Φ',
|
||||
'phi' => 'ϕ',
|
||||
'phi' => 'φ',
|
||||
'phis' => 'ϕ',
|
||||
'phiv' => 'φ',
|
||||
'Pi' => 'Π',
|
||||
'pi' => 'π',
|
||||
|
@ -883,7 +885,7 @@ class String
|
|||
'andv' => '⩚',
|
||||
'angrt' => '∟',
|
||||
'angsph' => '∢',
|
||||
'angst' => 'Å',
|
||||
'angst' => 'Å',
|
||||
'ap' => '≈',
|
||||
'apacir' => '⩯',
|
||||
'awconint' => '∳',
|
||||
|
@ -1406,7 +1408,7 @@ class String
|
|||
'nbsp' => ' ',
|
||||
'not' => '¬',
|
||||
'num' => '#',
|
||||
'ohm' => 'Ω',
|
||||
'ohm' => 'Ω',
|
||||
'ordf' => 'ª',
|
||||
'ordm' => 'º',
|
||||
'para' => '¶',
|
||||
|
@ -2177,7 +2179,14 @@ class String
|
|||
'wedge' => '∧',
|
||||
'wp' => '℘',
|
||||
'wr' => '≀',
|
||||
'zeetrf' => 'ℨ'
|
||||
'zeetrf' => 'ℨ',
|
||||
'AMP' => '&',
|
||||
'COPY' => '©',
|
||||
'GT' => '>',
|
||||
'LT' => '<',
|
||||
'QUOT' => '"',
|
||||
'REG' => '®',
|
||||
'TRADE' => '™'
|
||||
}
|
||||
#:startdoc:
|
||||
|
||||
|
@ -2286,8 +2295,8 @@ class String
|
|||
end
|
||||
|
||||
def convert_to_utf8 #:nodoc:
|
||||
if self =~ /^(lt|gt|amp|quot|apos)$/
|
||||
self.replace "&" + self + ";"
|
||||
if self =~ /^(lt|gt|amp|quot|apos)$/i
|
||||
self.replace "&" + self.downcase + ";"
|
||||
elsif MATHML_ENTITIES.has_key?(self)
|
||||
self.replace MATHML_ENTITIES[self].split(';').collect {|s| s.gsub(/^&#x([A-F0-9]+)$/, '\1').hex }.pack('U*')
|
||||
else
|
||||
|
|
|
@ -27,13 +27,13 @@ class SanitizeTest < Test::Unit::TestCase
|
|||
end
|
||||
|
||||
def my_rex(string)
|
||||
sanitize_rexml(rexml_doc(string)).gsub(/\A<div xmlns="http:\/\/www.w3.org\/1999\/xhtml">(.*)<\/div>\Z/m, '\1')
|
||||
sanitize_rexml(rexml_doc(string.to_utf8)).gsub(/\A<div xmlns="http:\/\/www.w3.org\/1999\/xhtml">(.*)<\/div>\Z/m, '\1')
|
||||
end
|
||||
|
||||
def test_sanitize_named_entities
|
||||
input = '<p>Greek φ, double-struck 𝔸, numeric 𝔸 ⁗</p>'
|
||||
output = "<p>Greek \317\225, double-struck \360\235\224\270, numeric \360\235\224\270 \342\201\227</p>"
|
||||
output2 = "<p>Greek \317\225, double-struck \360\235\224\270, numeric 𝔸 ⁗</p>"
|
||||
input = '<p>Greek &phis; φ, double-struck 𝔸, numeric 𝔸 ⁗, uppercase ™ <</p>'
|
||||
output = "<p>Greek \317\225 \317\206, double-struck \360\235\224\270, numeric \360\235\224\270 \342\201\227, uppercase \342\204\242 <</p>"
|
||||
output2 = "<p>Greek \317\225 \317\206, double-struck \360\235\224\270, numeric 𝔸 ⁗, uppercase \342\204\242 <</p>"
|
||||
assert_equal(output, sanitize_xhtml(input))
|
||||
assert_equal(output, sanitize_html(input))
|
||||
assert_equal(output, my_rex(input))
|
||||
|
|
|
@ -22,6 +22,14 @@ class SanitizerTest < Test::Unit::TestCase
|
|||
assert_equal xhtmloutput, do_sanitize_xhtml(input)
|
||||
end
|
||||
|
||||
def test_sanitize_named_entities
|
||||
input = '<p>Greek &phis; φ, double-struck 𝔸, numeric 𝔸 ⁗, uppercase ™ <</p>'
|
||||
output = "<p>Greek \317\225 \317\206, double-struck \360\235\224\270, numeric \360\235\224\270 \342\201\227, uppercase \342\204\242 <</p>"
|
||||
output2 = "<p>Greek \317\225 \317\206, double-struck \360\235\224\270, numeric 𝔸 ⁗, uppercase \342\204\242 <</p>"
|
||||
check_sanitization(input, output, output, output)
|
||||
assert_equal(output2, input.to_utf8)
|
||||
end
|
||||
|
||||
Sanitizer::ALLOWED_ELEMENTS.each do |tag_name|
|
||||
define_method "test_should_allow_#{tag_name}_tag" do
|
||||
input = "<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>"
|
||||
|
|
Loading…
Reference in a new issue