Merge branch 'bzr/golem' of /Users/distler/Sites/code/instiki
This commit is contained in:
commit
a35921a90d
|
@ -436,7 +436,7 @@ class String
|
||||||
'plussim' => '⨦',
|
'plussim' => '⨦',
|
||||||
'plustwo' => '⨧',
|
'plustwo' => '⨧',
|
||||||
'prod' => '∏',
|
'prod' => '∏',
|
||||||
'race' => '⧚',
|
'race' => '∽̱',
|
||||||
'roplus' => '⨮',
|
'roplus' => '⨮',
|
||||||
'rotimes' => '⨵',
|
'rotimes' => '⨵',
|
||||||
'rthree' => '⋌',
|
'rthree' => '⋌',
|
||||||
|
@ -624,7 +624,7 @@ class String
|
||||||
'iiota' => '℩',
|
'iiota' => '℩',
|
||||||
'image' => 'ℑ',
|
'image' => 'ℑ',
|
||||||
'imath' => 'ı',
|
'imath' => 'ı',
|
||||||
'jmath' => 'j',
|
'jmath' => 'ȷ',
|
||||||
'laemptyv' => '⦴',
|
'laemptyv' => '⦴',
|
||||||
'lltri' => '◺',
|
'lltri' => '◺',
|
||||||
'lrtri' => '⊿',
|
'lrtri' => '⊿',
|
||||||
|
@ -638,7 +638,7 @@ class String
|
||||||
'range' => '⦥',
|
'range' => '⦥',
|
||||||
'real' => 'ℜ',
|
'real' => 'ℜ',
|
||||||
'tbrk' => '⎴',
|
'tbrk' => '⎴',
|
||||||
'trpezium' => '�',
|
'trpezium' => '⏢',
|
||||||
'ultri' => '◸',
|
'ultri' => '◸',
|
||||||
'urtri' => '◹',
|
'urtri' => '◹',
|
||||||
'vzigzag' => '⦚',
|
'vzigzag' => '⦚',
|
||||||
|
@ -844,8 +844,10 @@ class String
|
||||||
'nu' => 'ν',
|
'nu' => 'ν',
|
||||||
'Omega' => 'Ω',
|
'Omega' => 'Ω',
|
||||||
'omega' => 'ω',
|
'omega' => 'ω',
|
||||||
|
'phgr' => 'φ',
|
||||||
'Phi' => 'Φ',
|
'Phi' => 'Φ',
|
||||||
'phi' => 'ϕ',
|
'phi' => 'φ',
|
||||||
|
'phis' => 'ϕ',
|
||||||
'phiv' => 'φ',
|
'phiv' => 'φ',
|
||||||
'Pi' => 'Π',
|
'Pi' => 'Π',
|
||||||
'pi' => 'π',
|
'pi' => 'π',
|
||||||
|
@ -1006,7 +1008,7 @@ class String
|
||||||
'andv' => '⩚',
|
'andv' => '⩚',
|
||||||
'angrt' => '∟',
|
'angrt' => '∟',
|
||||||
'angsph' => '∢',
|
'angsph' => '∢',
|
||||||
'angst' => 'Å',
|
'angst' => 'Å',
|
||||||
'ap' => '≈',
|
'ap' => '≈',
|
||||||
'apacir' => '⩯',
|
'apacir' => '⩯',
|
||||||
'awconint' => '∳',
|
'awconint' => '∳',
|
||||||
|
@ -1036,7 +1038,7 @@ class String
|
||||||
'dsol' => '⧶',
|
'dsol' => '⧶',
|
||||||
'dtdot' => '⋱',
|
'dtdot' => '⋱',
|
||||||
'dwangle' => '⦦',
|
'dwangle' => '⦦',
|
||||||
'elinters' => '�',
|
'elinters' => '⏧',
|
||||||
'epar' => '⋕',
|
'epar' => '⋕',
|
||||||
'eparsl' => '⧣',
|
'eparsl' => '⧣',
|
||||||
'equiv' => '≡',
|
'equiv' => '≡',
|
||||||
|
@ -1063,13 +1065,13 @@ class String
|
||||||
'isinsv' => '⋳',
|
'isinsv' => '⋳',
|
||||||
'isinv' => '∈',
|
'isinv' => '∈',
|
||||||
'lagran' => 'ℒ',
|
'lagran' => 'ℒ',
|
||||||
'Lang' => '《',
|
'Lang' => '⟪',
|
||||||
'lang' => '〈',
|
'lang' => '⟨',
|
||||||
'lArr' => '⇐',
|
'lArr' => '⇐',
|
||||||
'lbbrk' => '〔',
|
'lbbrk' => '❲',
|
||||||
'le' => '≤',
|
'le' => '≤',
|
||||||
'loang' => '〘',
|
'loang' => '⟬',
|
||||||
'lobrk' => '〚',
|
'lobrk' => '⟦',
|
||||||
'lopar' => '⦅',
|
'lopar' => '⦅',
|
||||||
'lowast' => '∗',
|
'lowast' => '∗',
|
||||||
'minus' => '−',
|
'minus' => '−',
|
||||||
|
@ -1123,12 +1125,12 @@ class String
|
||||||
'qprime' => '⁗',
|
'qprime' => '⁗',
|
||||||
'quatint' => '⨖',
|
'quatint' => '⨖',
|
||||||
'radic' => '√',
|
'radic' => '√',
|
||||||
'Rang' => '》',
|
'Rang' => '⟫',
|
||||||
'rang' => '〉',
|
'rang' => '⟩',
|
||||||
'rArr' => '⇒',
|
'rArr' => '⇒',
|
||||||
'rbbrk' => '〕',
|
'rbbrk' => '❳',
|
||||||
'roang' => '〙',
|
'roang' => '⟭',
|
||||||
'robrk' => '〛',
|
'robrk' => '⟧',
|
||||||
'ropar' => '⦆',
|
'ropar' => '⦆',
|
||||||
'rppolint' => '⨒',
|
'rppolint' => '⨒',
|
||||||
'scpolint' => '⨓',
|
'scpolint' => '⨓',
|
||||||
|
@ -1529,7 +1531,7 @@ class String
|
||||||
'nbsp' => ' ',
|
'nbsp' => ' ',
|
||||||
'not' => '¬',
|
'not' => '¬',
|
||||||
'num' => '#',
|
'num' => '#',
|
||||||
'ohm' => 'Ω',
|
'ohm' => 'Ω',
|
||||||
'ordf' => 'ª',
|
'ordf' => 'ª',
|
||||||
'ordm' => 'º',
|
'ordm' => 'º',
|
||||||
'para' => '¶',
|
'para' => '¶',
|
||||||
|
@ -1590,6 +1592,7 @@ class String
|
||||||
'fflig' => 'ff',
|
'fflig' => 'ff',
|
||||||
'ffllig' => 'ffl',
|
'ffllig' => 'ffl',
|
||||||
'filig' => 'fi',
|
'filig' => 'fi',
|
||||||
|
'fjlig' => 'fj',
|
||||||
'flat' => '♭',
|
'flat' => '♭',
|
||||||
'fllig' => 'fl',
|
'fllig' => 'fl',
|
||||||
'frac13' => '⅓',
|
'frac13' => '⅓',
|
||||||
|
@ -1714,9 +1717,9 @@ class String
|
||||||
'NotSucceedsTilde' => '≿̸',
|
'NotSucceedsTilde' => '≿̸',
|
||||||
'oopf' => '𝕠',
|
'oopf' => '𝕠',
|
||||||
'OverBar' => '¯',
|
'OverBar' => '¯',
|
||||||
'OverBrace' => '︷',
|
'OverBrace' => '⏞',
|
||||||
'OverBracket' => '⎴',
|
'OverBracket' => '⎴',
|
||||||
'OverParenthesis' => '︵',
|
'OverParenthesis' => '⏜',
|
||||||
'planckh' => 'ℎ',
|
'planckh' => 'ℎ',
|
||||||
'popf' => '𝕡',
|
'popf' => '𝕡',
|
||||||
'Product' => '∏',
|
'Product' => '∏',
|
||||||
|
@ -1738,9 +1741,9 @@ class String
|
||||||
'ThickSpace' => '   ',
|
'ThickSpace' => '   ',
|
||||||
'topf' => '𝕥',
|
'topf' => '𝕥',
|
||||||
'UnderBar' => '̲',
|
'UnderBar' => '̲',
|
||||||
'UnderBrace' => '︸',
|
'UnderBrace' => '⏟',
|
||||||
'UnderBracket' => '⎵',
|
'UnderBracket' => '⎵',
|
||||||
'UnderParenthesis' => '︶',
|
'UnderParenthesis' => '⏝',
|
||||||
'uopf' => '𝕦',
|
'uopf' => '𝕦',
|
||||||
'UpArrowBar' => '⤒',
|
'UpArrowBar' => '⤒',
|
||||||
'Upsilon' => 'Υ',
|
'Upsilon' => 'Υ',
|
||||||
|
@ -1944,11 +1947,11 @@ class String
|
||||||
'intprod' => '⨼',
|
'intprod' => '⨼',
|
||||||
'InvisibleComma' => '⁣',
|
'InvisibleComma' => '⁣',
|
||||||
'InvisibleTimes' => '⁢',
|
'InvisibleTimes' => '⁢',
|
||||||
'langle' => '〈',
|
'langle' => '⟨',
|
||||||
'Laplacetrf' => 'ℒ',
|
'Laplacetrf' => 'ℒ',
|
||||||
'lbrace' => '{',
|
'lbrace' => '{',
|
||||||
'lbrack' => '[',
|
'lbrack' => '[',
|
||||||
'LeftAngleBracket' => '〈',
|
'LeftAngleBracket' => '⟨',
|
||||||
'LeftArrow' => '←',
|
'LeftArrow' => '←',
|
||||||
'Leftarrow' => '⇐',
|
'Leftarrow' => '⇐',
|
||||||
'leftarrow' => '←',
|
'leftarrow' => '←',
|
||||||
|
@ -1956,7 +1959,7 @@ class String
|
||||||
'LeftArrowRightArrow' => '⇆',
|
'LeftArrowRightArrow' => '⇆',
|
||||||
'leftarrowtail' => '↢',
|
'leftarrowtail' => '↢',
|
||||||
'LeftCeiling' => '⌈',
|
'LeftCeiling' => '⌈',
|
||||||
'LeftDoubleBracket' => '〚',
|
'LeftDoubleBracket' => '⟦',
|
||||||
'LeftDownVector' => '⇃',
|
'LeftDownVector' => '⇃',
|
||||||
'LeftFloor' => '⌊',
|
'LeftFloor' => '⌊',
|
||||||
'leftharpoondown' => '↽',
|
'leftharpoondown' => '↽',
|
||||||
|
@ -2136,7 +2139,7 @@ class String
|
||||||
'propto' => '∝',
|
'propto' => '∝',
|
||||||
'quaternions' => 'ℍ',
|
'quaternions' => 'ℍ',
|
||||||
'questeq' => '≟',
|
'questeq' => '≟',
|
||||||
'rangle' => '〉',
|
'rangle' => '⟩',
|
||||||
'rationals' => 'ℚ',
|
'rationals' => 'ℚ',
|
||||||
'rbrace' => '}',
|
'rbrace' => '}',
|
||||||
'rbrack' => ']',
|
'rbrack' => ']',
|
||||||
|
@ -2147,7 +2150,7 @@ class String
|
||||||
'ReverseElement' => '∋',
|
'ReverseElement' => '∋',
|
||||||
'ReverseEquilibrium' => '⇋',
|
'ReverseEquilibrium' => '⇋',
|
||||||
'ReverseUpEquilibrium' => '⥯',
|
'ReverseUpEquilibrium' => '⥯',
|
||||||
'RightAngleBracket' => '〉',
|
'RightAngleBracket' => '⟩',
|
||||||
'RightArrow' => '→',
|
'RightArrow' => '→',
|
||||||
'Rightarrow' => '⇒',
|
'Rightarrow' => '⇒',
|
||||||
'rightarrow' => '→',
|
'rightarrow' => '→',
|
||||||
|
@ -2155,7 +2158,7 @@ class String
|
||||||
'RightArrowLeftArrow' => '⇄',
|
'RightArrowLeftArrow' => '⇄',
|
||||||
'rightarrowtail' => '↣',
|
'rightarrowtail' => '↣',
|
||||||
'RightCeiling' => '⌉',
|
'RightCeiling' => '⌉',
|
||||||
'RightDoubleBracket' => '〛',
|
'RightDoubleBracket' => '⟧',
|
||||||
'RightDownVector' => '⇂',
|
'RightDownVector' => '⇂',
|
||||||
'RightFloor' => '⌋',
|
'RightFloor' => '⌋',
|
||||||
'rightharpoondown' => '⇁',
|
'rightharpoondown' => '⇁',
|
||||||
|
@ -2299,7 +2302,14 @@ class String
|
||||||
'wedge' => '∧',
|
'wedge' => '∧',
|
||||||
'wp' => '℘',
|
'wp' => '℘',
|
||||||
'wr' => '≀',
|
'wr' => '≀',
|
||||||
'zeetrf' => 'ℨ'
|
'zeetrf' => 'ℨ',
|
||||||
|
'AMP' => '&',
|
||||||
|
'COPY' => '©',
|
||||||
|
'GT' => '>',
|
||||||
|
'LT' => '<',
|
||||||
|
'QUOT' => '"',
|
||||||
|
'REG' => '®',
|
||||||
|
'TRADE' => '™'
|
||||||
} unless const_defined? "MATHML_ENTITIES"
|
} unless const_defined? "MATHML_ENTITIES"
|
||||||
#:startdoc:
|
#:startdoc:
|
||||||
|
|
||||||
|
@ -2363,8 +2373,8 @@ class String
|
||||||
end
|
end
|
||||||
|
|
||||||
def convert_to_utf8 #:nodoc:
|
def convert_to_utf8 #:nodoc:
|
||||||
if self =~ /^(lt|gt|amp|quot|apos)$/
|
if self =~ /^(lt|gt|amp|quot|apos)$/i
|
||||||
self.replace "&" + self + ";"
|
self.replace "&" + self.downcase + ";"
|
||||||
elsif MATHML_ENTITIES.has_key?(self)
|
elsif MATHML_ENTITIES.has_key?(self)
|
||||||
self.replace MATHML_ENTITIES[self].split(';').collect {|s| s.gsub(/^&#x([A-F0-9]+)$/, '\1').hex }.pack('U*')
|
self.replace MATHML_ENTITIES[self].split(';').collect {|s| s.gsub(/^&#x([A-F0-9]+)$/, '\1').hex }.pack('U*')
|
||||||
else
|
else
|
||||||
|
|
|
@ -313,7 +313,7 @@ class String
|
||||||
'plussim' => '⨦',
|
'plussim' => '⨦',
|
||||||
'plustwo' => '⨧',
|
'plustwo' => '⨧',
|
||||||
'prod' => '∏',
|
'prod' => '∏',
|
||||||
'race' => '⧚',
|
'race' => '∽̱',
|
||||||
'roplus' => '⨮',
|
'roplus' => '⨮',
|
||||||
'rotimes' => '⨵',
|
'rotimes' => '⨵',
|
||||||
'rthree' => '⋌',
|
'rthree' => '⋌',
|
||||||
|
@ -721,8 +721,10 @@ class String
|
||||||
'nu' => 'ν',
|
'nu' => 'ν',
|
||||||
'Omega' => 'Ω',
|
'Omega' => 'Ω',
|
||||||
'omega' => 'ω',
|
'omega' => 'ω',
|
||||||
|
'phgr' => 'φ',
|
||||||
'Phi' => 'Φ',
|
'Phi' => 'Φ',
|
||||||
'phi' => 'ϕ',
|
'phi' => 'φ',
|
||||||
|
'phis' => 'ϕ',
|
||||||
'phiv' => 'φ',
|
'phiv' => 'φ',
|
||||||
'Pi' => 'Π',
|
'Pi' => 'Π',
|
||||||
'pi' => 'π',
|
'pi' => 'π',
|
||||||
|
@ -883,7 +885,7 @@ class String
|
||||||
'andv' => '⩚',
|
'andv' => '⩚',
|
||||||
'angrt' => '∟',
|
'angrt' => '∟',
|
||||||
'angsph' => '∢',
|
'angsph' => '∢',
|
||||||
'angst' => 'Å',
|
'angst' => 'Å',
|
||||||
'ap' => '≈',
|
'ap' => '≈',
|
||||||
'apacir' => '⩯',
|
'apacir' => '⩯',
|
||||||
'awconint' => '∳',
|
'awconint' => '∳',
|
||||||
|
@ -1406,7 +1408,7 @@ class String
|
||||||
'nbsp' => ' ',
|
'nbsp' => ' ',
|
||||||
'not' => '¬',
|
'not' => '¬',
|
||||||
'num' => '#',
|
'num' => '#',
|
||||||
'ohm' => 'Ω',
|
'ohm' => 'Ω',
|
||||||
'ordf' => 'ª',
|
'ordf' => 'ª',
|
||||||
'ordm' => 'º',
|
'ordm' => 'º',
|
||||||
'para' => '¶',
|
'para' => '¶',
|
||||||
|
@ -2177,7 +2179,14 @@ class String
|
||||||
'wedge' => '∧',
|
'wedge' => '∧',
|
||||||
'wp' => '℘',
|
'wp' => '℘',
|
||||||
'wr' => '≀',
|
'wr' => '≀',
|
||||||
'zeetrf' => 'ℨ'
|
'zeetrf' => 'ℨ',
|
||||||
|
'AMP' => '&',
|
||||||
|
'COPY' => '©',
|
||||||
|
'GT' => '>',
|
||||||
|
'LT' => '<',
|
||||||
|
'QUOT' => '"',
|
||||||
|
'REG' => '®',
|
||||||
|
'TRADE' => '™'
|
||||||
}
|
}
|
||||||
#:startdoc:
|
#:startdoc:
|
||||||
|
|
||||||
|
@ -2286,8 +2295,8 @@ class String
|
||||||
end
|
end
|
||||||
|
|
||||||
def convert_to_utf8 #:nodoc:
|
def convert_to_utf8 #:nodoc:
|
||||||
if self =~ /^(lt|gt|amp|quot|apos)$/
|
if self =~ /^(lt|gt|amp|quot|apos)$/i
|
||||||
self.replace "&" + self + ";"
|
self.replace "&" + self.downcase + ";"
|
||||||
elsif MATHML_ENTITIES.has_key?(self)
|
elsif MATHML_ENTITIES.has_key?(self)
|
||||||
self.replace MATHML_ENTITIES[self].split(';').collect {|s| s.gsub(/^&#x([A-F0-9]+)$/, '\1').hex }.pack('U*')
|
self.replace MATHML_ENTITIES[self].split(';').collect {|s| s.gsub(/^&#x([A-F0-9]+)$/, '\1').hex }.pack('U*')
|
||||||
else
|
else
|
||||||
|
|
|
@ -27,13 +27,13 @@ class SanitizeTest < Test::Unit::TestCase
|
||||||
end
|
end
|
||||||
|
|
||||||
def my_rex(string)
|
def my_rex(string)
|
||||||
sanitize_rexml(rexml_doc(string)).gsub(/\A<div xmlns="http:\/\/www.w3.org\/1999\/xhtml">(.*)<\/div>\Z/m, '\1')
|
sanitize_rexml(rexml_doc(string.to_utf8)).gsub(/\A<div xmlns="http:\/\/www.w3.org\/1999\/xhtml">(.*)<\/div>\Z/m, '\1')
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_sanitize_named_entities
|
def test_sanitize_named_entities
|
||||||
input = '<p>Greek φ, double-struck 𝔸, numeric 𝔸 ⁗</p>'
|
input = '<p>Greek &phis; φ, double-struck 𝔸, numeric 𝔸 ⁗, uppercase ™ <</p>'
|
||||||
output = "<p>Greek \317\225, double-struck \360\235\224\270, numeric \360\235\224\270 \342\201\227</p>"
|
output = "<p>Greek \317\225 \317\206, double-struck \360\235\224\270, numeric \360\235\224\270 \342\201\227, uppercase \342\204\242 <</p>"
|
||||||
output2 = "<p>Greek \317\225, double-struck \360\235\224\270, numeric 𝔸 ⁗</p>"
|
output2 = "<p>Greek \317\225 \317\206, double-struck \360\235\224\270, numeric 𝔸 ⁗, uppercase \342\204\242 <</p>"
|
||||||
assert_equal(output, sanitize_xhtml(input))
|
assert_equal(output, sanitize_xhtml(input))
|
||||||
assert_equal(output, sanitize_html(input))
|
assert_equal(output, sanitize_html(input))
|
||||||
assert_equal(output, my_rex(input))
|
assert_equal(output, my_rex(input))
|
||||||
|
|
|
@ -22,6 +22,14 @@ class SanitizerTest < Test::Unit::TestCase
|
||||||
assert_equal xhtmloutput, do_sanitize_xhtml(input)
|
assert_equal xhtmloutput, do_sanitize_xhtml(input)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def test_sanitize_named_entities
|
||||||
|
input = '<p>Greek &phis; φ, double-struck 𝔸, numeric 𝔸 ⁗, uppercase ™ <</p>'
|
||||||
|
output = "<p>Greek \317\225 \317\206, double-struck \360\235\224\270, numeric \360\235\224\270 \342\201\227, uppercase \342\204\242 <</p>"
|
||||||
|
output2 = "<p>Greek \317\225 \317\206, double-struck \360\235\224\270, numeric 𝔸 ⁗, uppercase \342\204\242 <</p>"
|
||||||
|
check_sanitization(input, output, output, output)
|
||||||
|
assert_equal(output2, input.to_utf8)
|
||||||
|
end
|
||||||
|
|
||||||
Sanitizer::ALLOWED_ELEMENTS.each do |tag_name|
|
Sanitizer::ALLOWED_ELEMENTS.each do |tag_name|
|
||||||
define_method "test_should_allow_#{tag_name}_tag" do
|
define_method "test_should_allow_#{tag_name}_tag" do
|
||||||
input = "<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>"
|
input = "<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>"
|
||||||
|
|
Loading…
Reference in a new issue