Merge branch 'bzr/golem' of /Users/distler/Sites/code/instiki

This commit is contained in:
Jacques Distler 2009-09-25 16:02:23 -05:00
commit a35921a90d
4 changed files with 68 additions and 41 deletions

View file

@ -436,7 +436,7 @@ class String
'plussim' => '⨦',
'plustwo' => '⨧',
'prod' => '∏',
'race' => '⧚',
'race' => '∽̱',
'roplus' => '⨮',
'rotimes' => '⨵',
'rthree' => '⋌',
@ -624,7 +624,7 @@ class String
'iiota' => '℩',
'image' => 'ℑ',
'imath' => 'ı',
'jmath' => 'j',
'jmath' => 'ȷ',
'laemptyv' => '⦴',
'lltri' => '◺',
'lrtri' => '⊿',
@ -638,7 +638,7 @@ class String
'range' => '⦥',
'real' => 'ℜ',
'tbrk' => '⎴',
'trpezium' => '�',
'trpezium' => '⏢',
'ultri' => '◸',
'urtri' => '◹',
'vzigzag' => '⦚',
@ -844,8 +844,10 @@ class String
'nu' => 'ν',
'Omega' => 'Ω',
'omega' => 'ω',
'phgr' => 'φ',
'Phi' => 'Φ',
'phi' => 'ϕ',
'phi' => 'φ',
'phis' => 'ϕ',
'phiv' => 'φ',
'Pi' => 'Π',
'pi' => 'π',
@ -1006,7 +1008,7 @@ class String
'andv' => '⩚',
'angrt' => '∟',
'angsph' => '∢',
'angst' => 'Å',
'angst' => 'Å',
'ap' => '≈',
'apacir' => '⩯',
'awconint' => '∳',
@ -1036,7 +1038,7 @@ class String
'dsol' => '⧶',
'dtdot' => '⋱',
'dwangle' => '⦦',
'elinters' => '�',
'elinters' => '⏧',
'epar' => '⋕',
'eparsl' => '⧣',
'equiv' => '≡',
@ -1063,13 +1065,13 @@ class String
'isinsv' => '⋳',
'isinv' => '∈',
'lagran' => 'ℒ',
'Lang' => '《',
'lang' => '〈',
'Lang' => '⟪',
'lang' => '⟨',
'lArr' => '⇐',
'lbbrk' => '〔',
'lbbrk' => '❲',
'le' => '≤',
'loang' => '〘',
'lobrk' => '〚',
'loang' => '⟬',
'lobrk' => '⟦',
'lopar' => '⦅',
'lowast' => '∗',
'minus' => '−',
@ -1123,12 +1125,12 @@ class String
'qprime' => '⁗',
'quatint' => '⨖',
'radic' => '√',
'Rang' => '》',
'rang' => '〉',
'Rang' => '⟫',
'rang' => '⟩',
'rArr' => '⇒',
'rbbrk' => '〕',
'roang' => '〙',
'robrk' => '〛',
'rbbrk' => '❳',
'roang' => '⟭',
'robrk' => '⟧',
'ropar' => '⦆',
'rppolint' => '⨒',
'scpolint' => '⨓',
@ -1529,7 +1531,7 @@ class String
'nbsp' => ' ',
'not' => '¬',
'num' => '#',
'ohm' => 'Ω',
'ohm' => 'Ω',
'ordf' => 'ª',
'ordm' => 'º',
'para' => '¶',
@ -1590,6 +1592,7 @@ class String
'fflig' => 'ff',
'ffllig' => 'ffl',
'filig' => 'fi',
'fjlig' => 'fj',
'flat' => '♭',
'fllig' => 'fl',
'frac13' => '⅓',
@ -1714,9 +1717,9 @@ class String
'NotSucceedsTilde' => '≿̸',
'oopf' => '𝕠',
'OverBar' => '¯',
'OverBrace' => '︷',
'OverBrace' => '⏞',
'OverBracket' => '⎴',
'OverParenthesis' => '︵',
'OverParenthesis' => '⏜',
'planckh' => 'ℎ',
'popf' => '𝕡',
'Product' => '∏',
@ -1738,9 +1741,9 @@ class String
'ThickSpace' => '   ',
'topf' => '𝕥',
'UnderBar' => '̲',
'UnderBrace' => '︸',
'UnderBrace' => '⏟',
'UnderBracket' => '⎵',
'UnderParenthesis' => '︶',
'UnderParenthesis' => '⏝',
'uopf' => '𝕦',
'UpArrowBar' => '⤒',
'Upsilon' => 'Υ',
@ -1944,11 +1947,11 @@ class String
'intprod' => '⨼',
'InvisibleComma' => '⁣',
'InvisibleTimes' => '⁢',
'langle' => '〈',
'langle' => '⟨',
'Laplacetrf' => 'ℒ',
'lbrace' => '{',
'lbrack' => '[',
'LeftAngleBracket' => '〈',
'LeftAngleBracket' => '⟨',
'LeftArrow' => '←',
'Leftarrow' => '⇐',
'leftarrow' => '←',
@ -1956,7 +1959,7 @@ class String
'LeftArrowRightArrow' => '⇆',
'leftarrowtail' => '↢',
'LeftCeiling' => '⌈',
'LeftDoubleBracket' => '〚',
'LeftDoubleBracket' => '⟦',
'LeftDownVector' => '⇃',
'LeftFloor' => '⌊',
'leftharpoondown' => '↽',
@ -2136,7 +2139,7 @@ class String
'propto' => '∝',
'quaternions' => 'ℍ',
'questeq' => '≟',
'rangle' => '〉',
'rangle' => '⟩',
'rationals' => 'ℚ',
'rbrace' => '}',
'rbrack' => ']',
@ -2147,7 +2150,7 @@ class String
'ReverseElement' => '∋',
'ReverseEquilibrium' => '⇋',
'ReverseUpEquilibrium' => '⥯',
'RightAngleBracket' => '〉',
'RightAngleBracket' => '⟩',
'RightArrow' => '→',
'Rightarrow' => '⇒',
'rightarrow' => '→',
@ -2155,7 +2158,7 @@ class String
'RightArrowLeftArrow' => '⇄',
'rightarrowtail' => '↣',
'RightCeiling' => '⌉',
'RightDoubleBracket' => '〛',
'RightDoubleBracket' => '⟧',
'RightDownVector' => '⇂',
'RightFloor' => '⌋',
'rightharpoondown' => '⇁',
@ -2299,7 +2302,14 @@ class String
'wedge' => '∧',
'wp' => '℘',
'wr' => '≀',
'zeetrf' => 'ℨ'
'zeetrf' => 'ℨ',
'AMP' => '&#x0026',
'COPY' => '&#x00A9',
'GT' => '&#x003E',
'LT' => '&#x003C',
'QUOT' => '&#x0022',
'REG' => '&#x00AE',
'TRADE' => '&#x2122'
} unless const_defined? "MATHML_ENTITIES"
#:startdoc:
@ -2363,8 +2373,8 @@ class String
end
def convert_to_utf8 #:nodoc:
if self =~ /^(lt|gt|amp|quot|apos)$/
self.replace "&" + self + ";"
if self =~ /^(lt|gt|amp|quot|apos)$/i
self.replace "&" + self.downcase + ";"
elsif MATHML_ENTITIES.has_key?(self)
self.replace MATHML_ENTITIES[self].split(';').collect {|s| s.gsub(/^&#x([A-F0-9]+)$/, '\1').hex }.pack('U*')
else

View file

@ -313,7 +313,7 @@ class String
'plussim' => '⨦',
'plustwo' => '⨧',
'prod' => '∏',
'race' => '⧚',
'race' => '∽̱',
'roplus' => '⨮',
'rotimes' => '⨵',
'rthree' => '⋌',
@ -721,8 +721,10 @@ class String
'nu' => 'ν',
'Omega' => 'Ω',
'omega' => 'ω',
'phgr' => 'φ',
'Phi' => 'Φ',
'phi' => 'ϕ',
'phi' => 'φ',
'phis' => 'ϕ',
'phiv' => 'φ',
'Pi' => 'Π',
'pi' => 'π',
@ -883,7 +885,7 @@ class String
'andv' => '⩚',
'angrt' => '∟',
'angsph' => '∢',
'angst' => 'Å',
'angst' => 'Å',
'ap' => '≈',
'apacir' => '⩯',
'awconint' => '∳',
@ -1406,7 +1408,7 @@ class String
'nbsp' => ' ',
'not' => '¬',
'num' => '#',
'ohm' => 'Ω',
'ohm' => 'Ω',
'ordf' => 'ª',
'ordm' => 'º',
'para' => '¶',
@ -2177,7 +2179,14 @@ class String
'wedge' => '∧',
'wp' => '℘',
'wr' => '≀',
'zeetrf' => 'ℨ'
'zeetrf' => 'ℨ',
'AMP' => '&#x0026',
'COPY' => '&#x00A9',
'GT' => '&#x003E',
'LT' => '&#x003C',
'QUOT' => '&#x0022',
'REG' => '&#x00AE',
'TRADE' => '&#x2122'
}
#:startdoc:
@ -2286,8 +2295,8 @@ class String
end
def convert_to_utf8 #:nodoc:
if self =~ /^(lt|gt|amp|quot|apos)$/
self.replace "&" + self + ";"
if self =~ /^(lt|gt|amp|quot|apos)$/i
self.replace "&" + self.downcase + ";"
elsif MATHML_ENTITIES.has_key?(self)
self.replace MATHML_ENTITIES[self].split(';').collect {|s| s.gsub(/^&#x([A-F0-9]+)$/, '\1').hex }.pack('U*')
else

View file

@ -27,13 +27,13 @@ class SanitizeTest < Test::Unit::TestCase
end
def my_rex(string)
sanitize_rexml(rexml_doc(string)).gsub(/\A<div xmlns="http:\/\/www.w3.org\/1999\/xhtml">(.*)<\/div>\Z/m, '\1')
sanitize_rexml(rexml_doc(string.to_utf8)).gsub(/\A<div xmlns="http:\/\/www.w3.org\/1999\/xhtml">(.*)<\/div>\Z/m, '\1')
end
def test_sanitize_named_entities
input = '<p>Greek &phi;, double-struck &Aopf;, numeric &#x1D538; &#8279;</p>'
output = "<p>Greek \317\225, double-struck \360\235\224\270, numeric \360\235\224\270 \342\201\227</p>"
output2 = "<p>Greek \317\225, double-struck \360\235\224\270, numeric &#x1D538; &#8279;</p>"
input = '<p>Greek &phis; &phi;, double-struck &Aopf;, numeric &#x1D538; &#8279;, uppercase &TRADE; &LT;</p>'
output = "<p>Greek \317\225 \317\206, double-struck \360\235\224\270, numeric \360\235\224\270 \342\201\227, uppercase \342\204\242 &lt;</p>"
output2 = "<p>Greek \317\225 \317\206, double-struck \360\235\224\270, numeric &#x1D538; &#8279;, uppercase \342\204\242 &lt;</p>"
assert_equal(output, sanitize_xhtml(input))
assert_equal(output, sanitize_html(input))
assert_equal(output, my_rex(input))

View file

@ -22,6 +22,14 @@ class SanitizerTest < Test::Unit::TestCase
assert_equal xhtmloutput, do_sanitize_xhtml(input)
end
# Checks how named entities (including the uppercase forms &TRADE; and &LT;)
# and numeric character references in the input are converted.
def test_sanitize_named_entities
input = '<p>Greek &phis; &phi;, double-struck &Aopf;, numeric &#x1D538; &#8279;, uppercase &TRADE; &LT;</p>'
# Expected sanitizer result. The octal escapes are the UTF-8 bytes of
# U+03D5, U+03C6, U+1D538 (twice), U+2057 and U+2122; &LT; is expected
# to come out as the lowercase entity &lt;.
output = "<p>Greek \317\225 \317\206, double-struck \360\235\224\270, numeric \360\235\224\270 \342\201\227, uppercase \342\204\242 &lt;</p>"
# Expected result of input.to_utf8: identical to the above except that the
# numeric references &#x1D538; and &#8279; are left unexpanded.
output2 = "<p>Greek \317\225 \317\206, double-struck \360\235\224\270, numeric &#x1D538; &#8279;, uppercase \342\204\242 &lt;</p>"
# check_sanitization is defined outside this excerpt; presumably its three
# expected-value arguments correspond to different sanitizer outputs, all of
# which should match here -- TODO confirm against the helper.
check_sanitization(input, output, output, output)
assert_equal(output2, input.to_utf8)
end
Sanitizer::ALLOWED_ELEMENTS.each do |tag_name|
define_method "test_should_allow_#{tag_name}_tag" do
input = "<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>"