Merge branch 'bzr/golem' of /Users/distler/Sites/code/instiki

This commit is contained in:
Jacques Distler 2009-09-25 16:02:23 -05:00
commit a35921a90d
4 changed files with 68 additions and 41 deletions

View file

@ -436,7 +436,7 @@ class String
'plussim' => '⨦', 'plussim' => '⨦',
'plustwo' => '⨧', 'plustwo' => '⨧',
'prod' => '∏', 'prod' => '∏',
'race' => '⧚', 'race' => '∽̱',
'roplus' => '⨮', 'roplus' => '⨮',
'rotimes' => '⨵', 'rotimes' => '⨵',
'rthree' => '⋌', 'rthree' => '⋌',
@ -624,7 +624,7 @@ class String
'iiota' => '℩', 'iiota' => '℩',
'image' => 'ℑ', 'image' => 'ℑ',
'imath' => 'ı', 'imath' => 'ı',
'jmath' => 'j', 'jmath' => 'ȷ',
'laemptyv' => '⦴', 'laemptyv' => '⦴',
'lltri' => '◺', 'lltri' => '◺',
'lrtri' => '⊿', 'lrtri' => '⊿',
@ -638,7 +638,7 @@ class String
'range' => '⦥', 'range' => '⦥',
'real' => 'ℜ', 'real' => 'ℜ',
'tbrk' => '⎴', 'tbrk' => '⎴',
'trpezium' => '�', 'trpezium' => '⏢',
'ultri' => '◸', 'ultri' => '◸',
'urtri' => '◹', 'urtri' => '◹',
'vzigzag' => '⦚', 'vzigzag' => '⦚',
@ -844,8 +844,10 @@ class String
'nu' => 'ν', 'nu' => 'ν',
'Omega' => 'Ω', 'Omega' => 'Ω',
'omega' => 'ω', 'omega' => 'ω',
'phgr' => 'φ',
'Phi' => 'Φ', 'Phi' => 'Φ',
'phi' => 'ϕ', 'phi' => 'φ',
'phis' => 'ϕ',
'phiv' => 'φ', 'phiv' => 'φ',
'Pi' => 'Π', 'Pi' => 'Π',
'pi' => 'π', 'pi' => 'π',
@ -1006,7 +1008,7 @@ class String
'andv' => '⩚', 'andv' => '⩚',
'angrt' => '∟', 'angrt' => '∟',
'angsph' => '∢', 'angsph' => '∢',
'angst' => 'Å', 'angst' => 'Å',
'ap' => '≈', 'ap' => '≈',
'apacir' => '⩯', 'apacir' => '⩯',
'awconint' => '∳', 'awconint' => '∳',
@ -1036,7 +1038,7 @@ class String
'dsol' => '⧶', 'dsol' => '⧶',
'dtdot' => '⋱', 'dtdot' => '⋱',
'dwangle' => '⦦', 'dwangle' => '⦦',
'elinters' => '�', 'elinters' => '⏧',
'epar' => '⋕', 'epar' => '⋕',
'eparsl' => '⧣', 'eparsl' => '⧣',
'equiv' => '≡', 'equiv' => '≡',
@ -1063,13 +1065,13 @@ class String
'isinsv' => '⋳', 'isinsv' => '⋳',
'isinv' => '∈', 'isinv' => '∈',
'lagran' => 'ℒ', 'lagran' => 'ℒ',
'Lang' => '《', 'Lang' => '⟪',
'lang' => '〈', 'lang' => '⟨',
'lArr' => '⇐', 'lArr' => '⇐',
'lbbrk' => '〔', 'lbbrk' => '❲',
'le' => '≤', 'le' => '≤',
'loang' => '〘', 'loang' => '⟬',
'lobrk' => '〚', 'lobrk' => '⟦',
'lopar' => '⦅', 'lopar' => '⦅',
'lowast' => '∗', 'lowast' => '∗',
'minus' => '−', 'minus' => '−',
@ -1123,12 +1125,12 @@ class String
'qprime' => '⁗', 'qprime' => '⁗',
'quatint' => '⨖', 'quatint' => '⨖',
'radic' => '√', 'radic' => '√',
'Rang' => '》', 'Rang' => '⟫',
'rang' => '〉', 'rang' => '⟩',
'rArr' => '⇒', 'rArr' => '⇒',
'rbbrk' => '〕', 'rbbrk' => '❳',
'roang' => '〙', 'roang' => '⟭',
'robrk' => '〛', 'robrk' => '⟧',
'ropar' => '⦆', 'ropar' => '⦆',
'rppolint' => '⨒', 'rppolint' => '⨒',
'scpolint' => '⨓', 'scpolint' => '⨓',
@ -1529,7 +1531,7 @@ class String
'nbsp' => ' ', 'nbsp' => ' ',
'not' => '¬', 'not' => '¬',
'num' => '#', 'num' => '#',
'ohm' => 'Ω', 'ohm' => 'Ω',
'ordf' => 'ª', 'ordf' => 'ª',
'ordm' => 'º', 'ordm' => 'º',
'para' => '¶', 'para' => '¶',
@ -1590,6 +1592,7 @@ class String
'fflig' => 'ff', 'fflig' => 'ff',
'ffllig' => 'ffl', 'ffllig' => 'ffl',
'filig' => 'fi', 'filig' => 'fi',
'fjlig' => 'fj',
'flat' => '♭', 'flat' => '♭',
'fllig' => 'fl', 'fllig' => 'fl',
'frac13' => '⅓', 'frac13' => '⅓',
@ -1714,9 +1717,9 @@ class String
'NotSucceedsTilde' => '≿̸', 'NotSucceedsTilde' => '≿̸',
'oopf' => '𝕠', 'oopf' => '𝕠',
'OverBar' => '¯', 'OverBar' => '¯',
'OverBrace' => '︷', 'OverBrace' => '⏞',
'OverBracket' => '⎴', 'OverBracket' => '⎴',
'OverParenthesis' => '︵', 'OverParenthesis' => '⏜',
'planckh' => 'ℎ', 'planckh' => 'ℎ',
'popf' => '𝕡', 'popf' => '𝕡',
'Product' => '∏', 'Product' => '∏',
@ -1738,9 +1741,9 @@ class String
'ThickSpace' => '   ', 'ThickSpace' => '   ',
'topf' => '𝕥', 'topf' => '𝕥',
'UnderBar' => '̲', 'UnderBar' => '̲',
'UnderBrace' => '︸', 'UnderBrace' => '⏟',
'UnderBracket' => '⎵', 'UnderBracket' => '⎵',
'UnderParenthesis' => '︶', 'UnderParenthesis' => '⏝',
'uopf' => '𝕦', 'uopf' => '𝕦',
'UpArrowBar' => '⤒', 'UpArrowBar' => '⤒',
'Upsilon' => 'Υ', 'Upsilon' => 'Υ',
@ -1944,11 +1947,11 @@ class String
'intprod' => '⨼', 'intprod' => '⨼',
'InvisibleComma' => '⁣', 'InvisibleComma' => '⁣',
'InvisibleTimes' => '⁢', 'InvisibleTimes' => '⁢',
'langle' => '〈', 'langle' => '⟨',
'Laplacetrf' => 'ℒ', 'Laplacetrf' => 'ℒ',
'lbrace' => '{', 'lbrace' => '{',
'lbrack' => '[', 'lbrack' => '[',
'LeftAngleBracket' => '〈', 'LeftAngleBracket' => '⟨',
'LeftArrow' => '←', 'LeftArrow' => '←',
'Leftarrow' => '⇐', 'Leftarrow' => '⇐',
'leftarrow' => '←', 'leftarrow' => '←',
@ -1956,7 +1959,7 @@ class String
'LeftArrowRightArrow' => '⇆', 'LeftArrowRightArrow' => '⇆',
'leftarrowtail' => '↢', 'leftarrowtail' => '↢',
'LeftCeiling' => '⌈', 'LeftCeiling' => '⌈',
'LeftDoubleBracket' => '〚', 'LeftDoubleBracket' => '⟦',
'LeftDownVector' => '⇃', 'LeftDownVector' => '⇃',
'LeftFloor' => '⌊', 'LeftFloor' => '⌊',
'leftharpoondown' => '↽', 'leftharpoondown' => '↽',
@ -2136,7 +2139,7 @@ class String
'propto' => '∝', 'propto' => '∝',
'quaternions' => 'ℍ', 'quaternions' => 'ℍ',
'questeq' => '≟', 'questeq' => '≟',
'rangle' => '〉', 'rangle' => '⟩',
'rationals' => 'ℚ', 'rationals' => 'ℚ',
'rbrace' => '}', 'rbrace' => '}',
'rbrack' => ']', 'rbrack' => ']',
@ -2147,7 +2150,7 @@ class String
'ReverseElement' => '∋', 'ReverseElement' => '∋',
'ReverseEquilibrium' => '⇋', 'ReverseEquilibrium' => '⇋',
'ReverseUpEquilibrium' => '⥯', 'ReverseUpEquilibrium' => '⥯',
'RightAngleBracket' => '〉', 'RightAngleBracket' => '⟩',
'RightArrow' => '→', 'RightArrow' => '→',
'Rightarrow' => '⇒', 'Rightarrow' => '⇒',
'rightarrow' => '→', 'rightarrow' => '→',
@ -2155,7 +2158,7 @@ class String
'RightArrowLeftArrow' => '⇄', 'RightArrowLeftArrow' => '⇄',
'rightarrowtail' => '↣', 'rightarrowtail' => '↣',
'RightCeiling' => '⌉', 'RightCeiling' => '⌉',
'RightDoubleBracket' => '〛', 'RightDoubleBracket' => '⟧',
'RightDownVector' => '⇂', 'RightDownVector' => '⇂',
'RightFloor' => '⌋', 'RightFloor' => '⌋',
'rightharpoondown' => '⇁', 'rightharpoondown' => '⇁',
@ -2299,7 +2302,14 @@ class String
'wedge' => '∧', 'wedge' => '∧',
'wp' => '℘', 'wp' => '℘',
'wr' => '≀', 'wr' => '≀',
'zeetrf' => 'ℨ' 'zeetrf' => 'ℨ',
'AMP' => '&#x0026',
'COPY' => '&#x00A9',
'GT' => '&#x003E',
'LT' => '&#x003C',
'QUOT' => '&#x0022',
'REG' => '&#x00AE',
'TRADE' => '&#x2122'
} unless const_defined? "MATHML_ENTITIES" } unless const_defined? "MATHML_ENTITIES"
#:startdoc: #:startdoc:
@ -2363,8 +2373,8 @@ class String
end end
def convert_to_utf8 #:nodoc: def convert_to_utf8 #:nodoc:
if self =~ /^(lt|gt|amp|quot|apos)$/ if self =~ /^(lt|gt|amp|quot|apos)$/i
self.replace "&" + self + ";" self.replace "&" + self.downcase + ";"
elsif MATHML_ENTITIES.has_key?(self) elsif MATHML_ENTITIES.has_key?(self)
self.replace MATHML_ENTITIES[self].split(';').collect {|s| s.gsub(/^&#x([A-F0-9]+)$/, '\1').hex }.pack('U*') self.replace MATHML_ENTITIES[self].split(';').collect {|s| s.gsub(/^&#x([A-F0-9]+)$/, '\1').hex }.pack('U*')
else else

View file

@ -313,7 +313,7 @@ class String
'plussim' => '⨦', 'plussim' => '⨦',
'plustwo' => '⨧', 'plustwo' => '⨧',
'prod' => '∏', 'prod' => '∏',
'race' => '⧚', 'race' => '∽̱',
'roplus' => '⨮', 'roplus' => '⨮',
'rotimes' => '⨵', 'rotimes' => '⨵',
'rthree' => '⋌', 'rthree' => '⋌',
@ -721,8 +721,10 @@ class String
'nu' => 'ν', 'nu' => 'ν',
'Omega' => 'Ω', 'Omega' => 'Ω',
'omega' => 'ω', 'omega' => 'ω',
'phgr' => 'φ',
'Phi' => 'Φ', 'Phi' => 'Φ',
'phi' => 'ϕ', 'phi' => 'φ',
'phis' => 'ϕ',
'phiv' => 'φ', 'phiv' => 'φ',
'Pi' => 'Π', 'Pi' => 'Π',
'pi' => 'π', 'pi' => 'π',
@ -883,7 +885,7 @@ class String
'andv' => '⩚', 'andv' => '⩚',
'angrt' => '∟', 'angrt' => '∟',
'angsph' => '∢', 'angsph' => '∢',
'angst' => 'Å', 'angst' => 'Å',
'ap' => '≈', 'ap' => '≈',
'apacir' => '⩯', 'apacir' => '⩯',
'awconint' => '∳', 'awconint' => '∳',
@ -1406,7 +1408,7 @@ class String
'nbsp' => ' ', 'nbsp' => ' ',
'not' => '¬', 'not' => '¬',
'num' => '#', 'num' => '#',
'ohm' => 'Ω', 'ohm' => 'Ω',
'ordf' => 'ª', 'ordf' => 'ª',
'ordm' => 'º', 'ordm' => 'º',
'para' => '¶', 'para' => '¶',
@ -2177,7 +2179,14 @@ class String
'wedge' => '∧', 'wedge' => '∧',
'wp' => '℘', 'wp' => '℘',
'wr' => '≀', 'wr' => '≀',
'zeetrf' => 'ℨ' 'zeetrf' => 'ℨ',
'AMP' => '&#x0026',
'COPY' => '&#x00A9',
'GT' => '&#x003E',
'LT' => '&#x003C',
'QUOT' => '&#x0022',
'REG' => '&#x00AE',
'TRADE' => '&#x2122'
} }
#:startdoc: #:startdoc:
@ -2286,8 +2295,8 @@ class String
end end
def convert_to_utf8 #:nodoc: def convert_to_utf8 #:nodoc:
if self =~ /^(lt|gt|amp|quot|apos)$/ if self =~ /^(lt|gt|amp|quot|apos)$/i
self.replace "&" + self + ";" self.replace "&" + self.downcase + ";"
elsif MATHML_ENTITIES.has_key?(self) elsif MATHML_ENTITIES.has_key?(self)
self.replace MATHML_ENTITIES[self].split(';').collect {|s| s.gsub(/^&#x([A-F0-9]+)$/, '\1').hex }.pack('U*') self.replace MATHML_ENTITIES[self].split(';').collect {|s| s.gsub(/^&#x([A-F0-9]+)$/, '\1').hex }.pack('U*')
else else

View file

@ -27,13 +27,13 @@ class SanitizeTest < Test::Unit::TestCase
end end
def my_rex(string) def my_rex(string)
sanitize_rexml(rexml_doc(string)).gsub(/\A<div xmlns="http:\/\/www.w3.org\/1999\/xhtml">(.*)<\/div>\Z/m, '\1') sanitize_rexml(rexml_doc(string.to_utf8)).gsub(/\A<div xmlns="http:\/\/www.w3.org\/1999\/xhtml">(.*)<\/div>\Z/m, '\1')
end end
def test_sanitize_named_entities def test_sanitize_named_entities
input = '<p>Greek &phi;, double-struck &Aopf;, numeric &#x1D538; &#8279;</p>' input = '<p>Greek &phis; &phi;, double-struck &Aopf;, numeric &#x1D538; &#8279;, uppercase &TRADE; &LT;</p>'
output = "<p>Greek \317\225, double-struck \360\235\224\270, numeric \360\235\224\270 \342\201\227</p>" output = "<p>Greek \317\225 \317\206, double-struck \360\235\224\270, numeric \360\235\224\270 \342\201\227, uppercase \342\204\242 &lt;</p>"
output2 = "<p>Greek \317\225, double-struck \360\235\224\270, numeric &#x1D538; &#8279;</p>" output2 = "<p>Greek \317\225 \317\206, double-struck \360\235\224\270, numeric &#x1D538; &#8279;, uppercase \342\204\242 &lt;</p>"
assert_equal(output, sanitize_xhtml(input)) assert_equal(output, sanitize_xhtml(input))
assert_equal(output, sanitize_html(input)) assert_equal(output, sanitize_html(input))
assert_equal(output, my_rex(input)) assert_equal(output, my_rex(input))

View file

@ -22,6 +22,14 @@ class SanitizerTest < Test::Unit::TestCase
assert_equal xhtmloutput, do_sanitize_xhtml(input) assert_equal xhtmloutput, do_sanitize_xhtml(input)
end end
# Exercises entity handling on one paragraph that mixes lowercase named
# entities (&phis;, &phi;, &Aopf;), hexadecimal and decimal character
# references, and uppercase entity names (&TRADE;, &LT;).
def test_sanitize_named_entities
  markup = '<p>Greek &phis; &phi;, double-struck &Aopf;, numeric &#x1D538; &#8279;, uppercase &TRADE; &LT;</p>'
  # Expected sanitizer result: named entities and numeric references both
  # appear as UTF-8 byte sequences, while &LT; is expected to come out as &lt;.
  sanitized = "<p>Greek \317\225 \317\206, double-struck \360\235\224\270, numeric \360\235\224\270 \342\201\227, uppercase \342\204\242 &lt;</p>"
  # Expected to_utf8 result: identical except that the numeric character
  # references are left exactly as written in the input.
  utf8_only = "<p>Greek \317\225 \317\206, double-struck \360\235\224\270, numeric &#x1D538; &#8279;, uppercase \342\204\242 &lt;</p>"
  # The same expected string is supplied for all three expected-output arguments.
  check_sanitization(markup, sanitized, sanitized, sanitized)
  assert_equal(utf8_only, markup.to_utf8)
end
Sanitizer::ALLOWED_ELEMENTS.each do |tag_name| Sanitizer::ALLOWED_ELEMENTS.each do |tag_name|
define_method "test_should_allow_#{tag_name}_tag" do define_method "test_should_allow_#{tag_name}_tag" do
input = "<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>" input = "<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>"