# == Introduction # # This module provides sanitization of XHTML+MathML+SVG # and of inline style attributes. Its genesis is {described here}[http://golem.ph.utexas.edu/~distler/blog/archives/001181.html]. # # Uses the {HTML5lib parser}[http://code.google.com/p/html5lib/], so that the parsing behaviour should # resemble that of browsers. # # sanitize_xhtml() is a case-sensitive sanitizer, suitable for XHTML # sanitize_html() is a case-insensitive sanitizer suitable for HTML # sanitize_rexml() sanitizes a REXML tree, returning a string # safe_sanitize_xhtml() makes extra-sure that the result is well-formed XHTML # by running the output of sanitize_xhtml() through REXML # # == Files # # {sanitize.rb}[http://golem.ph.utexas.edu/~distler/code/instiki/svn/lib/sanitize.rb], # {HTML5lib}[http://golem.ph.utexas.edu/~distler/code/instiki/svn/vendor/plugins/HTML5lib/] # # == Author # # {Jacques Distler}[http://golem.ph.utexas.edu/~distler/] # # == License # # Ruby License module Sanitize require 'html5/html5parser' require 'html5/liberalxmlparser' require 'html5/treewalkers' require 'html5/treebuilders' require 'html5/serializer' require 'html5/sanitizer' include HTML5 # Sanitize a string, parsed using XHTML parsing rules. # # :call-seq: # sanitize_xhtml(string) -> string # sanitize_xhtml(string, {:encoding => 'iso-8859-1', :to_tree => true}) -> REXML::Document # # Unless otherwise specified, the string is assumed to be utf-8 encoded. # By default, the output is a string. But, optionally, you can return a REXML tree. # # The string returned is utf-8 encoded. If you want, you can use iconv to convert it to some other encoding. # (REXML trees are always utf-8 encoded.) def sanitize_xhtml(html, options = {}) @encoding = 'utf-8' @treebuilder = TreeBuilders::REXML::TreeBuilder @to_tree = false options.each do |name, value| next unless %w(encoding treebuilder to_tree).include? name.to_s if name.to_s == 'treebuilder' @treebuilder = HTML5lib::TreeBuilders.get_tree_builder(value) else instance_variable_set("@#{name}", value) end end if @encoding == 'utf-8' parsed = XHTMLParser.parse_fragment(html.to_utf8, {:tokenizer => HTMLSanitizer, :lowercase_element_name => false, :lowercase_attr_name => false, :encoding => @encoding, :tree => @treebuilder }) else parsed = XHTMLParser.parse_fragment(html.to_ncr, {:tokenizer => HTMLSanitizer, :lowercase_element_name => false, :lowercase_attr_name => false, :encoding => @encoding, :tree => @treebuilder }) end return parsed if @to_tree return parsed.to_s end # Sanitize a string, parsed using XHTML parsing rules. Reparse the result to # ensure well-formedness. # # :call-seq: # safe_sanitize_xhtml(string) -> string # # Unless otherwise specified, the string is assumed to be utf-8 encoded. # # The string returned is utf-8 encoded. If you want, you can use iconv to convert it to some other encoding. # (REXML trees are always utf-8 encoded.) def safe_sanitize_xhtml(html, options = {}) options[:to_tree] = false sanitized = sanitize_xhtml(html, options) doc = REXML::Document.new("
#{sanitized}
") sanitized = doc.to_s.gsub(/\A
(.*)<\/div>\Z/m, '\1') rescue REXML::ParseException sanitized = sanitized.escapeHTML end # Sanitize a string, parsed using HTML parsing rules. # # :call-seq: # sanitize_html( string ) -> string # sanitize_html( string, {:encoding => 'iso-8859-1', :to_tree => true} ) -> REXML::Document # # Unless otherwise specified, the string is assumed to be utf-8 encoded. # By default, the output is a string. But, optionally, you can return a REXML tree. # # The string returned is utf-8 encoded. If you want, you can use iconv to convert it to some other encoding. # (REXML trees are always utf-8 encoded.) def sanitize_html(html, options = {}) @encoding = 'utf-8' @treebuilder = TreeBuilders::REXML::TreeBuilder @to_tree = false options.each do |name, value| next unless %w(encoding treebuilder to_tree).include? name.to_s if name.to_s == 'treebuilder' @treebuilder = HTML5lib::TreeBuilders.get_tree_builder(value) else instance_variable_set("@#{name}", value) end end if @encoding == 'utf-8' parsed = HTMLParser.parse_fragment(html.to_utf8, {:tokenizer => HTMLSanitizer, :encoding => @encoding, :tree => @treebuilder }) else parsed = HTMLParser.parse_fragment(html.to_ncr, {:tokenizer => HTMLSanitizer, :encoding => @encoding, :tree => @treebuilder }) end return parsed if @to_tree return parsed.to_s end # Sanitize a REXML tree. The output is a string. # # :call-seq: # sanitize_rexml(tree) -> string # def sanitize_rexml(tree) tokens = TreeWalkers.get_tree_walker('rexml2').new(tree) XHTMLSerializer.serialize(tokens, {:encoding=>'utf-8', :space_before_trailing_solidus => true, :inject_meta_charset => false, :sanitize => true}) end end # Some useful additions to the String class class String # Check whether a string is valid utf-8 # # :call-seq: # string.is_utf8? -> boolean # # returns true if the sequence of bytes in string is valid utf-8 #-- def is_utf8? #expand NCRs to utf-8 text = self.gsub(/&#[xX]([a-fA-F0-9]+);/) { |m| [$1.hex].pack('U*') } text.gsub!(/&#(\d+);/) { |m| [$1.to_i].pack('U*') } # You might think this is faster, but it isn't #pieces = self.split(/&#[xX]([a-fA-F0-9]+);/) #1.step(pieces.length-1, 2) {|i| pieces[i] = [pieces[i].hex].pack('U*')} #pieces = pieces.join.split(/&#(\d+);/) #1.step(pieces.length-1, 2) {|i| pieces[i] = [pieces[i].to_i].pack('U*')} #text = pieces.join #ensure the resulting string of bytes is valid utf-8 text =~ /\A( [\x09\x0A\x0D\x20-\x7E] # ASCII | [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs | [\xE1-\xEC\xEE][\x80-\xBF]{2} # straight 3-byte | \xEF[\x80-\xBE]{2} # | \xEF\xBF[\x80-\xBD] # excluding U+fffe and U+ffff | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3 | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15 | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16 )*\Z/nx; end #++ #:stopdoc: MATHML_ENTITIES = { 'Alpha' => 'Α', 'Beta' => 'Β', 'Epsilon' => 'Ε', 'Zeta' => 'Ζ', 'Eta' => 'Η', 'Iota' => 'Ι', 'Kappa' => 'Κ', 'Mu' => 'Μ', 'Nu' => 'Ν', 'Omicron' => 'Ο', 'Rho' => 'Ρ', 'Tau' => 'Τ', 'Chi' => 'Χ', 'epsilon' => 'ε', 'zeta' => 'ζ', 'omicron' => 'ο', 'sigmaf' => 'ς', 'thetasym' => 'ϑ', 'upsih' => 'ϒ', 'oline' => '‾', 'frasl' => '⁄', 'alefsym' => 'ℵ', 'crarr' => '↵', 'empty' => '∅', 'amp' => '&', 'lt' => '<', 'zwnj' => '‌', 'zwj' => '‍', 'lrm' => '‎', 'rlm' => '‏', 'sbquo' => '‚', 'bdquo' => '„', 'lsaquo' => '‹', 'rsaquo' => '›', 'euro' => '€', 'angzarr' => '⍼', 'cirmid' => '⫯', 'cudarrl' => '⤸', 'cudarrr' => '⤵', 'cularr' => '↶', 'cularrp' => '⤽', 'curarr' => '↷', 'curarrm' => '⤼', 'Darr' => '↡', 'dArr' => '⇓', 'ddarr' => '⇊', 'DDotrahd' => '⤑', 'dfisht' => '⥿', 'dHar' => '⥥', 'dharl' => '⇃', 'dharr' => '⇂', 'duarr' => '⇵', 'duhar' => '⥯', 'dzigrarr' => '⟿', 'erarr' => '⥱', 'hArr' => '⇔', 'harr' => '↔', 'harrcir' => '⥈', 'harrw' => '↭', 'hoarr' => '⇿', 'imof' => '⊷', 'lAarr' => '⇚', 'Larr' => '↞', 'larrbfs' => '⤟', 'larrfs' => '⤝', 'larrhk' => '↩', 'larrlp' => '↫', 'larrpl' => '⤹', 'larrsim' => '⥳', 'larrtl' => '↢', 'lAtail' => '⤛', 'latail' => '⤙', 'lBarr' => '⤎', 'lbarr' => '⤌', 'ldca' => '⤶', 'ldrdhar' => '⥧', 'ldrushar' => '⥋', 'ldsh' => '↲', 'lfisht' => '⥼', 'lHar' => '⥢', 'lhard' => '↽', 'lharu' => '↼', 'lharul' => '⥪', 'llarr' => '⇇', 'llhard' => '⥫', 'loarr' => '⇽', 'lrarr' => '⇆', 'lrhar' => '⇋', 'lrhard' => '⥭', 'lsh' => '↰', 'lurdshar' => '⥊', 'luruhar' => '⥦', 'Map' => '⤅', 'map' => '↦', 'midcir' => '⫰', 'mumap' => '⊸', 'nearhk' => '⤤', 'neArr' => '⇗', 'nearr' => '↗', 'nesear' => '⤨', 'nhArr' => '⇎', 'nharr' => '↮', 'nlArr' => '⇍', 'nlarr' => '↚', 'nrArr' => '⇏', 'nrarr' => '↛', 'nrarrc' => '⤳̸', 'nrarrw' => '↝̸', 'nvHarr' => '⤄', 'nvlArr' => '⤂', 'nvrArr' => '⤃', 'nwarhk' => '⤣', 'nwArr' => '⇖', 'nwarr' => '↖', 'nwnear' => '⤧', 'olarr' => '↺', 'orarr' => '↻', 'origof' => '⊶', 'rAarr' => '⇛', 'Rarr' => '↠', 'rarrap' => '⥵', 'rarrbfs' => '⤠', 'rarrc' => '⤳', 'rarrfs' => '⤞', 'rarrhk' => '↪', 'rarrlp' => '↬', 'rarrpl' => '⥅', 'rarrsim' => '⥴', 'Rarrtl' => '⤖', 'rarrtl' => '↣', 'rarrw' => '↝', 'rAtail' => '⤜', 'ratail' => '⤚', 'RBarr' => '⤐', 'rBarr' => '⤏', 'rbarr' => '⤍', 'rdca' => '⤷', 'rdldhar' => '⥩', 'rdsh' => '↳', 'rfisht' => '⥽', 'rHar' => '⥤', 'rhard' => '⇁', 'rharu' => '⇀', 'rharul' => '⥬', 'rlarr' => '⇄', 'rlhar' => '⇌', 'roarr' => '⇾', 'rrarr' => '⇉', 'rsh' => '↱', 'ruluhar' => '⥨', 'searhk' => '⤥', 'seArr' => '⇘', 'searr' => '↘', 'seswar' => '⤩', 'simrarr' => '⥲', 'slarr' => '←', 'srarr' => '→', 'swarhk' => '⤦', 'swArr' => '⇙', 'swarr' => '↙', 'swnwar' => '⤪', 'Uarr' => '↟', 'uArr' => '⇑', 'Uarrocir' => '⥉', 'udarr' => '⇅', 'udhar' => '⥮', 'ufisht' => '⥾', 'uHar' => '⥣', 'uharl' => '↿', 'uharr' => '↾', 'uuarr' => '⇈', 'vArr' => '⇕', 'varr' => '↕', 'xhArr' => '⟺', 'xharr' => '⟷', 'xlArr' => '⟸', 'xlarr' => '⟵', 'xmap' => '⟼', 'xrArr' => '⟹', 'xrarr' => '⟶', 'zigrarr' => '⇝', 'ac' => '∾', 'acE' => '∾̳', 'amalg' => '⨿', 'barvee' => '⊽', 'Barwed' => '⌆', 'barwed' => '⌅', 'bsolb' => '⧅', 'Cap' => '⋒', 'capand' => '⩄', 'capbrcup' => '⩉', 'capcap' => '⩋', 'capcup' => '⩇', 'capdot' => '⩀', 'caps' => '∩︀', 'ccaps' => '⩍', 'ccups' => '⩌', 'ccupssm' => '⩐', 'coprod' => '∐', 'Cup' => '⋓', 'cupbrcap' => '⩈', 'cupcap' => '⩆', 'cupcup' => '⩊', 'cupdot' => '⊍', 'cupor' => '⩅', 'cups' => '∪︀', 'cuvee' => '⋎', 'cuwed' => '⋏', 'Dagger' => '‡', 'dagger' => '†', 'diam' => '⋄', 'divonx' => '⋇', 'eplus' => '⩱', 'hercon' => '⊹', 'intcal' => '⊺', 'iprod' => '⨼', 'loplus' => '⨭', 'lotimes' => '⨴', 'lthree' => '⋋', 'ltimes' => '⋉', 'midast' => '*', 'minusb' => '⊟', 'minusd' => '∸', 'minusdu' => '⨪', 'ncap' => '⩃', 'ncup' => '⩂', 'oast' => '⊛', 'ocir' => '⊚', 'odash' => '⊝', 'odiv' => '⨸', 'odot' => '⊙', 'odsold' => '⦼', 'ofcir' => '⦿', 'ogt' => '⧁', 'ohbar' => '⦵', 'olcir' => '⦾', 'olt' => '⧀', 'omid' => '⦶', 'ominus' => '⊖', 'opar' => '⦷', 'operp' => '⦹', 'oplus' => '⊕', 'osol' => '⊘', 'Otimes' => '⨷', 'otimes' => '⊗', 'otimesas' => '⨶', 'ovbar' => '⌽', 'plusacir' => '⨣', 'plusb' => '⊞', 'pluscir' => '⨢', 'plusdo' => '∔', 'plusdu' => '⨥', 'pluse' => '⩲', 'plussim' => '⨦', 'plustwo' => '⨧', 'prod' => '∏', 'race' => '∽̱', 'roplus' => '⨮', 'rotimes' => '⨵', 'rthree' => '⋌', 'rtimes' => '⋊', 'sdot' => '⋅', 'sdotb' => '⊡', 'setmn' => '∖', 'simplus' => '⨤', 'smashp' => '⨳', 'solb' => '⧄', 'sqcap' => '⊓', 'sqcaps' => '⊓︀', 'sqcup' => '⊔', 'sqcups' => '⊔︀', 'ssetmn' => '∖', 'sstarf' => '⋆', 'subdot' => '⪽', 'sum' => '∑', 'supdot' => '⪾', 'timesb' => '⊠', 'timesbar' => '⨱', 'timesd' => '⨰', 'tridot' => '◬', 'triminus' => '⨺', 'triplus' => '⨹', 'trisb' => '⧍', 'tritime' => '⨻', 'uplus' => '⊎', 'veebar' => '⊻', 'wedbar' => '⩟', 'wreath' => '≀', 'xcap' => '⋂', 'xcirc' => '◯', 'xcup' => '⋃', 'xdtri' => '▽', 'xodot' => '⨀', 'xoplus' => '⨁', 'xotime' => '⨂', 'xsqcup' => '⨆', 'xuplus' => '⨄', 'xutri' => '△', 'xvee' => '⋁', 'xwedge' => '⋀', 'dlcorn' => '⌞', 'drcorn' => '⌟', 'gtlPar' => '⦕', 'langd' => '⦑', 'lbrke' => '⦋', 'lbrksld' => '⦏', 'lbrkslu' => '⦍', 'lceil' => '⌈', 'lfloor' => '⌊', 'lmoust' => '⎰', 'lparlt' => '⦓', 'ltrPar' => '⦖', 'rangd' => '⦒', 'rbrke' => '⦌', 'rbrksld' => '⦎', 'rbrkslu' => '⦐', 'rceil' => '⌉', 'rfloor' => '⌋', 'rmoust' => '⎱', 'rpargt' => '⦔', 'ulcorn' => '⌜', 'urcorn' => '⌝', 'gnap' => '⪊', 'gnE' => '≩', 'gne' => '⪈', 'gnsim' => '⋧', 'gvnE' => '≩︀', 'lnap' => '⪉', 'lnE' => '≨', 'lne' => '⪇', 'lnsim' => '⋦', 'lvnE' => '≨︀', 'nap' => '≉', 'napE' => '⩰̸', 'napid' => '≋̸', 'ncong' => '≇', 'ncongdot' => '⩭̸', 'nequiv' => '≢', 'ngE' => '≧̸', 'nge' => '≱', 'nges' => '⩾̸', 'nGg' => '⋙̸', 'ngsim' => '≵', 'nGt' => '≫⃒', 'ngt' => '≯', 'nGtv' => '≫̸', 'nlE' => '≦̸', 'nle' => '≰', 'nles' => '⩽̸', 'nLl' => '⋘̸', 'nlsim' => '≴', 'nLt' => '≪⃒', 'nlt' => '≮', 'nltri' => '⋪', 'nltrie' => '⋬', 'nLtv' => '≪̸', 'nmid' => '∤', 'npar' => '∦', 'npr' => '⊀', 'nprcue' => '⋠', 'npre' => '⪯̸', 'nrtri' => '⋫', 'nrtrie' => '⋭', 'nsc' => '⊁', 'nsccue' => '⋡', 'nsce' => '⪰̸', 'nsim' => '≁', 'nsime' => '≄', 'nsmid' => '∤', 'nspar' => '∦', 'nsqsube' => '⋢', 'nsqsupe' => '⋣', 'nsub' => '⊄', 'nsubE' => '⫅̸', 'nsube' => '⊈', 'nsup' => '⊅', 'nsupE' => '⫆̸', 'nsupe' => '⊉', 'ntgl' => '≹', 'ntlg' => '≸', 'nvap' => '≍⃒', 'nVDash' => '⊯', 'nVdash' => '⊮', 'nvDash' => '⊭', 'nvdash' => '⊬', 'nvge' => '≥⃒', 'nvgt' => '>⃒', 'nvle' => '≤⃒', 'nvltrie' => '⊴⃒', 'nvrtrie' => '⊵⃒', 'nvsim' => '∼⃒', 'parsim' => '⫳', 'prnap' => '⪹', 'prnE' => '⪵', 'prnsim' => '⋨', 'rnmid' => '⫮', 'scnap' => '⪺', 'scnE' => '⪶', 'scnsim' => '⋩', 'simne' => '≆', 'solbar' => '⌿', 'subnE' => '⫋', 'subne' => '⊊', 'supnE' => '⫌', 'supne' => '⊋', 'vnsub' => '⊂⃒', 'vnsup' => '⊃⃒', 'vsubnE' => '⫋︀', 'vsubne' => '⊊︀', 'vsupnE' => '⫌︀', 'vsupne' => '⊋︀', 'ang' => '∠', 'ange' => '⦤', 'angmsd' => '∡', 'angmsdaa' => '⦨', 'angmsdab' => '⦩', 'angmsdac' => '⦪', 'angmsdad' => '⦫', 'angmsdae' => '⦬', 'angmsdaf' => '⦭', 'angmsdag' => '⦮', 'angmsdah' => '⦯', 'angrtvb' => '⊾', 'angrtvbd' => '⦝', 'bbrk' => '⎵', 'bbrktbrk' => '⎶', 'bemptyv' => '⦰', 'beth' => 'ℶ', 'boxbox' => '⧉', 'bprime' => '‵', 'bsemi' => '⁏', 'cemptyv' => '⦲', 'cirE' => '⧃', 'cirscir' => '⧂', 'comp' => '∁', 'daleth' => 'ℸ', 'demptyv' => '⦱', 'ell' => 'ℓ', 'empty' => '∅', 'emptyv' => '∅', 'gimel' => 'ℷ', 'iiota' => '℩', 'image' => 'ℑ', 'imath' => 'ı', 'jmath' => 'ȷ', 'laemptyv' => '⦴', 'lltri' => '◺', 'lrtri' => '⊿', 'mho' => '℧', 'nang' => '∠⃒', 'nexist' => '∄', 'oS' => 'Ⓢ', 'planck' => 'ℏ', 'plankv' => 'ℏ', 'raemptyv' => '⦳', 'range' => '⦥', 'real' => 'ℜ', 'tbrk' => '⎴', 'trpezium' => '⏢', 'ultri' => '◸', 'urtri' => '◹', 'vzigzag' => '⦚', 'weierp' => '℘', 'apE' => '⩰', 'ape' => '≊', 'apid' => '≋', 'asymp' => '≈', 'Barv' => '⫧', 'bcong' => '≌', 'bepsi' => '϶', 'bowtie' => '⋈', 'bsim' => '∽', 'bsime' => '⋍', 'bsolhsub' => '\⊂', 'bump' => '≎', 'bumpE' => '⪮', 'bumpe' => '≏', 'cire' => '≗', 'Colon' => '∷', 'Colone' => '⩴', 'colone' => '≔', 'congdot' => '⩭', 'csub' => '⫏', 'csube' => '⫑', 'csup' => '⫐', 'csupe' => '⫒', 'cuepr' => '⋞', 'cuesc' => '⋟', 'Dashv' => '⫤', 'dashv' => '⊣', 'easter' => '⩮', 'ecir' => '≖', 'ecolon' => '≕', 'eDDot' => '⩷', 'eDot' => '≑', 'efDot' => '≒', 'eg' => '⪚', 'egs' => '⪖', 'egsdot' => '⪘', 'el' => '⪙', 'els' => '⪕', 'elsdot' => '⪗', 'equest' => '≟', 'equivDD' => '⩸', 'erDot' => '≓', 'esdot' => '≐', 'Esim' => '⩳', 'esim' => '≂', 'fork' => '⋔', 'forkv' => '⫙', 'frown' => '⌢', 'gap' => '⪆', 'gE' => '≧', 'gEl' => '⪌', 'gel' => '⋛', 'ges' => '⩾', 'gescc' => '⪩', 'gesdot' => '⪀', 'gesdoto' => '⪂', 'gesdotol' => '⪄', 'gesl' => '⋛︀', 'gesles' => '⪔', 'Gg' => '⋙', 'gl' => '≷', 'gla' => '⪥', 'glE' => '⪒', 'glj' => '⪤', 'gsim' => '≳', 'gsime' => '⪎', 'gsiml' => '⪐', 'Gt' => '≫', 'gtcc' => '⪧', 'gtcir' => '⩺', 'gtdot' => '⋗', 'gtquest' => '⩼', 'gtrarr' => '⥸', 'homtht' => '∻', 'lap' => '⪅', 'lat' => '⪫', 'late' => '⪭', 'lates' => '⪭︀', 'lE' => '≦', 'lEg' => '⪋', 'leg' => '⋚', 'les' => '⩽', 'lescc' => '⪨', 'lesdot' => '⩿', 'lesdoto' => '⪁', 'lesdotor' => '⪃', 'lesg' => '⋚︀', 'lesges' => '⪓', 'lg' => '≶', 'lgE' => '⪑', 'Ll' => '⋘', 'lsim' => '≲', 'lsime' => '⪍', 'lsimg' => '⪏', 'Lt' => '≪', 'ltcc' => '⪦', 'ltcir' => '⩹', 'ltdot' => '⋖', 'ltlarr' => '⥶', 'ltquest' => '⩻', 'ltrie' => '⊴', 'mcomma' => '⨩', 'mDDot' => '∺', 'mid' => '∣', 'mlcp' => '⫛', 'models' => '⊧', 'mstpos' => '∾', 'Pr' => '⪻', 'pr' => '≺', 'prap' => '⪷', 'prcue' => '≼', 'prE' => '⪳', 'pre' => '⪯', 'prsim' => '≾', 'prurel' => '⊰', 'ratio' => '∶', 'rtrie' => '⊵', 'rtriltri' => '⧎', 'Sc' => '⪼', 'sc' => '≻', 'scap' => '⪸', 'sccue' => '≽', 'scE' => '⪴', 'sce' => '⪰', 'scsim' => '≿', 'sdote' => '⩦', 'sfrown' => '⌢', 'simg' => '⪞', 'simgE' => '⪠', 'siml' => '⪝', 'simlE' => '⪟', 'smid' => '∣', 'smile' => '⌣', 'smt' => '⪪', 'smte' => '⪬', 'smtes' => '⪬︀', 'spar' => '∥', 'sqsub' => '⊏', 'sqsube' => '⊑', 'sqsup' => '⊐', 'sqsupe' => '⊒', 'ssmile' => '⌣', 'Sub' => '⋐', 'subE' => '⫅', 'subedot' => '⫃', 'submult' => '⫁', 'subplus' => '⪿', 'subrarr' => '⥹', 'subsim' => '⫇', 'subsub' => '⫕', 'subsup' => '⫓', 'Sup' => '⋑', 'supdsub' => '⫘', 'supE' => '⫆', 'supedot' => '⫄', 'suphsol' => '⊃/', 'suphsub' => '⫗', 'suplarr' => '⥻', 'supmult' => '⫂', 'supplus' => '⫀', 'supsim' => '⫈', 'supsub' => '⫔', 'supsup' => '⫖', 'thkap' => '≈', 'thksim' => '∼', 'topfork' => '⫚', 'trie' => '≜', 'twixt' => '≬', 'Vbar' => '⫫', 'vBar' => '⫨', 'vBarv' => '⫩', 'VDash' => '⊫', 'Vdash' => '⊩', 'vDash' => '⊨', 'vdash' => '⊢', 'Vdashl' => '⫦', 'vltri' => '⊲', 'vprop' => '∝', 'vrtri' => '⊳', 'Vvdash' => '⊪', 'alpha' => 'α', 'beta' => 'β', 'chi' => 'χ', 'Delta' => 'Δ', 'delta' => 'δ', 'epsi' => 'ϵ', 'epsiv' => 'ε', 'eta' => 'η', 'Gamma' => 'Γ', 'gamma' => 'γ', 'Gammad' => 'Ϝ', 'gammad' => 'ϝ', 'iota' => 'ι', 'kappa' => 'κ', 'kappav' => 'ϰ', 'Lambda' => 'Λ', 'lambda' => 'λ', 'mu' => 'μ', 'nu' => 'ν', 'Omega' => 'Ω', 'omega' => 'ω', 'phgr' => 'φ', 'Phi' => 'Φ', 'phi' => 'φ', 'phis' => 'ϕ', 'phiv' => 'φ', 'Pi' => 'Π', 'pi' => 'π', 'piv' => 'ϖ', 'Psi' => 'Ψ', 'psi' => 'ψ', 'rho' => 'ρ', 'rhov' => 'ϱ', 'Sigma' => 'Σ', 'sigma' => 'σ', 'sigmav' => 'ς', 'tau' => 'τ', 'Theta' => 'Θ', 'theta' => 'θ', 'thetav' => 'ϑ', 'Upsi' => 'ϒ', 'upsi' => 'υ', 'Xi' => 'Ξ', 'xi' => 'ξ', 'zeta' => 'ζ', 'Afr' => '𝔄', 'afr' => '𝔞', 'Bfr' => '𝔅', 'bfr' => '𝔟', 'Cfr' => 'ℭ', 'cfr' => '𝔠', 'Dfr' => '𝔇', 'dfr' => '𝔡', 'Efr' => '𝔈', 'efr' => '𝔢', 'Ffr' => '𝔉', 'ffr' => '𝔣', 'Gfr' => '𝔊', 'gfr' => '𝔤', 'Hfr' => 'ℌ', 'hfr' => '𝔥', 'Ifr' => 'ℑ', 'ifr' => '𝔦', 'Jfr' => '𝔍', 'jfr' => '𝔧', 'Kfr' => '𝔎', 'kfr' => '𝔨', 'Lfr' => '𝔏', 'lfr' => '𝔩', 'Mfr' => '𝔐', 'mfr' => '𝔪', 'Nfr' => '𝔑', 'nfr' => '𝔫', 'Ofr' => '𝔒', 'ofr' => '𝔬', 'Pfr' => '𝔓', 'pfr' => '𝔭', 'Qfr' => '𝔔', 'qfr' => '𝔮', 'Rfr' => 'ℜ', 'rfr' => '𝔯', 'Sfr' => '𝔖', 'sfr' => '𝔰', 'Tfr' => '𝔗', 'tfr' => '𝔱', 'Ufr' => '𝔘', 'ufr' => '𝔲', 'Vfr' => '𝔙', 'vfr' => '𝔳', 'Wfr' => '𝔚', 'wfr' => '𝔴', 'Xfr' => '𝔛', 'xfr' => '𝔵', 'Yfr' => '𝔜', 'yfr' => '𝔶', 'Zfr' => 'ℨ', 'zfr' => '𝔷', 'Aopf' => '𝔸', 'Bopf' => '𝔹', 'Copf' => 'ℂ', 'Dopf' => '𝔻', 'Eopf' => '𝔼', 'Fopf' => '𝔽', 'Gopf' => '𝔾', 'Hopf' => 'ℍ', 'Iopf' => '𝕀', 'Jopf' => '𝕁', 'Kopf' => '𝕂', 'Lopf' => '𝕃', 'Mopf' => '𝕄', 'Nopf' => 'ℕ', 'Oopf' => '𝕆', 'Popf' => 'ℙ', 'Qopf' => 'ℚ', 'Ropf' => 'ℝ', 'Sopf' => '𝕊', 'Topf' => '𝕋', 'Uopf' => '𝕌', 'Vopf' => '𝕍', 'Wopf' => '𝕎', 'Xopf' => '𝕏', 'Yopf' => '𝕐', 'Zopf' => 'ℤ', 'Ascr' => '𝒜', 'ascr' => '𝒶', 'Bscr' => 'ℬ', 'bscr' => '𝒷', 'Cscr' => '𝒞', 'cscr' => '𝒸', 'Dscr' => '𝒟', 'dscr' => '𝒹', 'Escr' => 'ℰ', 'escr' => 'ℯ', 'Fscr' => 'ℱ', 'fscr' => '𝒻', 'Gscr' => '𝒢', 'gscr' => 'ℊ', 'Hscr' => 'ℋ', 'hscr' => '𝒽', 'Iscr' => 'ℐ', 'iscr' => '𝒾', 'Jscr' => '𝒥', 'jscr' => '𝒿', 'Kscr' => '𝒦', 'kscr' => '𝓀', 'Lscr' => 'ℒ', 'lscr' => '𝓁', 'Mscr' => 'ℳ', 'mscr' => '𝓂', 'Nscr' => '𝒩', 'nscr' => '𝓃', 'Oscr' => '𝒪', 'oscr' => 'ℴ', 'Pscr' => '𝒫', 'pscr' => '𝓅', 'Qscr' => '𝒬', 'qscr' => '𝓆', 'Rscr' => 'ℛ', 'rscr' => '𝓇', 'Sscr' => '𝒮', 'sscr' => '𝓈', 'Tscr' => '𝒯', 'tscr' => '𝓉', 'Uscr' => '𝒰', 'uscr' => '𝓊', 'Vscr' => '𝒱', 'vscr' => '𝓋', 'Wscr' => '𝒲', 'wscr' => '𝓌', 'Xscr' => '𝒳', 'xscr' => '𝓍', 'Yscr' => '𝒴', 'yscr' => '𝓎', 'Zscr' => '𝒵', 'zscr' => '𝓏', 'acd' => '∿', 'aleph' => 'ℵ', 'And' => '⩓', 'and' => '∧', 'andand' => '⩕', 'andd' => '⩜', 'andslope' => '⩘', 'andv' => '⩚', 'angrt' => '∟', 'angsph' => '∢', 'angst' => 'Å', 'ap' => '≈', 'apacir' => '⩯', 'awconint' => '∳', 'awint' => '⨑', 'becaus' => '∵', 'bernou' => 'ℬ', 'bne' => '=⃥', 'bnequiv' => '≡⃥', 'bNot' => '⫭', 'bnot' => '⌐', 'bottom' => '⊥', 'cap' => '∩', 'Cconint' => '∰', 'cirfnint' => '⨐', 'compfn' => '∘', 'cong' => '≅', 'Conint' => '∯', 'conint' => '∮', 'ctdot' => '⋯', 'cup' => '∪', 'cwconint' => '∲', 'cwint' => '∱', 'cylcty' => '⌭', 'disin' => '⋲', 'Dot' => '¨', 'DotDot' => '⃜', 'dsol' => '⧶', 'dtdot' => '⋱', 'dwangle' => '⦦', 'elinters' => '⏧', 'epar' => '⋕', 'eparsl' => '⧣', 'equiv' => '≡', 'eqvparsl' => '⧥', 'exist' => '∃', 'fltns' => '▱', 'fnof' => 'ƒ', 'forall' => '∀', 'fpartint' => '⨍', 'ge' => '≥', 'hamilt' => 'ℋ', 'iff' => '⇔', 'iinfin' => '⧜', 'imped' => 'Ƶ', 'infin' => '∞', 'infintie' => '⧝', 'Int' => '∬', 'int' => '∫', 'intlarhk' => '⨗', 'isin' => '∈', 'isindot' => '⋵', 'isinE' => '⋹', 'isins' => '⋴', 'isinsv' => '⋳', 'isinv' => '∈', 'lagran' => 'ℒ', 'Lang' => '⟪', 'lang' => '⟨', 'lArr' => '⇐', 'lbbrk' => '❲', 'le' => '≤', 'loang' => '⟬', 'lobrk' => '⟦', 'lopar' => '⦅', 'lowast' => '∗', 'minus' => '−', 'mnplus' => '∓', 'nabla' => '∇', 'ne' => '≠', 'nedot' => '≐̸', 'nhpar' => '⫲', 'ni' => '∋', 'nis' => '⋼', 'nisd' => '⋺', 'niv' => '∋', 'Not' => '⫬', 'notin' => '∉', 'notindot' => '⋵̸', 'notinE' => '⋹̸', 'notinva' => '∉', 'notinvb' => '⋷', 'notinvc' => '⋶', 'notni' => '∌', 'notniva' => '∌', 'notnivb' => '⋾', 'notnivc' => '⋽', 'nparsl' => '⫽⃥', 'npart' => '∂̸', 'npolint' => '⨔', 'nvinfin' => '⧞', 'olcross' => '⦻', 'Or' => '⩔', 'or' => '∨', 'ord' => '⩝', 'order' => 'ℴ', 'oror' => '⩖', 'orslope' => '⩗', 'orv' => '⩛', 'par' => '∥', 'parsl' => '⫽', 'part' => '∂', 'permil' => '‰', 'perp' => '⊥', 'pertenk' => '‱', 'phmmat' => 'ℳ', 'pointint' => '⨕', 'Prime' => '″', 'prime' => '′', 'profalar' => '⌮', 'profline' => '⌒', 'profsurf' => '⌓', 'prop' => '∝', 'qint' => '⨌', 'qprime' => '⁗', 'quatint' => '⨖', 'radic' => '√', 'Rang' => '⟫', 'rang' => '⟩', 'rArr' => '⇒', 'rbbrk' => '❳', 'roang' => '⟭', 'robrk' => '⟧', 'ropar' => '⦆', 'rppolint' => '⨒', 'scpolint' => '⨓', 'sim' => '∼', 'simdot' => '⩪', 'sime' => '≃', 'smeparsl' => '⧤', 'square' => '□', 'squarf' => '▪', 'strns' => '¯', 'sub' => '⊂', 'sube' => '⊆', 'sup' => '⊃', 'supe' => '⊇', 'tdot' => '⃛', 'there4' => '∴', 'tint' => '∭', 'top' => '⊤', 'topbot' => '⌶', 'topcir' => '⫱', 'tprime' => '‴', 'utdot' => '⋰', 'uwangle' => '⦧', 'vangrt' => '⦜', 'veeeq' => '≚', 'Verbar' => '‖', 'wedgeq' => '≙', 'xnis' => '⋻', 'boxDL' => '╗', 'boxDl' => '╖', 'boxdL' => '╕', 'boxdl' => '┐', 'boxDR' => '╔', 'boxDr' => '╓', 'boxdR' => '╒', 'boxdr' => '┌', 'boxH' => '═', 'boxh' => '─', 'boxHD' => '╦', 'boxHd' => '╤', 'boxhD' => '╥', 'boxhd' => '┬', 'boxHU' => '╩', 'boxHu' => '╧', 'boxhU' => '╨', 'boxhu' => '┴', 'boxUL' => '╝', 'boxUl' => '╜', 'boxuL' => '╛', 'boxul' => '┘', 'boxUR' => '╚', 'boxUr' => '╙', 'boxuR' => '╘', 'boxur' => '└', 'boxV' => '║', 'boxv' => '│', 'boxVH' => '╬', 'boxVh' => '╫', 'boxvH' => '╪', 'boxvh' => '┼', 'boxVL' => '╣', 'boxVl' => '╢', 'boxvL' => '╡', 'boxvl' => '┤', 'boxVR' => '╠', 'boxVr' => '╟', 'boxvR' => '╞', 'boxvr' => '├', 'Acy' => 'А', 'acy' => 'а', 'Bcy' => 'Б', 'bcy' => 'б', 'CHcy' => 'Ч', 'chcy' => 'ч', 'Dcy' => 'Д', 'dcy' => 'д', 'Ecy' => 'Э', 'ecy' => 'э', 'Fcy' => 'Ф', 'fcy' => 'ф', 'Gcy' => 'Г', 'gcy' => 'г', 'HARDcy' => 'Ъ', 'hardcy' => 'ъ', 'Icy' => 'И', 'icy' => 'и', 'IEcy' => 'Е', 'iecy' => 'е', 'IOcy' => 'Ё', 'iocy' => 'ё', 'Jcy' => 'Й', 'jcy' => 'й', 'Kcy' => 'К', 'kcy' => 'к', 'KHcy' => 'Х', 'khcy' => 'х', 'Lcy' => 'Л', 'lcy' => 'л', 'Mcy' => 'М', 'mcy' => 'м', 'Ncy' => 'Н', 'ncy' => 'н', 'numero' => '№', 'Ocy' => 'О', 'ocy' => 'о', 'Pcy' => 'П', 'pcy' => 'п', 'Rcy' => 'Р', 'rcy' => 'р', 'Scy' => 'С', 'scy' => 'с', 'SHCHcy' => 'Щ', 'shchcy' => 'щ', 'SHcy' => 'Ш', 'shcy' => 'ш', 'SOFTcy' => 'Ь', 'softcy' => 'ь', 'Tcy' => 'Т', 'tcy' => 'т', 'TScy' => 'Ц', 'tscy' => 'ц', 'Ucy' => 'У', 'ucy' => 'у', 'Vcy' => 'В', 'vcy' => 'в', 'YAcy' => 'Я', 'yacy' => 'я', 'Ycy' => 'Ы', 'ycy' => 'ы', 'YUcy' => 'Ю', 'yucy' => 'ю', 'Zcy' => 'З', 'zcy' => 'з', 'ZHcy' => 'Ж', 'zhcy' => 'ж', 'DJcy' => 'Ђ', 'djcy' => 'ђ', 'DScy' => 'Ѕ', 'dscy' => 'ѕ', 'DZcy' => 'Џ', 'dzcy' => 'џ', 'GJcy' => 'Ѓ', 'gjcy' => 'ѓ', 'Iukcy' => 'І', 'iukcy' => 'і', 'Jsercy' => 'Ј', 'jsercy' => 'ј', 'Jukcy' => 'Є', 'jukcy' => 'є', 'KJcy' => 'Ќ', 'kjcy' => 'ќ', 'LJcy' => 'Љ', 'ljcy' => 'љ', 'NJcy' => 'Њ', 'njcy' => 'њ', 'TSHcy' => 'Ћ', 'tshcy' => 'ћ', 'Ubrcy' => 'Ў', 'ubrcy' => 'ў', 'YIcy' => 'Ї', 'yicy' => 'ї', 'acute' => '´', 'breve' => '˘', 'caron' => 'ˇ', 'cedil' => '¸', 'circ' => 'ˆ', 'dblac' => '˝', 'die' => '¨', 'dot' => '˙', 'grave' => '`', 'macr' => '¯', 'ogon' => '˛', 'ring' => '˚', 'tilde' => '˜', 'uml' => '¨', 'Aacute' => 'Á', 'aacute' => 'á', 'Acirc' => 'Â', 'acirc' => 'â', 'AElig' => 'Æ', 'aelig' => 'æ', 'Agrave' => 'À', 'agrave' => 'à', 'Aring' => 'Å', 'aring' => 'å', 'Atilde' => 'Ã', 'atilde' => 'ã', 'Auml' => 'Ä', 'auml' => 'ä', 'Ccedil' => 'Ç', 'ccedil' => 'ç', 'Eacute' => 'É', 'eacute' => 'é', 'Ecirc' => 'Ê', 'ecirc' => 'ê', 'Egrave' => 'È', 'egrave' => 'è', 'ETH' => 'Ð', 'eth' => 'ð', 'Euml' => 'Ë', 'euml' => 'ë', 'Iacute' => 'Í', 'iacute' => 'í', 'Icirc' => 'Î', 'icirc' => 'î', 'Igrave' => 'Ì', 'igrave' => 'ì', 'Iuml' => 'Ï', 'iuml' => 'ï', 'Ntilde' => 'Ñ', 'ntilde' => 'ñ', 'Oacute' => 'Ó', 'oacute' => 'ó', 'Ocirc' => 'Ô', 'ocirc' => 'ô', 'Ograve' => 'Ò', 'ograve' => 'ò', 'Oslash' => 'Ø', 'oslash' => 'ø', 'Otilde' => 'Õ', 'otilde' => 'õ', 'Ouml' => 'Ö', 'ouml' => 'ö', 'szlig' => 'ß', 'THORN' => 'Þ', 'thorn' => 'þ', 'Uacute' => 'Ú', 'uacute' => 'ú', 'Ucirc' => 'Û', 'ucirc' => 'û', 'Ugrave' => 'Ù', 'ugrave' => 'ù', 'Uuml' => 'Ü', 'uuml' => 'ü', 'Yacute' => 'Ý', 'yacute' => 'ý', 'yuml' => 'ÿ', 'Abreve' => 'Ă', 'abreve' => 'ă', 'Amacr' => 'Ā', 'amacr' => 'ā', 'Aogon' => 'Ą', 'aogon' => 'ą', 'Cacute' => 'Ć', 'cacute' => 'ć', 'Ccaron' => 'Č', 'ccaron' => 'č', 'Ccirc' => 'Ĉ', 'ccirc' => 'ĉ', 'Cdot' => 'Ċ', 'cdot' => 'ċ', 'Dcaron' => 'Ď', 'dcaron' => 'ď', 'Dstrok' => 'Đ', 'dstrok' => 'đ', 'Ecaron' => 'Ě', 'ecaron' => 'ě', 'Edot' => 'Ė', 'edot' => 'ė', 'Emacr' => 'Ē', 'emacr' => 'ē', 'ENG' => 'Ŋ', 'eng' => 'ŋ', 'Eogon' => 'Ę', 'eogon' => 'ę', 'gacute' => 'ǵ', 'Gbreve' => 'Ğ', 'gbreve' => 'ğ', 'Gcedil' => 'Ģ', 'Gcirc' => 'Ĝ', 'gcirc' => 'ĝ', 'Gdot' => 'Ġ', 'gdot' => 'ġ', 'Hcirc' => 'Ĥ', 'hcirc' => 'ĥ', 'Hstrok' => 'Ħ', 'hstrok' => 'ħ', 'Idot' => 'İ', 'IJlig' => 'IJ', 'ijlig' => 'ij', 'Imacr' => 'Ī', 'imacr' => 'ī', 'inodot' => 'ı', 'Iogon' => 'Į', 'iogon' => 'į', 'Itilde' => 'Ĩ', 'itilde' => 'ĩ', 'Jcirc' => 'Ĵ', 'jcirc' => 'ĵ', 'Kcedil' => 'Ķ', 'kcedil' => 'ķ', 'kgreen' => 'ĸ', 'Lacute' => 'Ĺ', 'lacute' => 'ĺ', 'Lcaron' => 'Ľ', 'lcaron' => 'ľ', 'Lcedil' => 'Ļ', 'lcedil' => 'ļ', 'Lmidot' => 'Ŀ', 'lmidot' => 'ŀ', 'Lstrok' => 'Ł', 'lstrok' => 'ł', 'Nacute' => 'Ń', 'nacute' => 'ń', 'napos' => 'ʼn', 'Ncaron' => 'Ň', 'ncaron' => 'ň', 'Ncedil' => 'Ņ', 'ncedil' => 'ņ', 'Odblac' => 'Ő', 'odblac' => 'ő', 'OElig' => 'Œ', 'oelig' => 'œ', 'Omacr' => 'Ō', 'omacr' => 'ō', 'Racute' => 'Ŕ', 'racute' => 'ŕ', 'Rcaron' => 'Ř', 'rcaron' => 'ř', 'Rcedil' => 'Ŗ', 'rcedil' => 'ŗ', 'Sacute' => 'Ś', 'sacute' => 'ś', 'Scaron' => 'Š', 'scaron' => 'š', 'Scedil' => 'Ş', 'scedil' => 'ş', 'Scirc' => 'Ŝ', 'scirc' => 'ŝ', 'Tcaron' => 'Ť', 'tcaron' => 'ť', 'Tcedil' => 'Ţ', 'tcedil' => 'ţ', 'Tstrok' => 'Ŧ', 'tstrok' => 'ŧ', 'Ubreve' => 'Ŭ', 'ubreve' => 'ŭ', 'Udblac' => 'Ű', 'udblac' => 'ű', 'Umacr' => 'Ū', 'umacr' => 'ū', 'Uogon' => 'Ų', 'uogon' => 'ų', 'Uring' => 'Ů', 'uring' => 'ů', 'Utilde' => 'Ũ', 'utilde' => 'ũ', 'Wcirc' => 'Ŵ', 'wcirc' => 'ŵ', 'Ycirc' => 'Ŷ', 'ycirc' => 'ŷ', 'Yuml' => 'Ÿ', 'Zacute' => 'Ź', 'zacute' => 'ź', 'Zcaron' => 'Ž', 'zcaron' => 'ž', 'Zdot' => 'Ż', 'zdot' => 'ż', 'apos' => ''', 'ast' => '*', 'brvbar' => '¦', 'bsol' => '\', 'cent' => '¢', 'colon' => ':', 'comma' => ',', 'commat' => '@', 'copy' => '©', 'curren' => '¤', 'darr' => '↓', 'deg' => '°', 'divide' => '÷', 'dollar' => '$', 'equals' => '=', 'excl' => '!', 'frac12' => '½', 'frac14' => '¼', 'frac18' => '⅛', 'frac34' => '¾', 'frac38' => '⅜', 'frac58' => '⅝', 'frac78' => '⅞', 'gt' => '>', 'half' => '½', 'horbar' => '―', 'hyphen' => '‐', 'iexcl' => '¡', 'iquest' => '¿', 'laquo' => '«', 'larr' => '←', 'lcub' => '{', 'ldquo' => '“', 'lowbar' => '_', 'lpar' => '(', 'lsqb' => '[', 'lsquo' => '‘', 'micro' => 'µ', 'middot' => '·', 'nbsp' => ' ', 'not' => '¬', 'num' => '#', 'ohm' => 'Ω', 'ordf' => 'ª', 'ordm' => 'º', 'para' => '¶', 'percnt' => '%', 'period' => '.', 'plus' => '+', 'plusmn' => '±', 'pound' => '£', 'quest' => '?', 'quot' => '"', 'raquo' => '»', 'rarr' => '→', 'rcub' => '}', 'rdquo' => '”', 'reg' => '®', 'rpar' => ')', 'rsqb' => ']', 'rsquo' => '’', 'sect' => '§', 'semi' => ';', 'shy' => '­', 'sol' => '/', 'sung' => '♪', 'sup1' => '¹', 'sup2' => '²', 'sup3' => '³', 'times' => '×', 'trade' => '™', 'uarr' => '↑', 'verbar' => '|', 'yen' => '¥', 'blank' => '␣', 'blk12' => '▒', 'blk14' => '░', 'blk34' => '▓', 'block' => '█', 'bull' => '•', 'caret' => '⁁', 'check' => '✓', 'cir' => '○', 'clubs' => '♣', 'copysr' => '℗', 'cross' => '✗', 'Dagger' => '‡', 'dagger' => '†', 'dash' => '‐', 'diams' => '♦', 'dlcrop' => '⌍', 'drcrop' => '⌌', 'dtri' => '▿', 'dtrif' => '▾', 'emsp' => ' ', 'emsp13' => ' ', 'emsp14' => ' ', 'ensp' => ' ', 'female' => '♀', 'ffilig' => 'ffi', 'fflig' => 'ff', 'ffllig' => 'ffl', 'filig' => 'fi', 'fjlig' => 'fj', 'flat' => '♭', 'fllig' => 'fl', 'frac13' => '⅓', 'frac15' => '⅕', 'frac16' => '⅙', 'frac23' => '⅔', 'frac25' => '⅖', 'frac35' => '⅗', 'frac45' => '⅘', 'frac56' => '⅚', 'hairsp' => ' ', 'hearts' => '♥', 'hellip' => '…', 'hybull' => '⁃', 'incare' => '℅', 'ldquor' => '„', 'lhblk' => '▄', 'loz' => '◊', 'lozf' => '⧫', 'lsquor' => '‚', 'ltri' => '◃', 'ltrif' => '◂', 'male' => '♂', 'malt' => '✠', 'marker' => '▮', 'mdash' => '—', 'mldr' => '…', 'natur' => '♮', 'ndash' => '–', 'nldr' => '‥', 'numsp' => ' ', 'phone' => '☎', 'puncsp' => ' ', 'rdquor' => '”', 'rect' => '▭', 'rsquor' => '’', 'rtri' => '▹', 'rtrif' => '▸', 'rx' => '℞', 'sext' => '✶', 'sharp' => '♯', 'spades' => '♠', 'squ' => '□', 'squf' => '▪', 'star' => '☆', 'starf' => '★', 'target' => '⌖', 'telrec' => '⌕', 'thinsp' => ' ', 'uhblk' => '▀', 'ulcrop' => '⌏', 'urcrop' => '⌎', 'utri' => '▵', 'utrif' => '▴', 'vellip' => '⋮', 'af' => '⁡', 'aopf' => '𝕒', 'asympeq' => '≍', 'bopf' => '𝕓', 'copf' => '𝕔', 'Cross' => '⨯', 'DD' => 'ⅅ', 'dd' => 'ⅆ', 'dopf' => '𝕕', 'DownArrowBar' => '⤓', 'DownBreve' => '̑', 'DownLeftRightVector' => '⥐', 'DownLeftTeeVector' => '⥞', 'DownLeftVectorBar' => '⥖', 'DownRightTeeVector' => '⥟', 'DownRightVectorBar' => '⥗', 'ee' => 'ⅇ', 'EmptySmallSquare' => '◻', 'EmptyVerySmallSquare' => '▫', 'eopf' => '𝕖', 'Equal' => '⩵', 'FilledSmallSquare' => '◼', 'FilledVerySmallSquare' => '▪', 'fopf' => '𝕗', 'gopf' => '𝕘', 'GreaterGreater' => '⪢', 'Hat' => '^', 'hopf' => '𝕙', 'HorizontalLine' => '─', 'ic' => '⁣', 'ii' => 'ⅈ', 'iopf' => '𝕚', 'it' => '⁢', 'jopf' => '𝕛', 'kopf' => '𝕜', 'larrb' => '⇤', 'LeftDownTeeVector' => '⥡', 'LeftDownVectorBar' => '⥙', 'LeftRightVector' => '⥎', 'LeftTeeVector' => '⥚', 'LeftTriangleBar' => '⧏', 'LeftUpDownVector' => '⥑', 'LeftUpTeeVector' => '⥠', 'LeftUpVectorBar' => '⥘', 'LeftVectorBar' => '⥒', 'LessLess' => '⪡', 'lopf' => '𝕝', 'mapstodown' => '↧', 'mapstoleft' => '↤', 'mapstoup' => '↥', 'MediumSpace' => ' ', 'mopf' => '𝕞', 'nbump' => '≎̸', 'nbumpe' => '≏̸', 'nesim' => '≂̸', 'NewLine' => ' ', 'NoBreak' => '⁠', 'nopf' => '𝕟', 'NotCupCap' => '≭', 'NotHumpEqual' => '≏̸', 'NotLeftTriangleBar' => '⧏̸', 'NotNestedGreaterGreater' => '⪢̸', 'NotNestedLessLess' => '⪡̸', 'NotRightTriangleBar' => '⧐̸', 'NotSquareSubset' => '⊏̸', 'NotSquareSuperset' => '⊐̸', 'NotSucceedsTilde' => '≿̸', 'oopf' => '𝕠', 'OverBar' => '¯', 'OverBrace' => '⏞', 'OverBracket' => '⎴', 'OverParenthesis' => '⏜', 'planckh' => 'ℎ', 'popf' => '𝕡', 'Product' => '∏', 'qopf' => '𝕢', 'rarrb' => '⇥', 'RightDownTeeVector' => '⥝', 'RightDownVectorBar' => '⥕', 'RightTeeVector' => '⥛', 'RightTriangleBar' => '⧐', 'RightUpDownVector' => '⥏', 'RightUpTeeVector' => '⥜', 'RightUpVectorBar' => '⥔', 'RightVectorBar' => '⥓', 'ropf' => '𝕣', 'RoundImplies' => '⥰', 'RuleDelayed' => '⧴', 'sopf' => '𝕤', 'Tab' => ' ', 'ThickSpace' => '   ', 'topf' => '𝕥', 'UnderBar' => '̲', 'UnderBrace' => '⏟', 'UnderBracket' => '⎵', 'UnderParenthesis' => '⏝', 'uopf' => '𝕦', 'UpArrowBar' => '⤒', 'Upsilon' => 'Υ', 'VerticalLine' => '|', 'VerticalSeparator' => '❘', 'vopf' => '𝕧', 'wopf' => '𝕨', 'xopf' => '𝕩', 'yopf' => '𝕪', 'ZeroWidthSpace' => '​', 'zopf' => '𝕫', 'angle' => '∠', 'ApplyFunction' => '⁡', 'approx' => '≈', 'approxeq' => '≊', 'Assign' => '≔', 'backcong' => '≌', 'backepsilon' => '϶', 'backprime' => '‵', 'backsim' => '∽', 'backsimeq' => '⋍', 'Backslash' => '∖', 'barwedge' => '⌅', 'Because' => '∵', 'because' => '∵', 'Bernoullis' => 'ℬ', 'between' => '≬', 'bigcap' => '⋂', 'bigcirc' => '◯', 'bigcup' => '⋃', 'bigodot' => '⨀', 'bigoplus' => '⨁', 'bigotimes' => '⨂', 'bigsqcup' => '⨆', 'bigstar' => '★', 'bigtriangledown' => '▽', 'bigtriangleup' => '△', 'biguplus' => '⨄', 'bigvee' => '⋁', 'bigwedge' => '⋀', 'bkarow' => '⤍', 'blacklozenge' => '⧫', 'blacksquare' => '▪', 'blacktriangle' => '▴', 'blacktriangledown' => '▾', 'blacktriangleleft' => '◂', 'blacktriangleright' => '▸', 'bot' => '⊥', 'boxminus' => '⊟', 'boxplus' => '⊞', 'boxtimes' => '⊠', 'Breve' => '˘', 'bullet' => '•', 'Bumpeq' => '≎', 'bumpeq' => '≏', 'CapitalDifferentialD' => 'ⅅ', 'Cayleys' => 'ℭ', 'Cedilla' => '¸', 'CenterDot' => '·', 'centerdot' => '·', 'checkmark' => '✓', 'circeq' => '≗', 'circlearrowleft' => '↺', 'circlearrowright' => '↻', 'circledast' => '⊛', 'circledcirc' => '⊚', 'circleddash' => '⊝', 'CircleDot' => '⊙', 'circledR' => '®', 'circledS' => 'Ⓢ', 'CircleMinus' => '⊖', 'CirclePlus' => '⊕', 'CircleTimes' => '⊗', 'ClockwiseContourIntegral' => '∲', 'CloseCurlyDoubleQuote' => '”', 'CloseCurlyQuote' => '’', 'clubsuit' => '♣', 'coloneq' => '≔', 'complement' => '∁', 'complexes' => 'ℂ', 'Congruent' => '≡', 'ContourIntegral' => '∮', 'Coproduct' => '∐', 'CounterClockwiseContourIntegral' => '∳', 'CupCap' => '≍', 'curlyeqprec' => '⋞', 'curlyeqsucc' => '⋟', 'curlyvee' => '⋎', 'curlywedge' => '⋏', 'curvearrowleft' => '↶', 'curvearrowright' => '↷', 'dbkarow' => '⤏', 'ddagger' => '‡', 'ddotseq' => '⩷', 'Del' => '∇', 'DiacriticalAcute' => '´', 'DiacriticalDot' => '˙', 'DiacriticalDoubleAcute' => '˝', 'DiacriticalGrave' => '`', 'DiacriticalTilde' => '˜', 'Diamond' => '⋄', 'diamond' => '⋄', 'diamondsuit' => '♦', 'DifferentialD' => 'ⅆ', 'digamma' => 'ϝ', 'div' => '÷', 'divideontimes' => '⋇', 'doteq' => '≐', 'doteqdot' => '≑', 'DotEqual' => '≐', 'dotminus' => '∸', 'dotplus' => '∔', 'dotsquare' => '⊡', 'doublebarwedge' => '⌆', 'DoubleContourIntegral' => '∯', 'DoubleDot' => '¨', 'DoubleDownArrow' => '⇓', 'DoubleLeftArrow' => '⇐', 'DoubleLeftRightArrow' => '⇔', 'DoubleLeftTee' => '⫤', 'DoubleLongLeftArrow' => '⟸', 'DoubleLongLeftRightArrow' => '⟺', 'DoubleLongRightArrow' => '⟹', 'DoubleRightArrow' => '⇒', 'DoubleRightTee' => '⊨', 'DoubleUpArrow' => '⇑', 'DoubleUpDownArrow' => '⇕', 'DoubleVerticalBar' => '∥', 'DownArrow' => '↓', 'Downarrow' => '⇓', 'downarrow' => '↓', 'DownArrowUpArrow' => '⇵', 'downdownarrows' => '⇊', 'downharpoonleft' => '⇃', 'downharpoonright' => '⇂', 'DownLeftVector' => '↽', 'DownRightVector' => '⇁', 'DownTee' => '⊤', 'DownTeeArrow' => '↧', 'drbkarow' => '⤐', 'Element' => '∈', 'emptyset' => '∅', 'eqcirc' => '≖', 'eqcolon' => '≕', 'eqsim' => '≂', 'eqslantgtr' => '⪖', 'eqslantless' => '⪕', 'EqualTilde' => '≂', 'Equilibrium' => '⇌', 'Exists' => '∃', 'expectation' => 'ℰ', 'ExponentialE' => 'ⅇ', 'exponentiale' => 'ⅇ', 'fallingdotseq' => '≒', 'ForAll' => '∀', 'Fouriertrf' => 'ℱ', 'geq' => '≥', 'geqq' => '≧', 'geqslant' => '⩾', 'gg' => '≫', 'ggg' => '⋙', 'gnapprox' => '⪊', 'gneq' => '⪈', 'gneqq' => '≩', 'GreaterEqual' => '≥', 'GreaterEqualLess' => '⋛', 'GreaterFullEqual' => '≧', 'GreaterLess' => '≷', 'GreaterSlantEqual' => '⩾', 'GreaterTilde' => '≳', 'gtrapprox' => '⪆', 'gtrdot' => '⋗', 'gtreqless' => '⋛', 'gtreqqless' => '⪌', 'gtrless' => '≷', 'gtrsim' => '≳', 'gvertneqq' => '≩︀', 'Hacek' => 'ˇ', 'hbar' => 'ℏ', 'heartsuit' => '♥', 'HilbertSpace' => 'ℋ', 'hksearow' => '⤥', 'hkswarow' => '⤦', 'hookleftarrow' => '↩', 'hookrightarrow' => '↪', 'hslash' => 'ℏ', 'HumpDownHump' => '≎', 'HumpEqual' => '≏', 'iiiint' => '⨌', 'iiint' => '∭', 'Im' => 'ℑ', 'ImaginaryI' => 'ⅈ', 'imagline' => 'ℐ', 'imagpart' => 'ℑ', 'Implies' => '⇒', 'in' => '∈', 'integers' => 'ℤ', 'Integral' => '∫', 'intercal' => '⊺', 'Intersection' => '⋂', 'intprod' => '⨼', 'InvisibleComma' => '⁣', 'InvisibleTimes' => '⁢', 'langle' => '⟨', 'Laplacetrf' => 'ℒ', 'lbrace' => '{', 'lbrack' => '[', 'LeftAngleBracket' => '⟨', 'LeftArrow' => '←', 'Leftarrow' => '⇐', 'leftarrow' => '←', 'LeftArrowBar' => '⇤', 'LeftArrowRightArrow' => '⇆', 'leftarrowtail' => '↢', 'LeftCeiling' => '⌈', 'LeftDoubleBracket' => '⟦', 'LeftDownVector' => '⇃', 'LeftFloor' => '⌊', 'leftharpoondown' => '↽', 'leftharpoonup' => '↼', 'leftleftarrows' => '⇇', 'LeftRightArrow' => '↔', 'Leftrightarrow' => '⇔', 'leftrightarrow' => '↔', 'leftrightarrows' => '⇆', 'leftrightharpoons' => '⇋', 'leftrightsquigarrow' => '↭', 'LeftTee' => '⊣', 'LeftTeeArrow' => '↤', 'leftthreetimes' => '⋋', 'LeftTriangle' => '⊲', 'LeftTriangleEqual' => '⊴', 'LeftUpVector' => '↿', 'LeftVector' => '↼', 'leq' => '≤', 'leqq' => '≦', 'leqslant' => '⩽', 'lessapprox' => '⪅', 'lessdot' => '⋖', 'lesseqgtr' => '⋚', 'lesseqqgtr' => '⪋', 'LessEqualGreater' => '⋚', 'LessFullEqual' => '≦', 'LessGreater' => '≶', 'lessgtr' => '≶', 'lesssim' => '≲', 'LessSlantEqual' => '⩽', 'LessTilde' => '≲', 'll' => '≪', 'llcorner' => '⌞', 'Lleftarrow' => '⇚', 'lmoustache' => '⎰', 'lnapprox' => '⪉', 'lneq' => '⪇', 'lneqq' => '≨', 'LongLeftArrow' => '⟵', 'Longleftarrow' => '⟸', 'longleftarrow' => '⟵', 'LongLeftRightArrow' => '⟷', 'Longleftrightarrow' => '⟺', 'longleftrightarrow' => '⟷', 'longmapsto' => '⟼', 'LongRightArrow' => '⟶', 'Longrightarrow' => '⟹', 'longrightarrow' => '⟶', 'looparrowleft' => '↫', 'looparrowright' => '↬', 'LowerLeftArrow' => '↙', 'LowerRightArrow' => '↘', 'lozenge' => '◊', 'lrcorner' => '⌟', 'Lsh' => '↰', 'lvertneqq' => '≨︀', 'maltese' => '✠', 'mapsto' => '↦', 'measuredangle' => '∡', 'Mellintrf' => 'ℳ', 'MinusPlus' => '∓', 'mp' => '∓', 'multimap' => '⊸', 'napprox' => '≉', 'natural' => '♮', 'naturals' => 'ℕ', 'nearrow' => '↗', 'NegativeMediumSpace' => '​', 'NegativeThickSpace' => '​', 'NegativeThinSpace' => '​', 'NegativeVeryThinSpace' => '​', 'NestedGreaterGreater' => '≫', 'NestedLessLess' => '≪', 'nexists' => '∄', 'ngeq' => '≱', 'ngeqq' => '≧̸', 'ngeqslant' => '⩾̸', 'ngtr' => '≯', 'nLeftarrow' => '⇍', 'nleftarrow' => '↚', 'nLeftrightarrow' => '⇎', 'nleftrightarrow' => '↮', 'nleq' => '≰', 'nleqq' => '≦̸', 'nleqslant' => '⩽̸', 'nless' => '≮', 'NonBreakingSpace' => ' ', 'NotCongruent' => '≢', 'NotDoubleVerticalBar' => '∦', 'NotElement' => '∉', 'NotEqual' => '≠', 'NotEqualTilde' => '≂̸', 'NotExists' => '∄', 'NotGreater' => '≯', 'NotGreaterEqual' => '≱', 'NotGreaterFullEqual' => '≦̸', 'NotGreaterGreater' => '≫̸', 'NotGreaterLess' => '≹', 'NotGreaterSlantEqual' => '⩾̸', 'NotGreaterTilde' => '≵', 'NotHumpDownHump' => '≎̸', 'NotLeftTriangle' => '⋪', 'NotLeftTriangleEqual' => '⋬', 'NotLess' => '≮', 'NotLessEqual' => '≰', 'NotLessGreater' => '≸', 'NotLessLess' => '≪̸', 'NotLessSlantEqual' => '⩽̸', 'NotLessTilde' => '≴', 'NotPrecedes' => '⊀', 'NotPrecedesEqual' => '⪯̸', 'NotPrecedesSlantEqual' => '⋠', 'NotReverseElement' => '∌', 'NotRightTriangle' => '⋫', 'NotRightTriangleEqual' => '⋭', 'NotSquareSubsetEqual' => '⋢', 'NotSquareSupersetEqual' => '⋣', 'NotSubset' => '⊂⃒', 'NotSubsetEqual' => '⊈', 'NotSucceeds' => '⊁', 'NotSucceedsEqual' => '⪰̸', 'NotSucceedsSlantEqual' => '⋡', 'NotSuperset' => '⊃⃒', 'NotSupersetEqual' => '⊉', 'NotTilde' => '≁', 'NotTildeEqual' => '≄', 'NotTildeFullEqual' => '≇', 'NotTildeTilde' => '≉', 'NotVerticalBar' => '∤', 'nparallel' => '∦', 'nprec' => '⊀', 'npreceq' => '⪯̸', 'nRightarrow' => '⇏', 'nrightarrow' => '↛', 'nshortmid' => '∤', 'nshortparallel' => '∦', 'nsimeq' => '≄', 'nsubset' => '⊂⃒', 'nsubseteq' => '⊈', 'nsubseteqq' => '⫅̸', 'nsucc' => '⊁', 'nsucceq' => '⪰̸', 'nsupset' => '⊃⃒', 'nsupseteq' => '⊉', 'nsupseteqq' => '⫆̸', 'ntriangleleft' => '⋪', 'ntrianglelefteq' => '⋬', 'ntriangleright' => '⋫', 'ntrianglerighteq' => '⋭', 'nwarrow' => '↖', 'oint' => '∮', 'OpenCurlyDoubleQuote' => '“', 'OpenCurlyQuote' => '‘', 'orderof' => 'ℴ', 'parallel' => '∥', 'PartialD' => '∂', 'pitchfork' => '⋔', 'PlusMinus' => '±', 'pm' => '±', 'Poincareplane' => 'ℌ', 'prec' => '≺', 'precapprox' => '⪷', 'preccurlyeq' => '≼', 'Precedes' => '≺', 'PrecedesEqual' => '⪯', 'PrecedesSlantEqual' => '≼', 'PrecedesTilde' => '≾', 'preceq' => '⪯', 'precnapprox' => '⪹', 'precneqq' => '⪵', 'precnsim' => '⋨', 'precsim' => '≾', 'primes' => 'ℙ', 'Proportion' => '∷', 'Proportional' => '∝', 'propto' => '∝', 'quaternions' => 'ℍ', 'questeq' => '≟', 'rangle' => '⟩', 'rationals' => 'ℚ', 'rbrace' => '}', 'rbrack' => ']', 'Re' => 'ℜ', 'realine' => 'ℛ', 'realpart' => 'ℜ', 'reals' => 'ℝ', 'ReverseElement' => '∋', 'ReverseEquilibrium' => '⇋', 'ReverseUpEquilibrium' => '⥯', 'RightAngleBracket' => '⟩', 'RightArrow' => '→', 'Rightarrow' => '⇒', 'rightarrow' => '→', 'RightArrowBar' => '⇥', 'RightArrowLeftArrow' => '⇄', 'rightarrowtail' => '↣', 'RightCeiling' => '⌉', 'RightDoubleBracket' => '⟧', 'RightDownVector' => '⇂', 'RightFloor' => '⌋', 'rightharpoondown' => '⇁', 'rightharpoonup' => '⇀', 'rightleftarrows' => '⇄', 'rightleftharpoons' => '⇌', 'rightrightarrows' => '⇉', 'rightsquigarrow' => '↝', 'RightTee' => '⊢', 'RightTeeArrow' => '↦', 'rightthreetimes' => '⋌', 'RightTriangle' => '⊳', 'RightTriangleEqual' => '⊵', 'RightUpVector' => '↾', 'RightVector' => '⇀', 'risingdotseq' => '≓', 'rmoustache' => '⎱', 'Rrightarrow' => '⇛', 'Rsh' => '↱', 'searrow' => '↘', 'setminus' => '∖', 'ShortDownArrow' => '↓', 'ShortLeftArrow' => '←', 'shortmid' => '∣', 'shortparallel' => '∥', 'ShortRightArrow' => '→', 'ShortUpArrow' => '↑', 'simeq' => '≃', 'SmallCircle' => '∘', 'smallsetminus' => '∖', 'spadesuit' => '♠', 'Sqrt' => '√', 'sqsubset' => '⊏', 'sqsubseteq' => '⊑', 'sqsupset' => '⊐', 'sqsupseteq' => '⊒', 'Square' => '□', 'SquareIntersection' => '⊓', 'SquareSubset' => '⊏', 'SquareSubsetEqual' => '⊑', 'SquareSuperset' => '⊐', 'SquareSupersetEqual' => '⊒', 'SquareUnion' => '⊔', 'Star' => '⋆', 'straightepsilon' => 'ϵ', 'straightphi' => 'ϕ', 'Subset' => '⋐', 'subset' => '⊂', 'subseteq' => '⊆', 'subseteqq' => '⫅', 'SubsetEqual' => '⊆', 'subsetneq' => '⊊', 'subsetneqq' => '⫋', 'succ' => '≻', 'succapprox' => '⪸', 'succcurlyeq' => '≽', 'Succeeds' => '≻', 'SucceedsEqual' => '⪰', 'SucceedsSlantEqual' => '≽', 'SucceedsTilde' => '≿', 'succeq' => '⪰', 'succnapprox' => '⪺', 'succneqq' => '⪶', 'succnsim' => '⋩', 'succsim' => '≿', 'SuchThat' => '∋', 'Sum' => '∑', 'Superset' => '⊃', 'SupersetEqual' => '⊇', 'Supset' => '⋑', 'supset' => '⊃', 'supseteq' => '⊇', 'supseteqq' => '⫆', 'supsetneq' => '⊋', 'supsetneqq' => '⫌', 'swarrow' => '↙', 'Therefore' => '∴', 'therefore' => '∴', 'thickapprox' => '≈', 'thicksim' => '∼', 'ThinSpace' => ' ', 'Tilde' => '∼', 'TildeEqual' => '≃', 'TildeFullEqual' => '≅', 'TildeTilde' => '≈', 'toea' => '⤨', 'tosa' => '⤩', 'triangle' => '▵', 'triangledown' => '▿', 'triangleleft' => '◃', 'trianglelefteq' => '⊴', 'triangleq' => '≜', 'triangleright' => '▹', 'trianglerighteq' => '⊵', 'TripleDot' => '⃛', 'twoheadleftarrow' => '↞', 'twoheadrightarrow' => '↠', 'ulcorner' => '⌜', 'Union' => '⋃', 'UnionPlus' => '⊎', 'UpArrow' => '↑', 'Uparrow' => '⇑', 'uparrow' => '↑', 'UpArrowDownArrow' => '⇅', 'UpDownArrow' => '↕', 'Updownarrow' => '⇕', 'updownarrow' => '↕', 'UpEquilibrium' => '⥮', 'upharpoonleft' => '↿', 'upharpoonright' => '↾', 'UpperLeftArrow' => '↖', 'UpperRightArrow' => '↗', 'upsilon' => 'υ', 'UpTee' => '⊥', 'UpTeeArrow' => '↥', 'upuparrows' => '⇈', 'urcorner' => '⌝', 'varepsilon' => 'ε', 'varkappa' => 'ϰ', 'varnothing' => '∅', 'varphi' => 'φ', 'varpi' => 'ϖ', 'varpropto' => '∝', 'varrho' => 'ϱ', 'varsigma' => 'ς', 'varsubsetneq' => '⊊︀', 'varsubsetneqq' => '⫋︀', 'varsupsetneq' => '⊋︀', 'varsupsetneqq' => '⫌︀', 'vartheta' => 'ϑ', 'vartriangleleft' => '⊲', 'vartriangleright' => '⊳', 'Vee' => '⋁', 'vee' => '∨', 'Vert' => '‖', 'vert' => '|', 'VerticalBar' => '∣', 'VerticalTilde' => '≀', 'VeryThinSpace' => ' ', 'Wedge' => '⋀', 'wedge' => '∧', 'wp' => '℘', 'wr' => '≀', 'zeetrf' => 'ℨ', 'AMP' => '&', 'COPY' => '©', 'GT' => '>', 'LT' => '<', 'QUOT' => '"', 'REG' => '®', 'TRADE' => '™' } unless const_defined? "MATHML_ENTITIES" #:startdoc: # Converts XHTML+MathML named entities in string to Numeric Character References # # :call-seq: # string.to_ncr -> string # def to_ncr self.gsub(/&([a-zA-Z0-9]+);/) {|m| $1.convert_to_ncr} end # Converts XHTML+MathML named entities in string to Numeric Character References # # :call-seq: # string.to_ncr! -> str or nil # # Substitution is done in-place. # def to_ncr! self.gsub!(/&([a-zA-Z0-9]+);/) {|m| $1.convert_to_ncr} end # Converts XHTML+MathML named entities in string to UTF-8 # # :call-seq: # string.to_utf8 -> string # #-- def to_utf8 self.gsub(/&([a-zA-Z0-9]+);/) {|m| $1.convert_to_utf8} # You might think this is faster, but it isn't # pieces = self.split(/&([a-zA-Z0-9]+);/) # 1.step(pieces.length-1, 2) {|i| pieces[i].convert_to_utf8} # pieces.join end #++ # Converts XHTML+MathML named entities in string to UTF-8 # # :call-seq: # string.to_ncr! -> str or nil # # Substitution is done in-place. # def to_utf8! self.gsub!(/&([a-zA-Z0-9]+);/) {|m| $1.convert_to_utf8} end protected def convert_to_ncr #:nodoc: if self =~ /^(lt|gt|amp|quot|apos)$/ self.replace "&" + self + ";" elsif MATHML_ENTITIES.has_key?(self) self.replace MATHML_ENTITIES[self] else self.replace "&" + self + ";" end end def convert_to_utf8 #:nodoc: if self =~ /^(lt|gt|amp|quot|apos)$/i self.replace "&" + self.downcase + ";" elsif MATHML_ENTITIES.has_key?(self) self.replace MATHML_ENTITIES[self].split(';').collect {|s| s.gsub(/^&#x([A-F0-9]+)$/, '\1').hex }.pack('U*') else self.replace "&" + self + ";" end end end require 'rexml/element' module REXML #:nodoc: class Element # Convert XHTML+MathML Named Entities in a REXML::Element to Numeric Character References # # :call-seq: # tree.to_ncr -> REXML::Element # # REXML, typically, converts NCRs to utf-8 characters, which is what you'll see when you # access the resulting REXML document. # # Note that this method needs to traverse the entire tree, converting text nodes and attributes # for each element. This can be SLOW. It will often be faster to serialize to a string and then # use String.to_ncr instead. # def to_ncr self.each_element { |el| el.texts.each_index {|i| el.texts[i].value = el.texts[i].to_s.to_ncr } el.attributes.each { |name,val| el.attributes[name] = val.to_ncr } el.to_ncr if el.has_elements? } return self end # Convert XHTML+MathML Named Entities in a REXML::Element to UTF-8 # # :call-seq: # tree.to_utf8 -> REXML::Element # # Note that this method needs to traverse the entire tree, converting text nodes and attributes # for each element. This can be SLOW. It will often be faster to serialize to a string and then # use String.to_utf8 instead. # def to_utf8 self.each_element { |el| el.texts.each_index {|i| el.texts[i].value = el.texts[i].to_s.to_utf8 } el.attributes.each { |name,val| el.attributes[name] = val.to_utf8 } el.to_utf8 if el.has_elements? } return self end end end module HTML5 #:nodoc: all module TreeWalkers private class << self def [](name) case name.to_s.downcase when 'rexml' require 'html5/treewalkers/rexml' REXML::TreeWalker when 'rexml2' REXML2::TreeWalker else raise "Unknown TreeWalker #{name}" end end alias :get_tree_walker :[] end module REXML2 class TreeWalker < HTML5::TreeWalkers::NonRecursiveTreeWalker private def node_details(node) case node when ::REXML::Document [:DOCUMENT] when ::REXML::Element if !node.name [:DOCUMENT_FRAGMENT] else [:ELEMENT, node.name, node.attributes.map {|name,value| [name,value.to_utf8]}, node.has_elements? || node.has_text?] end when ::REXML::Text [:TEXT, node.value.to_utf8] when ::REXML::Comment [:COMMENT, node.string] when ::REXML::DocType [:DOCTYPE, node.name, node.public, node.system] when ::REXML::XMLDecl [nil] else [:UNKNOWN, node.class.inspect] end end def first_child(node) node.children.first end def next_sibling(node) node.next_sibling end def parent(node) node.parent end end end end end