Sync with latest HTML5lib and latest Maruku

2007-07-04 17:36:59 -05:00 · 2007-07-04 17:36:59 -05:00 · 8ccaad85a5
commit 8ccaad85a5
parent 8e92e4a3ab
71 changed files with 1974 additions and 1621 deletions
--- a/vendor/plugins/HTML5lib/lib/html5lib.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib.rb
@ -1,11 +1,11 @@
-require 'html5lib/html5parser'
-
-module HTML5lib
-    def self.parse(stream, options={})
-        HTMLParser.parse(stream, options)
-    end
-
-    def self.parseFragment(stream, options={})
-        HTMLParser.parse(stream, options)
-    end
-end
+require 'html5/html5parser'
+
+module HTML5  # convenience entry points; both forward stream and options to HTMLParser unchanged
+    def self.parse(stream, options={})  # parse +stream+ as a complete document
+        HTMLParser.parse(stream, options)
+    end
+
+    def self.parseFragment(stream, options={})  # parse +stream+ as a fragment; HTMLParser.parseFragment reads :container (default 'div') from options
+        HTMLParser.parseFragment(stream, options)  # previously called .parse, so fragments were parsed as whole documents
+    end
+end
--- a/vendor/plugins/HTML5lib/lib/html5/constants.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/constants.rb
@ -0,0 +1,817 @@
+module HTML5
+
+  class EOF < Exception; end  # derives from Exception (not StandardError), so a bare `rescue` will not catch it; presumably signals end of input -- confirm at raise sites
+
+  CONTENT_MODEL_FLAGS = [
+      :PCDATA,
+      :RCDATA,
+      :CDATA,
+      :PLAINTEXT
+  ]
+
+  SCOPING_ELEMENTS = %w[
+      button
+      caption
+      html
+      marquee
+      object
+      table
+      td
+      th
+  ]
+
+  FORMATTING_ELEMENTS = %w[
+      a
+      b
+      big
+      em
+      font
+      i
+      nobr
+      s
+      small
+      strike
+      strong
+      tt
+      u
+  ]
+
+  SPECIAL_ELEMENTS = %w[
+      address
+      area
+      base
+      basefont
+      bgsound
+      blockquote
+      body
+      br
+      center
+      col
+      colgroup
+      dd
+      dir
+      div
+      dl
+      dt
+      embed
+      fieldset
+      form
+      frame
+      frameset
+      h1
+      h2
+      h3
+      h4
+      h5
+      h6
+      head
+      hr
+      iframe
+      image
+      img
+      input
+      isindex
+      li
+      link
+      listing
+      menu
+      meta
+      noembed
+      noframes
+      noscript
+      ol
+      optgroup
+      option
+      p
+      param
+      plaintext
+      pre
+      script
+      select
+      spacer
+      style
+      tbody
+      textarea
+      tfoot
+      thead
+      title
+      tr
+      ul
+      wbr
+  ]
+
+  SPACE_CHARACTERS = %W[
+      \t
+      \n
+      \x0B
+      \x0C
+      \x20
+      \r
+  ]  # %W (capital W) interprets the backslash escapes, so each entry is the one-character whitespace string itself
+
+  TABLE_INSERT_MODE_ELEMENTS = %w[
+      table
+      tbody
+      tfoot
+      thead
+      tr
+  ]
+
+  ASCII_LOWERCASE = ('a'..'z').to_a.join('')  # String "abc...z"
+  ASCII_UPPERCASE = ('A'..'Z').to_a.join('')  # String "ABC...Z"
+  ASCII_LETTERS = ASCII_LOWERCASE + ASCII_UPPERCASE  # String: lowercase letters followed by uppercase letters
+  DIGITS = '0'..'9'  # NOTE: a Range of one-character strings, not a String like the ASCII_* constants above
+  HEX_DIGITS = DIGITS.to_a + ('a'..'f').to_a + ('A'..'F').to_a  # Array of one-character strings: 0-9, then a-f, then A-F
+
+  # Heading elements need to be ordered 
+  HEADING_ELEMENTS = %w[
+      h1
+      h2
+      h3
+      h4
+      h5
+      h6
+  ]
+
+  # XXX What about event-source and command?
+  VOID_ELEMENTS = %w[
+      base
+      link
+      meta
+      hr
+      br
+      img
+      embed
+      param
+      area
+      col
+      input
+  ]
+
+  CDATA_ELEMENTS = %w[title textarea]  # NOTE(review): HTMLParser#_parse gives title/textarea the :RCDATA flag, so this name looks swapped with RCDATA_ELEMENTS -- confirm before relying on it
+
+  RCDATA_ELEMENTS = %w[
+    style
+    script
+    xmp
+    iframe
+    noembed
+    noframes
+    noscript
+  ]  # NOTE(review): the same seven names are given the :CDATA flag in HTMLParser#_parse -- see the note on CDATA_ELEMENTS
+
+  BOOLEAN_ATTRIBUTES = {
+    :global => %w[irrelevant],
+    'style' => %w[scoped],
+    'img' => %w[ismap],
+    'audio' => %w[autoplay controls],
+    'video' => %w[autoplay controls],
+    'script' => %w[defer async],
+    'details' => %w[open],
+    'datagrid' => %w[multiple disabled],
+    'command' => %w[hidden disabled checked default],
+    'menu' => %w[autosubmit],
+    'fieldset' => %w[disabled readonly],
+    'option' => %w[disabled readonly selected],
+    'optgroup' => %w[disabled readonly],
+    'button' => %w[disabled autofocus],
+    'input' => %w[disabled readonly required autofocus checked ismap],
+    'select' => %w[disabled readonly autofocus multiple],
+    'output' => %w[disabled readonly]
+  }
+
+  # entitiesWindows1252 has to be _ordered_ and needs to have an index.
+  ENTITIES_WINDOWS1252 = [
+      8364,  # 0x80  0x20AC  EURO SIGN
+      65533, # 0x81          UNDEFINED
+      8218,  # 0x82  0x201A  SINGLE LOW-9 QUOTATION MARK
+      402,   # 0x83  0x0192  LATIN SMALL LETTER F WITH HOOK
+      8222,  # 0x84  0x201E  DOUBLE LOW-9 QUOTATION MARK
+      8230,  # 0x85  0x2026  HORIZONTAL ELLIPSIS
+      8224,  # 0x86  0x2020  DAGGER
+      8225,  # 0x87  0x2021  DOUBLE DAGGER
+      710,   # 0x88  0x02C6  MODIFIER LETTER CIRCUMFLEX ACCENT
+      8240,  # 0x89  0x2030  PER MILLE SIGN
+      352,   # 0x8A  0x0160  LATIN CAPITAL LETTER S WITH CARON
+      8249,  # 0x8B  0x2039  SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+      338,   # 0x8C  0x0152  LATIN CAPITAL LIGATURE OE
+      65533, # 0x8D          UNDEFINED
+      381,   # 0x8E  0x017D  LATIN CAPITAL LETTER Z WITH CARON
+      65533, # 0x8F          UNDEFINED
+      65533, # 0x90          UNDEFINED
+      8216,  # 0x91  0x2018  LEFT SINGLE QUOTATION MARK
+      8217,  # 0x92  0x2019  RIGHT SINGLE QUOTATION MARK
+      8220,  # 0x93  0x201C  LEFT DOUBLE QUOTATION MARK
+      8221,  # 0x94  0x201D  RIGHT DOUBLE QUOTATION MARK
+      8226,  # 0x95  0x2022  BULLET
+      8211,  # 0x96  0x2013  EN DASH
+      8212,  # 0x97  0x2014  EM DASH
+      732,   # 0x98  0x02DC  SMALL TILDE
+      8482,  # 0x99  0x2122  TRADE MARK SIGN
+      353,   # 0x9A  0x0161  LATIN SMALL LETTER S WITH CARON
+      8250,  # 0x9B  0x203A  SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+      339,   # 0x9C  0x0153  LATIN SMALL LIGATURE OE
+      65533, # 0x9D          UNDEFINED
+      382,   # 0x9E  0x017E  LATIN SMALL LETTER Z WITH CARON
+      376    # 0x9F  0x0178  LATIN CAPITAL LETTER Y WITH DIAERESIS
+  ]
+
+  # ENTITIES was generated from Python using the following code:
+  #
+  # import constants
+  # entities = constants.entities.items()
+  # entities.sort()
+  # list = [ ' '.join([repr(entity), '=>', ord(value)<128 and 
+  #   repr(str(value)) or repr(value.encode('utf-8')).replace("'",'"')])
+  #   for entity, value in entities]
+  #   print '  ENTITIES = {\n    ' + ',\n    '.join(list) + '\n  }'
+
+  ENTITIES = {
+    'AElig' => "\xc3\x86",
+    'AElig;' => "\xc3\x86",
+    'AMP' => '&',
+    'AMP;' => '&',
+    'Aacute' => "\xc3\x81",
+    'Aacute;' => "\xc3\x81",
+    'Acirc' => "\xc3\x82",
+    'Acirc;' => "\xc3\x82",
+    'Agrave' => "\xc3\x80",
+    'Agrave;' => "\xc3\x80",
+    'Alpha;' => "\xce\x91",
+    'Aring' => "\xc3\x85",
+    'Aring;' => "\xc3\x85",
+    'Atilde' => "\xc3\x83",
+    'Atilde;' => "\xc3\x83",
+    'Auml' => "\xc3\x84",
+    'Auml;' => "\xc3\x84",
+    'Beta;' => "\xce\x92",
+    'COPY' => "\xc2\xa9",
+    'COPY;' => "\xc2\xa9",
+    'Ccedil' => "\xc3\x87",
+    'Ccedil;' => "\xc3\x87",
+    'Chi;' => "\xce\xa7",
+    'Dagger;' => "\xe2\x80\xa1",
+    'Delta;' => "\xce\x94",
+    'ETH' => "\xc3\x90",
+    'ETH;' => "\xc3\x90",
+    'Eacute' => "\xc3\x89",
+    'Eacute;' => "\xc3\x89",
+    'Ecirc' => "\xc3\x8a",
+    'Ecirc;' => "\xc3\x8a",
+    'Egrave' => "\xc3\x88",
+    'Egrave;' => "\xc3\x88",
+    'Epsilon;' => "\xce\x95",
+    'Eta;' => "\xce\x97",
+    'Euml' => "\xc3\x8b",
+    'Euml;' => "\xc3\x8b",
+    'GT' => '>',
+    'GT;' => '>',
+    'Gamma;' => "\xce\x93",
+    'Iacute' => "\xc3\x8d",
+    'Iacute;' => "\xc3\x8d",
+    'Icirc' => "\xc3\x8e",
+    'Icirc;' => "\xc3\x8e",
+    'Igrave' => "\xc3\x8c",
+    'Igrave;' => "\xc3\x8c",
+    'Iota;' => "\xce\x99",
+    'Iuml' => "\xc3\x8f",
+    'Iuml;' => "\xc3\x8f",
+    'Kappa;' => "\xce\x9a",
+    'LT' => '<',
+    'LT;' => '<',
+    'Lambda;' => "\xce\x9b",
+    'Mu;' => "\xce\x9c",
+    'Ntilde' => "\xc3\x91",
+    'Ntilde;' => "\xc3\x91",
+    'Nu;' => "\xce\x9d",
+    'OElig;' => "\xc5\x92",
+    'Oacute' => "\xc3\x93",
+    'Oacute;' => "\xc3\x93",
+    'Ocirc' => "\xc3\x94",
+    'Ocirc;' => "\xc3\x94",
+    'Ograve' => "\xc3\x92",
+    'Ograve;' => "\xc3\x92",
+    'Omega;' => "\xce\xa9",
+    'Omicron;' => "\xce\x9f",
+    'Oslash' => "\xc3\x98",
+    'Oslash;' => "\xc3\x98",
+    'Otilde' => "\xc3\x95",
+    'Otilde;' => "\xc3\x95",
+    'Ouml' => "\xc3\x96",
+    'Ouml;' => "\xc3\x96",
+    'Phi;' => "\xce\xa6",
+    'Pi;' => "\xce\xa0",
+    'Prime;' => "\xe2\x80\xb3",
+    'Psi;' => "\xce\xa8",
+    'QUOT' => '"',
+    'QUOT;' => '"',
+    'REG' => "\xc2\xae",
+    'REG;' => "\xc2\xae",
+    'Rho;' => "\xce\xa1",
+    'Scaron;' => "\xc5\xa0",
+    'Sigma;' => "\xce\xa3",
+    'THORN' => "\xc3\x9e",
+    'THORN;' => "\xc3\x9e",
+    'TRADE;' => "\xe2\x84\xa2",
+    'Tau;' => "\xce\xa4",
+    'Theta;' => "\xce\x98",
+    'Uacute' => "\xc3\x9a",
+    'Uacute;' => "\xc3\x9a",
+    'Ucirc' => "\xc3\x9b",
+    'Ucirc;' => "\xc3\x9b",
+    'Ugrave' => "\xc3\x99",
+    'Ugrave;' => "\xc3\x99",
+    'Upsilon;' => "\xce\xa5",
+    'Uuml' => "\xc3\x9c",
+    'Uuml;' => "\xc3\x9c",
+    'Xi;' => "\xce\x9e",
+    'Yacute' => "\xc3\x9d",
+    'Yacute;' => "\xc3\x9d",
+    'Yuml;' => "\xc5\xb8",
+    'Zeta;' => "\xce\x96",
+    'aacute' => "\xc3\xa1",
+    'aacute;' => "\xc3\xa1",
+    'acirc' => "\xc3\xa2",
+    'acirc;' => "\xc3\xa2",
+    'acute' => "\xc2\xb4",
+    'acute;' => "\xc2\xb4",
+    'aelig' => "\xc3\xa6",
+    'aelig;' => "\xc3\xa6",
+    'agrave' => "\xc3\xa0",
+    'agrave;' => "\xc3\xa0",
+    'alefsym;' => "\xe2\x84\xb5",
+    'alpha;' => "\xce\xb1",
+    'amp' => '&',
+    'amp;' => '&',
+    'and;' => "\xe2\x88\xa7",
+    'ang;' => "\xe2\x88\xa0",
+    'apos;' => "'",
+    'aring' => "\xc3\xa5",
+    'aring;' => "\xc3\xa5",
+    'asymp;' => "\xe2\x89\x88",
+    'atilde' => "\xc3\xa3",
+    'atilde;' => "\xc3\xa3",
+    'auml' => "\xc3\xa4",
+    'auml;' => "\xc3\xa4",
+    'bdquo;' => "\xe2\x80\x9e",
+    'beta;' => "\xce\xb2",
+    'brvbar' => "\xc2\xa6",
+    'brvbar;' => "\xc2\xa6",
+    'bull;' => "\xe2\x80\xa2",
+    'cap;' => "\xe2\x88\xa9",
+    'ccedil' => "\xc3\xa7",
+    'ccedil;' => "\xc3\xa7",
+    'cedil' => "\xc2\xb8",
+    'cedil;' => "\xc2\xb8",
+    'cent' => "\xc2\xa2",
+    'cent;' => "\xc2\xa2",
+    'chi;' => "\xcf\x87",
+    'circ;' => "\xcb\x86",
+    'clubs;' => "\xe2\x99\xa3",
+    'cong;' => "\xe2\x89\x85",
+    'copy' => "\xc2\xa9",
+    'copy;' => "\xc2\xa9",
+    'crarr;' => "\xe2\x86\xb5",
+    'cup;' => "\xe2\x88\xaa",
+    'curren' => "\xc2\xa4",
+    'curren;' => "\xc2\xa4",
+    'dArr;' => "\xe2\x87\x93",
+    'dagger;' => "\xe2\x80\xa0",
+    'darr;' => "\xe2\x86\x93",
+    'deg' => "\xc2\xb0",
+    'deg;' => "\xc2\xb0",
+    'delta;' => "\xce\xb4",
+    'diams;' => "\xe2\x99\xa6",
+    'divide' => "\xc3\xb7",
+    'divide;' => "\xc3\xb7",
+    'eacute' => "\xc3\xa9",
+    'eacute;' => "\xc3\xa9",
+    'ecirc' => "\xc3\xaa",
+    'ecirc;' => "\xc3\xaa",
+    'egrave' => "\xc3\xa8",
+    'egrave;' => "\xc3\xa8",
+    'empty;' => "\xe2\x88\x85",
+    'emsp;' => "\xe2\x80\x83",
+    'ensp;' => "\xe2\x80\x82",
+    'epsilon;' => "\xce\xb5",
+    'equiv;' => "\xe2\x89\xa1",
+    'eta;' => "\xce\xb7",
+    'eth' => "\xc3\xb0",
+    'eth;' => "\xc3\xb0",
+    'euml' => "\xc3\xab",
+    'euml;' => "\xc3\xab",
+    'euro;' => "\xe2\x82\xac",
+    'exist;' => "\xe2\x88\x83",
+    'fnof;' => "\xc6\x92",
+    'forall;' => "\xe2\x88\x80",
+    'frac12' => "\xc2\xbd",
+    'frac12;' => "\xc2\xbd",
+    'frac14' => "\xc2\xbc",
+    'frac14;' => "\xc2\xbc",
+    'frac34' => "\xc2\xbe",
+    'frac34;' => "\xc2\xbe",
+    'frasl;' => "\xe2\x81\x84",
+    'gamma;' => "\xce\xb3",
+    'ge;' => "\xe2\x89\xa5",
+    'gt' => '>',
+    'gt;' => '>',
+    'hArr;' => "\xe2\x87\x94",
+    'harr;' => "\xe2\x86\x94",
+    'hearts;' => "\xe2\x99\xa5",
+    'hellip;' => "\xe2\x80\xa6",
+    'iacute' => "\xc3\xad",
+    'iacute;' => "\xc3\xad",
+    'icirc' => "\xc3\xae",
+    'icirc;' => "\xc3\xae",
+    'iexcl' => "\xc2\xa1",
+    'iexcl;' => "\xc2\xa1",
+    'igrave' => "\xc3\xac",
+    'igrave;' => "\xc3\xac",
+    'image;' => "\xe2\x84\x91",
+    'infin;' => "\xe2\x88\x9e",
+    'int;' => "\xe2\x88\xab",
+    'iota;' => "\xce\xb9",
+    'iquest' => "\xc2\xbf",
+    'iquest;' => "\xc2\xbf",
+    'isin;' => "\xe2\x88\x88",
+    'iuml' => "\xc3\xaf",
+    'iuml;' => "\xc3\xaf",
+    'kappa;' => "\xce\xba",
+    'lArr;' => "\xe2\x87\x90",
+    'lambda;' => "\xce\xbb",
+    'lang;' => "\xe3\x80\x88",
+    'laquo' => "\xc2\xab",
+    'laquo;' => "\xc2\xab",
+    'larr;' => "\xe2\x86\x90",
+    'lceil;' => "\xe2\x8c\x88",
+    'ldquo;' => "\xe2\x80\x9c",
+    'le;' => "\xe2\x89\xa4",
+    'lfloor;' => "\xe2\x8c\x8a",
+    'lowast;' => "\xe2\x88\x97",
+    'loz;' => "\xe2\x97\x8a",
+    'lrm;' => "\xe2\x80\x8e",
+    'lsaquo;' => "\xe2\x80\xb9",
+    'lsquo;' => "\xe2\x80\x98",
+    'lt' => '<',
+    'lt;' => '<',
+    'macr' => "\xc2\xaf",
+    'macr;' => "\xc2\xaf",
+    'mdash;' => "\xe2\x80\x94",
+    'micro' => "\xc2\xb5",
+    'micro;' => "\xc2\xb5",
+    'middot' => "\xc2\xb7",
+    'middot;' => "\xc2\xb7",
+    'minus;' => "\xe2\x88\x92",
+    'mu;' => "\xce\xbc",
+    'nabla;' => "\xe2\x88\x87",
+    'nbsp' => "\xc2\xa0",
+    'nbsp;' => "\xc2\xa0",
+    'ndash;' => "\xe2\x80\x93",
+    'ne;' => "\xe2\x89\xa0",
+    'ni;' => "\xe2\x88\x8b",
+    'not' => "\xc2\xac",
+    'not;' => "\xc2\xac",
+    'notin;' => "\xe2\x88\x89",
+    'nsub;' => "\xe2\x8a\x84",
+    'ntilde' => "\xc3\xb1",
+    'ntilde;' => "\xc3\xb1",
+    'nu;' => "\xce\xbd",
+    'oacute' => "\xc3\xb3",
+    'oacute;' => "\xc3\xb3",
+    'ocirc' => "\xc3\xb4",
+    'ocirc;' => "\xc3\xb4",
+    'oelig;' => "\xc5\x93",
+    'ograve' => "\xc3\xb2",
+    'ograve;' => "\xc3\xb2",
+    'oline;' => "\xe2\x80\xbe",
+    'omega;' => "\xcf\x89",
+    'omicron;' => "\xce\xbf",
+    'oplus;' => "\xe2\x8a\x95",
+    'or;' => "\xe2\x88\xa8",
+    'ordf' => "\xc2\xaa",
+    'ordf;' => "\xc2\xaa",
+    'ordm' => "\xc2\xba",
+    'ordm;' => "\xc2\xba",
+    'oslash' => "\xc3\xb8",
+    'oslash;' => "\xc3\xb8",
+    'otilde' => "\xc3\xb5",
+    'otilde;' => "\xc3\xb5",
+    'otimes;' => "\xe2\x8a\x97",
+    'ouml' => "\xc3\xb6",
+    'ouml;' => "\xc3\xb6",
+    'para' => "\xc2\xb6",
+    'para;' => "\xc2\xb6",
+    'part;' => "\xe2\x88\x82",
+    'permil;' => "\xe2\x80\xb0",
+    'perp;' => "\xe2\x8a\xa5",
+    'phi;' => "\xcf\x86",
+    'pi;' => "\xcf\x80",
+    'piv;' => "\xcf\x96",
+    'plusmn' => "\xc2\xb1",
+    'plusmn;' => "\xc2\xb1",
+    'pound' => "\xc2\xa3",
+    'pound;' => "\xc2\xa3",
+    'prime;' => "\xe2\x80\xb2",
+    'prod;' => "\xe2\x88\x8f",
+    'prop;' => "\xe2\x88\x9d",
+    'psi;' => "\xcf\x88",
+    'quot' => '"',
+    'quot;' => '"',
+    'rArr;' => "\xe2\x87\x92",
+    'radic;' => "\xe2\x88\x9a",
+    'rang;' => "\xe3\x80\x89",
+    'raquo' => "\xc2\xbb",
+    'raquo;' => "\xc2\xbb",
+    'rarr;' => "\xe2\x86\x92",
+    'rceil;' => "\xe2\x8c\x89",
+    'rdquo;' => "\xe2\x80\x9d",
+    'real;' => "\xe2\x84\x9c",
+    'reg' => "\xc2\xae",
+    'reg;' => "\xc2\xae",
+    'rfloor;' => "\xe2\x8c\x8b",
+    'rho;' => "\xcf\x81",
+    'rlm;' => "\xe2\x80\x8f",
+    'rsaquo;' => "\xe2\x80\xba",
+    'rsquo;' => "\xe2\x80\x99",
+    'sbquo;' => "\xe2\x80\x9a",
+    'scaron;' => "\xc5\xa1",
+    'sdot;' => "\xe2\x8b\x85",
+    'sect' => "\xc2\xa7",
+    'sect;' => "\xc2\xa7",
+    'shy' => "\xc2\xad",
+    'shy;' => "\xc2\xad",
+    'sigma;' => "\xcf\x83",
+    'sigmaf;' => "\xcf\x82",
+    'sim;' => "\xe2\x88\xbc",
+    'spades;' => "\xe2\x99\xa0",
+    'sub;' => "\xe2\x8a\x82",
+    'sube;' => "\xe2\x8a\x86",
+    'sum;' => "\xe2\x88\x91",
+    'sup1' => "\xc2\xb9",
+    'sup1;' => "\xc2\xb9",
+    'sup2' => "\xc2\xb2",
+    'sup2;' => "\xc2\xb2",
+    'sup3' => "\xc2\xb3",
+    'sup3;' => "\xc2\xb3",
+    'sup;' => "\xe2\x8a\x83",
+    'supe;' => "\xe2\x8a\x87",
+    'szlig' => "\xc3\x9f",
+    'szlig;' => "\xc3\x9f",
+    'tau;' => "\xcf\x84",
+    'there4;' => "\xe2\x88\xb4",
+    'theta;' => "\xce\xb8",
+    'thetasym;' => "\xcf\x91",
+    'thinsp;' => "\xe2\x80\x89",
+    'thorn' => "\xc3\xbe",
+    'thorn;' => "\xc3\xbe",
+    'tilde;' => "\xcb\x9c",
+    'times' => "\xc3\x97",
+    'times;' => "\xc3\x97",
+    'trade;' => "\xe2\x84\xa2",
+    'uArr;' => "\xe2\x87\x91",
+    'uacute' => "\xc3\xba",
+    'uacute;' => "\xc3\xba",
+    'uarr;' => "\xe2\x86\x91",
+    'ucirc' => "\xc3\xbb",
+    'ucirc;' => "\xc3\xbb",
+    'ugrave' => "\xc3\xb9",
+    'ugrave;' => "\xc3\xb9",
+    'uml' => "\xc2\xa8",
+    'uml;' => "\xc2\xa8",
+    'upsih;' => "\xcf\x92",
+    'upsilon;' => "\xcf\x85",
+    'uuml' => "\xc3\xbc",
+    'uuml;' => "\xc3\xbc",
+    'weierp;' => "\xe2\x84\x98",
+    'xi;' => "\xce\xbe",
+    'yacute' => "\xc3\xbd",
+    'yacute;' => "\xc3\xbd",
+    'yen' => "\xc2\xa5",
+    'yen;' => "\xc2\xa5",
+    'yuml' => "\xc3\xbf",
+    'yuml;' => "\xc3\xbf",
+    'zeta;' => "\xce\xb6",
+    'zwj;' => "\xe2\x80\x8d",
+    'zwnj;' => "\xe2\x80\x8c"
+  }
+
+  ENCODINGS = %w[
+      ansi_x3.4-1968
+      iso-ir-6
+      ansi_x3.4-1986
+      iso_646.irv:1991
+      ascii
+      iso646-us
+      us-ascii
+      us
+      ibm367
+      cp367
+      csascii
+      ks_c_5601-1987
+      korean
+      iso-2022-kr
+      csiso2022kr
+      euc-kr
+      iso-2022-jp
+      csiso2022jp
+      iso-2022-jp-2
+      iso-ir-58
+      chinese
+      csiso58gb231280
+      iso_8859-1:1987
+      iso-ir-100
+      iso_8859-1
+      iso-8859-1
+      latin1
+      l1
+      ibm819
+      cp819
+      csisolatin1
+      iso_8859-2:1987
+      iso-ir-101
+      iso_8859-2
+      iso-8859-2
+      latin2
+      l2
+      csisolatin2
+      iso_8859-3:1988
+      iso-ir-109
+      iso_8859-3
+      iso-8859-3
+      latin3
+      l3
+      csisolatin3
+      iso_8859-4:1988
+      iso-ir-110
+      iso_8859-4
+      iso-8859-4
+      latin4
+      l4
+      csisolatin4
+      iso_8859-6:1987
+      iso-ir-127
+      iso_8859-6
+      iso-8859-6
+      ecma-114
+      asmo-708
+      arabic
+      csisolatinarabic
+      iso_8859-7:1987
+      iso-ir-126
+      iso_8859-7
+      iso-8859-7
+      elot_928
+      ecma-118
+      greek
+      greek8
+      csisolatingreek
+      iso_8859-8:1988
+      iso-ir-138
+      iso_8859-8
+      iso-8859-8
+      hebrew
+      csisolatinhebrew
+      iso_8859-5:1988
+      iso-ir-144
+      iso_8859-5
+      iso-8859-5
+      cyrillic
+      csisolatincyrillic
+      iso_8859-9:1989
+      iso-ir-148
+      iso_8859-9
+      iso-8859-9
+      latin5
+      l5
+      csisolatin5
+      iso-8859-10
+      iso-ir-157
+      l6
+      iso_8859-10:1992
+      csisolatin6
+      latin6
+      hp-roman8
+      roman8
+      r8
+      ibm037
+      cp037
+      csibm037
+      ibm424
+      cp424
+      csibm424
+      ibm437
+      cp437
+      437
+      cspc8codepage437
+      ibm500
+      cp500
+      csibm500
+      ibm775
+      cp775
+      cspc775baltic
+      ibm850
+      cp850
+      850
+      cspc850multilingual
+      ibm852
+      cp852
+      852
+      cspcp852
+      ibm855
+      cp855
+      855
+      csibm855
+      ibm857
+      cp857
+      857
+      csibm857
+      ibm860
+      cp860
+      860
+      csibm860
+      ibm861
+      cp861
+      861
+      cp-is
+      csibm861
+      ibm862
+      cp862
+      862
+      cspc862latinhebrew
+      ibm863
+      cp863
+      863
+      csibm863
+      ibm864
+      cp864
+      csibm864
+      ibm865
+      cp865
+      865
+      csibm865
+      ibm866
+      cp866
+      866
+      csibm866
+      ibm869
+      cp869
+      869
+      cp-gr
+      csibm869
+      ibm1026
+      cp1026
+      csibm1026
+      koi8-r
+      cskoi8r
+      koi8-u
+      big5-hkscs
+      ptcp154
+      csptcp154
+      pt154
+      cp154
+      utf-7
+      utf-16be
+      utf-16le
+      utf-16
+      utf-8
+      iso-8859-13
+      iso-8859-14
+      iso-ir-199
+      iso_8859-14:1998
+      iso_8859-14
+      latin8
+      iso-celtic
+      l8
+      iso-8859-15
+      iso_8859-15
+      iso-8859-16
+      iso-ir-226
+      iso_8859-16:2001
+      iso_8859-16
+      latin10
+      l10
+      gbk
+      cp936
+      ms936
+      gb18030
+      shift_jis
+      ms_kanji
+      csshiftjis
+      euc-jp
+      gb2312
+      big5
+      csbig5
+      windows-1250
+      windows-1251
+      windows-1252
+      windows-1253
+      windows-1254
+      windows-1255
+      windows-1256
+      windows-1257
+      windows-1258
+      tis-620
+      hz-gb-2312
+  ]
+
+end
--- a/vendor/plugins/HTML5lib/lib/html5/filters.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/filters.rb
@ -0,0 +1 @@
+require 'html5/filters/optionaltags'
--- a/vendor/plugins/HTML5lib/lib/html5lib/filters/base.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/filters/base.rb
@ -1,7 +1,7 @@
 require 'delegate'
 require 'enumerator'

-module HTML5lib
+module HTML5
  module Filters
    class Base < SimpleDelegator
      include Enumerable
--- a/vendor/plugins/HTML5lib/lib/html5lib/filters/inject_meta_charset.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/filters/inject_meta_charset.rb
@ -1,6 +1,6 @@
-require 'html5lib/filters/base'
+require 'html5/filters/base'

-module HTML5lib
+module HTML5
  module Filters
    class InjectMetaCharset < Base
      def initialize(source, encoding)
--- a/vendor/plugins/HTML5lib/lib/html5lib/filters/optionaltags.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/filters/optionaltags.rb
@ -1,7 +1,7 @@
-require 'html5lib/constants'
-require 'html5lib/filters/base'
+require 'html5/constants'
+require 'html5/filters/base'

-module HTML5lib
+module HTML5
  module Filters

    class OptionalTagFilter < Base
--- a/vendor/plugins/HTML5lib/lib/html5lib/filters/sanitizer.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/filters/sanitizer.rb
@ -1,7 +1,7 @@
-require 'html5lib/filters/base'
-require 'html5lib/sanitizer'
+require 'html5/filters/base'
+require 'html5/sanitizer'

-module HTML5lib
+module HTML5
  module Filters
    class HTMLSanitizeFilter < Base
      include HTMLSanitizeModule
--- a/vendor/plugins/HTML5lib/lib/html5lib/filters/whitespace.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/filters/whitespace.rb
@ -1,7 +1,7 @@
-require 'html5lib/constants'
-require 'html5lib/filters/base'
+require 'html5/constants'
+require 'html5/filters/base'

-module HTML5lib
+module HTML5
  module Filters
    class WhitespaceFilter < Base

--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser.rb
@ -1,246 +1,246 @@
-require 'html5lib/constants'
-require 'html5lib/tokenizer'
-require 'html5lib/treebuilders/rexml'
-
-Dir.glob(File.join(File.dirname(__FILE__), 'html5parser', '*_phase.rb')).each do |path|
-  require 'html5lib/html5parser/' + File.basename(path)
-end
-
-module HTML5lib
-
-  # Error in parsed document
-  class ParseError < Exception; end
-  class AssertionError < Exception; end
-
-  # HTML parser. Generates a tree structure from a stream of (possibly malformed) HTML
-  #
-  class HTMLParser
-
-    attr_accessor :phase, :firstStartTag, :innerHTML, :lastPhase, :insertFromTable
-
-    attr_reader :phases, :tokenizer, :tree, :errors
-
-    def self.parse(stream, options = {})
-      encoding = options.delete(:encoding)
-      new(options).parse(stream,encoding)
-    end
-
-    def self.parseFragment(stream, options = {})
-      container = options.delete(:container) || 'div'
-      encoding = options.delete(:encoding)
-      new(options).parseFragment(stream,container,encoding)
-    end
-
-    @@phases = %w( initial rootElement beforeHead inHead afterHead inBody inTable inCaption
-      inColumnGroup inTableBody inRow inCell inSelect afterBody inFrameset afterFrameset trailingEnd )
-
-    # :strict - raise an exception when a parse error is encountered
-    # :tree - a treebuilder class controlling the type of tree that will be
-    # returned. Built in treebuilders can be accessed through
-    # HTML5lib::TreeBuilders[treeType]
-    def initialize(options = {})
-      @strict = false
-      @errors = []
-     
-      @tokenizer =  HTMLTokenizer
-      @tree = TreeBuilders::REXML::TreeBuilder
- 
-      options.each { |name, value| instance_variable_set("@#{name}", value) }
-
-      @tree = @tree.new
-
-      @phases = @@phases.inject({}) do |phases, phase_name|
-        phase_class_name = phase_name.sub(/(.)/) { $1.upcase } + 'Phase'
-        phases[phase_name.to_sym] = HTML5lib.const_get(phase_class_name).new(self, @tree)
-        phases 
-      end
-    end
-
-    def _parse(stream, innerHTML, encoding, container = 'div')
-      @tree.reset
-      @firstStartTag = false
-      @errors = []
-
-      @tokenizer = @tokenizer.class unless Class === @tokenizer
-      @tokenizer = @tokenizer.new(stream, :encoding => encoding,
-        :parseMeta => !innerHTML)
-
-      if innerHTML
-        case @innerHTML = container.downcase
-          when 'title', 'textarea'
-            @tokenizer.contentModelFlag = :RCDATA
-          when 'style', 'script', 'xmp', 'iframe', 'noembed', 'noframes', 'noscript'
-            @tokenizer.contentModelFlag = :CDATA
-          when 'plaintext'
-            @tokenizer.contentModelFlag = :PLAINTEXT
-          else
-          # contentModelFlag already is PCDATA
-          #@tokenizer.contentModelFlag = :PCDATA
-        end
-      
-        @phase = @phases[:rootElement]
-        @phase.insertHtmlElement
-        resetInsertionMode
-      else
-        @innerHTML = false
-        @phase = @phases[:initial]
-      end
-
-      # We only seem to have InBodyPhase testcases where the following is
-      # relevant ... need others too
-      @lastPhase = nil
-
-      # XXX This is temporary for the moment so there isn't any other
-      # changes needed for the parser to work with the iterable tokenizer
-      @tokenizer.each do |token|
-        token = normalizeToken(token)
-
-        method = 'process%s' % token[:type]
-
-        case token[:type]
-          when :Characters, :SpaceCharacters, :Comment
-            @phase.send method, token[:data]
-          when :StartTag
-            @phase.send method, token[:name], token[:data]
-          when :EndTag
-            @phase.send method, token[:name]
-          when :Doctype
-            @phase.send method, token[:name], token[:publicId],
-              token[:systemId], token[:correct]
-          else
-            parseError(token[:data])
-        end
-      end
-
-      # When the loop finishes it's EOF
-      @phase.processEOF
-    end
-
-    # Parse a HTML document into a well-formed tree
-    #
-    # stream - a filelike object or string containing the HTML to be parsed
-    #
-    # The optional encoding parameter must be a string that indicates
-    # the encoding.  If specified, that encoding will be used,
-    # regardless of any BOM or later declaration (such as in a meta
-    # element)
-    def parse(stream, encoding=nil)
-      _parse(stream, false, encoding)
-      return @tree.getDocument
-    end
-  
-    # Parse a HTML fragment into a well-formed tree fragment
-    
-    # container - name of the element we're setting the innerHTML property
-    # if set to nil, default to 'div'
-    #
-    # stream - a filelike object or string containing the HTML to be parsed
-    #
-    # The optional encoding parameter must be a string that indicates
-    # the encoding.  If specified, that encoding will be used,
-    # regardless of any BOM or later declaration (such as in a meta
-    # element)
-    def parseFragment(stream, container='div', encoding=nil)
-      _parse(stream, true, encoding, container)
-      return @tree.getFragment
-    end
-
-    def parseError(data = 'XXX ERROR MESSAGE NEEDED')
-      # XXX The idea is to make data mandatory.
-      @errors.push([@tokenizer.stream.position, data])
-      raise ParseError if @strict
-    end
-
-    # HTML5 specific normalizations to the token stream
-    def normalizeToken(token)
-
-      if token[:type] == :EmptyTag
-        # When a solidus (/) is encountered within a tag name what happens
-        # depends on whether the current tag name matches that of a void
-        # element.  If it matches a void element atheists did the wrong
-        # thing and if it doesn't it's wrong for everyone.
-
-        unless VOID_ELEMENTS.include?(token[:name])
-          parseError(_('Solidus (/) incorrectly placed in tag.'))
-        end
-
-        token[:type] = :StartTag
-      end
-
-      if token[:type] == :StartTag
-        token[:name] = token[:name].tr(ASCII_UPPERCASE,ASCII_LOWERCASE)
-
-        # We need to remove the duplicate attributes and convert attributes
-        # to a dict so that [["x", "y"], ["x", "z"]] becomes {"x": "y"}
-
-        unless token[:data].empty?
-          data = token[:data].reverse.map { |attr, value| [attr.tr(ASCII_UPPERCASE, ASCII_LOWERCASE), value] }
-          token[:data] = Hash[*data.flatten]
-        end
-
-      elsif token[:type] == :EndTag
-        parseError(_('End tag contains unexpected attributes.')) unless token[:data].empty?
-        token[:name] = token[:name].downcase
-      end
-
-      return token
-    end
-
-    @@new_modes = {
-      'select' => :inSelect,
-      'td' => :inCell,
-      'th' => :inCell,
-      'tr' => :inRow,
-      'tbody' => :inTableBody,
-      'thead' => :inTableBody,
-      'tfoot' => :inTableBody,
-      'caption' => :inCaption,
-      'colgroup' => :inColumnGroup,
-      'table' => :inTable,
-      'head' => :inBody,
-      'body' => :inBody,
-      'frameset' => :inFrameset
-    }
-
-    def resetInsertionMode
-      # The name of this method is mostly historical. (It's also used in the
-      # specification.)
-      last = false
-
-      @tree.openElements.reverse.each do |node|
-        nodeName = node.name
-
-        if node == @tree.openElements[0]
-          last = true
-          unless ['td', 'th'].include?(nodeName)
-            # XXX
-            # assert @innerHTML
-            nodeName = @innerHTML
-          end
-        end
-
-        # Check for conditions that should only happen in the innerHTML
-        # case
-        if ['select', 'colgroup', 'head', 'frameset'].include?(nodeName)
-          # XXX
-          # assert @innerHTML
-        end
-
-        if @@new_modes.has_key?(nodeName)
-          @phase = @phases[@@new_modes[nodeName]]
-        elsif nodeName == 'html'
-          @phase = @phases[@tree.headPointer.nil?? :beforeHead : :afterHead]
-        elsif last
-          @phase = @phases[:inBody]
-        else
-          next
-        end
-
-        break
-      end
-    end
-
-    def _(string); string; end
-  end
-
-end
+require 'html5/constants'
+require 'html5/tokenizer'
+require 'html5/treebuilders/rexml'
+
+Dir.glob(File.join(File.dirname(__FILE__), 'html5parser', '*_phase.rb')).each do |path|
+  require 'html5/html5parser/' + File.basename(path)
+end
+
+module HTML5
+
+  # Error in parsed document
+  class ParseError < Exception; end  # subclasses Exception directly, so a bare `rescue` (StandardError) does not catch it
+  class AssertionError < Exception; end  # likewise outside the StandardError hierarchy
+
+  # HTML parser. Generates a tree structure from a stream of (possibly malformed) HTML
+  #
+  class HTMLParser
+
+    attr_accessor :phase, :firstStartTag, :innerHTML, :lastPhase, :insertFromTable
+
+    attr_reader :phases, :tokenizer, :tree, :errors
+
+    def self.parse(stream, options = {})  # build a parser from options, then parse stream as a whole document with the optional :encoding
+      encoding = options[:encoding]  # read instead of delete, so the caller's hash is left untouched
+      new(options.reject { |name, _| name == :encoding }).parse(stream, encoding)  # :encoding goes to #parse, not to the constructor
+    end
+
+    def self.parseFragment(stream, options = {})  # build a parser from options, then parse stream as a fragment inside :container
+      container = options[:container] || 'div'  # read instead of delete, so the caller's hash is left untouched
+      encoding = options[:encoding]
+      new(options.reject { |name, _| [:container, :encoding].include?(name) }).parseFragment(stream, container, encoding)  # both keys go to #parseFragment, not to the constructor
+    end
+
+    @@phases = %w( initial rootElement beforeHead inHead afterHead inBody inTable inCaption
+      inColumnGroup inTableBody inRow inCell inSelect afterBody inFrameset afterFrameset trailingEnd )
+
+    # :strict - raise an exception when a parse error is encountered
+    # :tree - a treebuilder class controlling the type of tree that will be
+    # returned. Built in treebuilders can be accessed through
+    # HTML5::TreeBuilders[treeType]
+    def initialize(options = {})
+      @strict, @errors = false, []  # defaults; a same-named option below replaces any of these four
+      @tokenizer = HTMLTokenizer
+      @tree = TreeBuilders::REXML::TreeBuilder
+
+      options.each do |key, setting|
+        instance_variable_set("@#{key}", setting)
+      end
+      @tree = @tree.new  # @tree held a treebuilder class until now; replace it with an instance
+
+      table = {}
+      @@phases.each do |phase_name|
+        class_name = phase_name.sub(/./) { |first| first.upcase } + 'Phase'
+        table[phase_name.to_sym] = HTML5.const_get(class_name).new(self, @tree)
+      end
+      @phases = table  # assigned only after every phase object has been built
+    end
+
+    def _parse(stream, innerHTML, encoding, container = 'div')
+      @tree.reset
+      @firstStartTag = false
+      @errors = []
+
+      @tokenizer = @tokenizer.class unless Class === @tokenizer
+      @tokenizer = @tokenizer.new(stream, :encoding => encoding,
+        :parseMeta => !innerHTML)
+
+      if innerHTML
+        case @innerHTML = container.downcase
+          when 'title', 'textarea'
+            @tokenizer.contentModelFlag = :RCDATA
+          when 'style', 'script', 'xmp', 'iframe', 'noembed', 'noframes', 'noscript'
+            @tokenizer.contentModelFlag = :CDATA
+          when 'plaintext'
+            @tokenizer.contentModelFlag = :PLAINTEXT
+          else
+          # contentModelFlag already is PCDATA
+          #@tokenizer.contentModelFlag = :PCDATA
+        end
+      
+        @phase = @phases[:rootElement]
+        @phase.insertHtmlElement
+        resetInsertionMode
+      else
+        @innerHTML = false
+        @phase = @phases[:initial]
+      end
+
+      # We only seem to have InBodyPhase testcases where the following is
+      # relevant ... need others too
+      @lastPhase = nil
+
+      # XXX This is temporary for the moment so there isn't any other
+      # changes needed for the parser to work with the iterable tokenizer
+      @tokenizer.each do |token|
+        token = normalizeToken(token)
+
+        method = 'process%s' % token[:type]
+
+        case token[:type]
+          when :Characters, :SpaceCharacters, :Comment
+            @phase.send method, token[:data]
+          when :StartTag
+            @phase.send method, token[:name], token[:data]
+          when :EndTag
+            @phase.send method, token[:name]
+          when :Doctype
+            @phase.send method, token[:name], token[:publicId],
+              token[:systemId], token[:correct]
+          else
+            parseError(token[:data])
+        end
+      end
+
+      # When the loop finishes it's EOF
+      @phase.processEOF
+    end
+
+    # Parse a HTML document into a well-formed tree
+    #
+    # stream - a filelike object or string containing the HTML to be parsed
+    #
+    # The optional encoding parameter must be a string that indicates
+    # the encoding.  If specified, that encoding will be used,
+    # regardless of any BOM or later declaration (such as in a meta
+    # element)
+    def parse(stream, encoding=nil)
+      _parse(stream, false, encoding)
+      return @tree.getDocument
+    end
+  
+    # Parse a HTML fragment into a well-formed tree fragment
+    
+    # container - name of the element we're setting the innerHTML property
+    # if set to nil, default to 'div'
+    #
+    # stream - a filelike object or string containing the HTML to be parsed
+    #
+    # The optional encoding parameter must be a string that indicates
+    # the encoding.  If specified, that encoding will be used,
+    # regardless of any BOM or later declaration (such as in a meta
+    # element)
+    def parseFragment(stream, container='div', encoding=nil)
+      _parse(stream, true, encoding, container)
+      return @tree.getFragment
+    end
+
+    def parseError(data = 'XXX ERROR MESSAGE NEEDED')
+      # XXX The idea is to make data mandatory.
+      @errors.push([@tokenizer.stream.position, data])
+      raise ParseError if @strict
+    end
+
+    # HTML5 specific normalizations to the token stream
+    def normalizeToken(token)
+
+      if token[:type] == :EmptyTag
+        # When a solidus (/) is encountered within a tag name what happens
+        # depends on whether the current tag name matches that of a void
+        # element.  If it matches a void element atheists did the wrong
+        # thing and if it doesn't it's wrong for everyone.
+
+        unless VOID_ELEMENTS.include?(token[:name])
+          parseError(_('Solidus (/) incorrectly placed in tag.'))
+        end
+
+        token[:type] = :StartTag
+      end
+
+      if token[:type] == :StartTag
+        token[:name] = token[:name].tr(ASCII_UPPERCASE,ASCII_LOWERCASE)
+
+        # We need to remove the duplicate attributes and convert attributes
+        # to a dict so that [["x", "y"], ["x", "z"]] becomes {"x": "y"}
+
+        unless token[:data].empty?
+          data = token[:data].reverse.map { |attr, value| [attr.tr(ASCII_UPPERCASE, ASCII_LOWERCASE), value] }
+          token[:data] = Hash[*data.flatten]
+        end
+
+      elsif token[:type] == :EndTag
+        parseError(_('End tag contains unexpected attributes.')) unless token[:data].empty?
+        token[:name] = token[:name].downcase
+      end
+
+      return token
+    end
+
+    @@new_modes = {
+      'select' => :inSelect,
+      'td' => :inCell,
+      'th' => :inCell,
+      'tr' => :inRow,
+      'tbody' => :inTableBody,
+      'thead' => :inTableBody,
+      'tfoot' => :inTableBody,
+      'caption' => :inCaption,
+      'colgroup' => :inColumnGroup,
+      'table' => :inTable,
+      'head' => :inBody,
+      'body' => :inBody,
+      'frameset' => :inFrameset
+    }
+
+    def resetInsertionMode
+      # The name of this method is mostly historical. (It's also used in the
+      # specification.)
+      last = false
+
+      @tree.openElements.reverse.each do |node|
+        nodeName = node.name
+
+        if node == @tree.openElements[0]
+          last = true
+          unless ['td', 'th'].include?(nodeName)
+            # XXX
+            # assert @innerHTML
+            nodeName = @innerHTML
+          end
+        end
+
+        # Check for conditions that should only happen in the innerHTML
+        # case
+        if ['select', 'colgroup', 'head', 'frameset'].include?(nodeName)
+          # XXX
+          # assert @innerHTML
+        end
+
+        if @@new_modes.has_key?(nodeName)
+          @phase = @phases[@@new_modes[nodeName]]
+        elsif nodeName == 'html'
+          @phase = @phases[@tree.headPointer.nil?? :beforeHead : :afterHead]
+        elsif last
+          @phase = @phases[:inBody]
+        else
+          next
+        end
+
+        break
+      end
+    end
+
+    def _(string); string; end # wraps message strings and returns them unchanged (presumably a translation hook -- confirm)
+  end
+
+end
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_body_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_body_phase.rb
@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class AfterBodyPhase < Phase

    handle_end 'html'
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_frameset_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_frameset_phase.rb
@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class AfterFramesetPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#after3
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_head_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_head_phase.rb
@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class AfterHeadPhase < Phase
  
    handle_start 'html', 'body', 'frameset', %w( base link meta script style title ) => 'FromHead'
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/before_head_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/before_head_phase.rb
@ -1,11 +1,11 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class BeforeHeadPhase < Phase

    handle_start 'html', 'head'

-    handle_end %w( html head body br ) => 'ImplyHead'
+    handle_end %w( html head body br p ) => 'ImplyHead'

    def processEOF
      startTagHead('head', {})
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_body_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_body_phase.rb
@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class InBodyPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-body
@ -112,7 +112,7 @@ module HTML5lib

    def startTagForm(name, attributes)
      if @tree.formPointer
-        @parser.parseError('Unexpected start tag (form). Ignored.')
+        @parser.parseError(_('Unexpected start tag (form). Ignored.'))
      else
        endTagP('p') if in_scope?('p')
        @tree.insertElement(name, attributes)
@ -129,9 +129,9 @@ module HTML5lib
        if stopName.include?(node.name)
          poppedNodes = (0..i).collect { @tree.openElements.pop }
          if i >= 1
-            @parser.parseError("Missing end tag%s (%s)" % [
+            @parser.parseError(_("Missing end tag%s (%s)" % [
              (i>1 ? 's' : ''),
-              poppedNodes.reverse.map {|item| item.name}.join(', ')])
+              poppedNodes.reverse.map {|item| item.name}.join(', ')]))
          end
          break
        end
@ -251,7 +251,7 @@ module HTML5lib
    end

    def startTagIsindex(name, attributes)
-      @parser.parseError("Unexpected start tag isindex. Don't use it!")
+      @parser.parseError(_("Unexpected start tag isindex. Don't use it!"))
      return if @tree.formPointer
      processStartTag('form', {})
      processStartTag('hr', {})
@ -311,8 +311,13 @@ module HTML5lib

    def endTagP(name)
      @tree.generateImpliedEndTags('p') if in_scope?('p')
-      @parser.parseError('Unexpected end tag (p).') unless @tree.openElements[-1].name == 'p'
-      @tree.openElements.pop while in_scope?('p')
+      @parser.parseError(_('Unexpected end tag (p).')) unless @tree.openElements[-1].name == 'p'
+      if in_scope?('p')
+        @tree.openElements.pop while in_scope?('p')
+      else
+        startTagCloseP('p', {})
+        endTagP('p')
+      end
    end

    def endTagBody(name)
@ -342,7 +347,7 @@ module HTML5lib
      @tree.generateImpliedEndTags if in_scope?(name)

      unless @tree.openElements[-1].name == name
-        @parser.parseError(("End tag (#{name}) seen too early. Expected other end tag."))
+        @parser.parseError(_("End tag (#{name}) seen too early. Expected other end tag."))
      end

      if in_scope?(name)
@ -351,7 +356,14 @@ module HTML5lib
    end

    def endTagForm(name)
-      endTagBlock(name)
+      if in_scope?(name)
+        @tree.generateImpliedEndTags
+      end
+      if @tree.openElements[-1].name != name
+        @parser.parseError(_("End tag (form) seen too early. Ignored."))
+      else
+        @tree.openElements.pop
+      end
      @tree.formPointer = nil
    end

@ -361,7 +373,7 @@ module HTML5lib
        @tree.generateImpliedEndTags(name)

        unless @tree.openElements[-1].name == name
-          @parser.parseError(("End tag (#{name}) seen too early. Expected other end tag."))
+          @parser.parseError(_("End tag (#{name}) seen too early. Expected other end tag."))
        end
      end

@ -377,7 +389,7 @@ module HTML5lib
      end

      unless @tree.openElements[-1].name == name
-        @parser.parseError(("Unexpected end tag (#{name}). Expected other end tag."))
+        @parser.parseError(_("Unexpected end tag (#{name}). Expected other end tag."))
      end

      HEADING_ELEMENTS.each do |element|
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_caption_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_caption_phase.rb
@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class InCaptionPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-caption
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_cell_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_cell_phase.rb
@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class InCellPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-cell
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_column_group_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_column_group_phase.rb
@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class InColumnGroupPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-column
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_frameset_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_frameset_phase.rb
@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class InFramesetPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-frameset
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_head_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_head_phase.rb
@ -1,12 +1,12 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class InHeadPhase < Phase

    handle_start 'html', 'head', 'title', 'style', 'script', %w( base link meta )

    handle_end 'head'
-    handle_end %w( html body br ) => 'ImplyAfterHead'
+    handle_end %w( html body br p ) => 'ImplyAfterHead'
    handle_end %w( title style script )

    def processEOF
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_row_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_row_phase.rb
@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class InRowPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-row
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_select_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_select_phase.rb
@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class InSelectPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-select
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_table_body_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_table_body_phase.rb
@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class InTableBodyPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-table0
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_table_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_table_phase.rb
@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class InTablePhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-table
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/initial_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/initial_phase.rb
@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class InitialPhase < Phase

    # This phase deals with error handling as well which is currently not
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/phase.rb
@ -1,4 +1,4 @@
-module HTML5lib
+module HTML5
  # Base class for helper objects that implement each phase of processing.
  #
  # Handler methods should be in the following order (they can be omitted):
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/root_element_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/root_element_phase.rb
@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class RootElementPhase < Phase

    def processEOF
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/trailing_end_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/trailing_end_phase.rb
@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class TrailingEndPhase < Phase

    def processEOF
--- a/vendor/plugins/HTML5lib/lib/html5lib/inputstream.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/inputstream.rb
@ -1,7 +1,7 @@
 require 'stringio'
-require 'html5lib/constants'
+require 'html5/constants'

-module HTML5lib
+module HTML5

  # Provides a unicode stream of characters to the HTMLTokenizer.

@ -10,7 +10,7 @@ module HTML5lib

  class HTMLInputStream

-    attr_accessor :queue, :char_encoding
+    attr_accessor :queue, :char_encoding, :errors

    # Initialises the HTMLInputStream.
    # 
@ -40,25 +40,31 @@ module HTML5lib
      #Number of bytes to use when looking for a meta element with
      #encoding information
      @NUM_BYTES_META = 512
+      #Number of bytes to feed to chardet at a time when detecting the encoding
+      @NUM_BYTES_CHARDET = 256
+      #Number of bytes to use when reading content
+      @NUM_BYTES_BUFFER = 1024
+
      #Encoding to use if no other information can be found
      @DEFAULT_ENCODING = 'windows-1252'
    
      #Detect encoding iff no explicit "transport level" encoding is supplied
-      if @encoding.nil? or not HTML5lib.is_valid_encoding(@encoding)
+      if @encoding.nil? or not HTML5.is_valid_encoding(@encoding)
        @char_encoding = detect_encoding
      else
        @char_encoding = @encoding
      end

      # Read bytes from stream decoding them into Unicode
-      uString = @raw_stream.read
+      @buffer = @raw_stream.read(@NUM_BYTES_BUFFER) || ''
      if @char_encoding == 'windows-1252'
        @win1252 = true
      elsif @char_encoding != 'utf-8'
        begin
          require 'iconv'
          begin
-            uString = Iconv.iconv('utf-8', @char_encoding, uString).first
+            @buffer << @raw_stream.read unless @raw_stream.eof?
+            @buffer = Iconv.iconv('utf-8', @char_encoding, @buffer).first
          rescue
            @win1252 = true
          end
@ -67,10 +73,8 @@ module HTML5lib
        end
      end

-      # Convert the unicode string into a list to be used as the data stream
-      @data_stream = uString
-
      @queue = []
+      @errors = []

      # Reset position in the list to read from
      @tell = 0
@ -109,9 +113,22 @@ module HTML5lib
        begin
          require 'rubygems'
          require 'UniversalDetector' # gem install chardet
-          buffer = @raw_stream.read
-          encoding = UniversalDetector::chardet(buffer)['encoding']
-          seek(buffer, 0)
+          buffers = []
+          detector = UniversalDetector::Detector.instance
+          detector.reset
+          until @raw_stream.eof?
+            buffer = @raw_stream.read(@NUM_BYTES_CHARDET)
+            break if !buffer or buffer.empty?
+            buffers << buffer
+            detector.feed(buffer)
+            break if detector.instance_eval {@done}
+            detector.instance_eval {
+              @_mLastChar = @_mLastChar.chr if Fixnum === @_mLastChar
+            }
+          end
+          detector.close
+          encoding = detector.result['encoding']
+          seek(buffers*'', 0)
        rescue LoadError
        end
      end
@ -242,14 +259,20 @@ module HTML5lib
      unless @queue.empty?
        return @queue.shift
      else
-        c = @data_stream[@tell]
+        if @tell + 3 > @buffer.length and !@raw_stream.eof?
+          # read next block
+          @buffer = @buffer[@tell .. -1] + @raw_stream.read(@NUM_BYTES_BUFFER)
+          @tell = 0
+        end
+
+        c = @buffer[@tell]
        @tell += 1

        case c
        when 0x01 .. 0x7F
          if c == 0x0D
            # normalize newlines
-            @tell += 1 if @data_stream[@tell] == 0x0A
+            @tell += 1 if @buffer[@tell] == 0x0A
            c = 0x0A
          end

@ -276,7 +299,7 @@ module HTML5lib
        when 0xC0 .. 0xFF
          if @win1252
            "\xC3" + (c-64).chr # convert to utf-8
-          elsif @data_stream[@tell-1 .. -1] =~ /^
+          elsif @buffer[@tell-1 .. @tell+3] =~ /^
                ( [\xC2-\xDF][\x80-\xBF]             # non-overlong 2-byte
                |  \xE0[\xA0-\xBF][\x80-\xBF]        # excluding overlongs
                | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}  # straight 3-byte
@ -292,6 +315,8 @@ module HTML5lib
          end

        when 0x00
+          @errors.push('null character found in input stream, ' +
+            'replaced with U+FFFD')
          [0xFFFD].pack('U') # null characters are invalid

        else
@ -317,6 +342,10 @@ module HTML5lib
      @queue.insert(0, c) unless c == :EOF
      return char_stack.join('')
    end
+
+    def unget(characters)
+      @queue.unshift(*characters.to_a) unless characters == :EOF
+    end
  end

  # String-like object with an associated position and various extra methods
@ -433,14 +462,14 @@ module HTML5lib
        
        if attr[0] == 'charset'
          tentative_encoding = attr[1]
-          if HTML5lib.is_valid_encoding(tentative_encoding)
+          if HTML5.is_valid_encoding(tentative_encoding)
            @encoding = tentative_encoding  
            return false
          end
        elsif attr[0] == 'content'
          content_parser = ContentAttrParser.new(EncodingBytes.new(attr[1]))
          tentative_encoding = content_parser.parse
-          if HTML5lib.is_valid_encoding(tentative_encoding)
+          if HTML5.is_valid_encoding(tentative_encoding)
            @encoding = tentative_encoding
            return false
          end
--- a/vendor/plugins/HTML5lib/lib/html5lib/liberalxmlparser.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/liberalxmlparser.rb
@ -11,10 +11,10 @@
 # 
 # @@TODO:
 # * Selectively lowercase only XHTML, but not foreign markup
-require 'html5lib/html5parser'
-require 'html5lib/constants'
+require 'html5/html5parser'
+require 'html5/constants'

-module HTML5lib
+module HTML5

  # liberal XML parser
  class XMLParser < HTMLParser
@ -25,25 +25,35 @@ module HTML5lib
    end

    def normalizeToken(token)
-      if token[:type] == :StartTag or token[:type] == :EmptyTag
+      case token[:type]
+      when :StartTag, :EmptyTag
        # We need to remove the duplicate attributes and convert attributes
-        # to a dict so that [["x", "y"], ["x", "z"]] becomes {"x": "y"}
+        # to a Hash so that [["x", "y"], ["x", "z"]] becomes {"x": "y"}

        token[:data] = Hash[*token[:data].reverse.flatten]

        # For EmptyTags, process both a Start and an End tag
        if token[:type] == :EmptyTag
+          save = @tokenizer.contentModelFlag
          @phase.processStartTag(token[:name], token[:data])
+          @tokenizer.contentModelFlag = save
          token[:data] = {}
          token[:type] = :EndTag
        end

-      elsif token[:type] == :EndTag
+      when :Characters
+        # un-escape RCDATA_ELEMENTS (e.g. style, script)
+        if @tokenizer.contentModelFlag == :CDATA
+          token[:data] = token[:data].
+            gsub('&lt;','<').gsub('&gt;','>').gsub('&amp;','&')
+        end
+
+      when :EndTag
        if token[:data]
           parseError(_("End tag contains unexpected attributes."))
        end

-      elsif token[:type] == :Comment
+      when :Comment
        # Rescue CDATA from the comments
        if token[:data][0..6] == "[CDATA[" and token[:data][-2..-1] == "]]"
          token[:type] = :Characters
--- a/vendor/plugins/HTML5lib/lib/html5lib/sanitizer.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/sanitizer.rb
@ -1,6 +1,7 @@
 require 'cgi'
+require 'html5/tokenizer'

-module HTML5lib
+module HTML5

 # This module provides sanitization of XHTML+MathML+SVG
 # and of inline style attributes.
--- a/vendor/plugins/HTML5lib/lib/html5/serializer.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/serializer.rb
@ -0,0 +1,2 @@
+require 'html5/serializer/htmlserializer'
+require 'html5/serializer/xhtmlserializer'
--- a/vendor/plugins/HTML5lib/lib/html5lib/serializer/htmlserializer.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/serializer/htmlserializer.rb
@ -1,6 +1,6 @@
-require 'html5lib/constants'
+require 'html5/constants'

-module HTML5lib
+module HTML5

  class HTMLSerializer

@ -21,6 +21,7 @@ module HTML5lib
      @use_trailing_solidus = false
      @space_before_trailing_solidus = true
      @escape_lt_in_attrs = false
+      @escape_rcdata = false

      @omit_optional_tags = true
      @sanitize = false
@ -43,22 +44,22 @@ module HTML5lib
      @errors = []

      if encoding and @inject_meta_charset
-        require 'html5lib/filters/inject_meta_charset'
+        require 'html5/filters/inject_meta_charset'
        treewalker = Filters::InjectMetaCharset.new(treewalker, encoding)
      end

      if @strip_whitespace
-        require 'html5lib/filters/whitespace'
+        require 'html5/filters/whitespace'
        treewalker = Filters::WhitespaceFilter.new(treewalker)
      end

      if @sanitize
-        require 'html5lib/filters/sanitizer'
+        require 'html5/filters/sanitizer'
        treewalker = Filters::HTMLSanitizeFilter.new(treewalker)
      end

      if @omit_optional_tags
-        require 'html5lib/filters/optionaltags'
+        require 'html5/filters/optionaltags'
        treewalker = Filters::OptionalTagFilter.new(treewalker)
      end

@ -81,7 +82,7 @@ module HTML5lib

        elsif [:StartTag, :EmptyTag].include? type
          name = token[:name]
-          if RCDATA_ELEMENTS.include?(name)
+          if RCDATA_ELEMENTS.include?(name) and not @escape_rcdata
            in_cdata = true
          elsif in_cdata
            serializeError(_("Unexpected child element of a CDATA element"))
--- a/vendor/plugins/HTML5lib/lib/html5lib/serializer/xhtmlserializer.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/serializer/xhtmlserializer.rb
@ -1,6 +1,6 @@
-require 'html5lib/serializer/htmlserializer'
+require 'html5/serializer/htmlserializer'

-module HTML5lib
+module HTML5

  class XHTMLSerializer < HTMLSerializer
    DEFAULTS = {
@ -8,7 +8,8 @@ module HTML5lib
      :minimize_boolean_attributes => false,
      :use_trailing_solidus => true,
      :escape_lt_in_attrs => true,
-      :omit_optional_tags => false
+      :omit_optional_tags => false,
+      :escape_rcdata => true
    }

    def initialize(options={})
--- a/vendor/plugins/HTML5lib/lib/html5lib/tokenizer.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/tokenizer.rb
@ -1,7 +1,7 @@
-require 'html5lib/constants'
-require 'html5lib/inputstream'
+require 'html5/constants'
+require 'html5/inputstream'

-module HTML5lib
+module HTML5

  # This class takes care of tokenizing HTML.
  #
@ -84,9 +84,9 @@ module HTML5lib
      # Start processing. When EOF is reached @state will return false
      # instead of true and the loop will terminate.
      while send @state
-        while not @tokenQueue.empty?
-          yield @tokenQueue.shift
-        end
+        yield :type => :ParseError, :data => @stream.errors.shift until
+          @stream.errors.empty?
+        yield @tokenQueue.shift until @tokenQueue.empty?
      end
    end

@ -109,7 +109,7 @@ module HTML5lib

      # The character we just consumed need to be put back on the stack so it
      # doesn't get lost...
-      @stream.queue.push(data)
+      @stream.unget(data)
    end

    # This function returns either U+FFFD or the character based on the
@ -128,7 +128,6 @@ module HTML5lib
        radix = 16
      end

-      char = [0xFFFD].pack('U')
      charStack = []

      # Consume all the characters that are in range while making sure we
@ -142,17 +141,25 @@ module HTML5lib
      # Convert the set of characters consumed to an int.
      charAsInt = charStack.join('').to_i(radix)

-      # If the integer is between 127 and 160 (so 128 and bigger and 159 and
-      # smaller) we need to do the "windows trick".
-      if (127...160).include? charAsInt
+      if charAsInt == 13
+        @tokenQueue.push({:type => :ParseError, :data =>
+          _("Incorrect CR newline entity. Replaced with LF.")})
+        charAsInt = 10
+      elsif (128..159).include? charAsInt
+        # If the integer is between 127 and 160 (so 128 and bigger and 159
+        # and smaller) we need to do the "windows trick".
        @tokenQueue.push({:type => :ParseError, :data =>
          _("Entity used with illegal number (windows-1252 reference).")})

        charAsInt = ENTITIES_WINDOWS1252[charAsInt - 128]
      end

-      if charAsInt > 0 and charAsInt <= 1114111
+      if 0 < charAsInt and charAsInt <= 1114111 and not (55296 <= charAsInt and charAsInt <= 57343)
        char = [charAsInt].pack('U')
+      else
+        char = [0xFFFD].pack('U')
+        @tokenQueue.push({:type => :ParseError, :data =>
+          _("Numeric entity represents an illegal codepoint.")})
      end

      # Discard the ; if present. Otherwise, put it back on the queue and
@ -160,18 +167,18 @@ module HTML5lib
      if c != ";"
        @tokenQueue.push({:type => :ParseError, :data =>
          _("Numeric entity didn't end with ';'.")})
-        @stream.queue.push(c)
+        @stream.unget(c)
      end

      return char
    end

-    def consumeEntity
+    def consumeEntity(from_attribute=false)
      char = nil
      charStack = [@stream.char]
      if SPACE_CHARACTERS.include?(charStack[0]) or 
        [:EOF, '<', '&'].include?(charStack[0])
-        @stream.queue+= charStack
+        @stream.unget(charStack)
      elsif charStack[0] == "#"
        # We might have a number entity here.
        charStack += [@stream.char, @stream.char]
@ -179,22 +186,22 @@ module HTML5lib
          # If we reach the end of the file put everything up to :EOF
          # back in the queue
          charStack = charStack[0...charStack.index(:EOF)]
-          @stream.queue+= charStack
+          @stream.unget(charStack)
          @tokenQueue.push({:type => :ParseError, :data =>
            _("Numeric entity expected. Got end of file instead.")})
        else
          if charStack[1].downcase == "x" \
            and HEX_DIGITS.include? charStack[2]
            # Hexadecimal entity detected.
-            @stream.queue.push(charStack[2])
+            @stream.unget(charStack[2])
            char = consumeNumberEntity(true)
          elsif DIGITS.include? charStack[1]
            # Decimal entity detected.
-            @stream.queue += charStack[1..-1]
+            @stream.unget(charStack[1..-1])
            char = consumeNumberEntity(false)
          else
            # No number entity detected.
-            @stream.queue += charStack
+            @stream.unget(charStack)
            @tokenQueue.push({:type => :ParseError, :data =>
              _("Numeric entity expected but none found.")})
          end
@ -209,6 +216,8 @@ module HTML5lib
        filteredEntityList.reject! {|e| e[0].chr != charStack[0]}
        entityName = nil

+        # Try to find the longest entity the string will match to take care
+        # of &noti for instance.
        while charStack[-1] != :EOF
          name = charStack.join('')
          if filteredEntityList.any? {|e| e[0...name.length] == name}
@ -220,6 +229,7 @@ module HTML5lib

          if ENTITIES.include? name
            entityName = name
+            break if entityName[-1] == ';'
          end
        end

@ -228,15 +238,23 @@ module HTML5lib

          # Check whether or not the last character returned can be
          # discarded or needs to be put back.
-          if not charStack[-1] == ";"
+          if entityName[-1] != ?;
            @tokenQueue.push({:type => :ParseError, :data =>
              _("Named entity didn't end with ';'.")})
-            @stream.queue += charStack[entityName.length..-1]
+          end
+
+          if charStack[-1] != ";" and from_attribute and
+             (ASCII_LETTERS.include?(charStack[entityName.length]) or
+              DIGITS.include?(charStack[entityName.length]))
+            @stream.unget(charStack)
+            char = '&'
+          else
+            @stream.unget(charStack[entityName.length..-1])
          end
        else
          @tokenQueue.push({:type => :ParseError, :data =>
            _("Named entity expected. Got none.")})
-          @stream.queue += charStack
+          @stream.unget(charStack)
        end
      end
      return char
@ -244,7 +262,7 @@ module HTML5lib

    # This method replaces the need for "entityInAttributeValueState".
    def processEntityInAttribute
-      entity = consumeEntity
+      entity = consumeEntity(true)
      if entity
        @currentToken[:data][-1][1] += entity
      else
@ -274,20 +292,23 @@ module HTML5lib
        @lastFourChars.shift if @lastFourChars.length > 4
      end

-      if data == "&" and [:PCDATA,:RCDATA].include?(@contentModelFlag)
-        @state = @states[:entityData]
+      if data == "&" and !@escapeFlag and
+        [:PCDATA,:RCDATA].include?(@contentModelFlag)
+          @state = @states[:entityData]

-      elsif data == "-" and [:CDATA,:RCDATA].include?(@contentModelFlag) and
-        @escapeFlag == false and @lastFourChars.join('') == "<!--"
+      elsif data == "-" and !@escapeFlag and
+        [:CDATA,:RCDATA].include?(@contentModelFlag) and
+        @lastFourChars.join('') == "<!--"
          @escapeFlag = true
          @tokenQueue.push({:type => :Characters, :data => data})

-      elsif data == "<" and @escapeFlag == false and
+      elsif data == "<" and !@escapeFlag and
        [:PCDATA,:CDATA,:RCDATA].include?(@contentModelFlag)
          @state = @states[:tagOpen]

-      elsif data == ">" and [:CDATA,:RCDATA].include?(@contentModelFlag) and
-        @escapeFlag == true and @lastFourChars[1..-1].join('') == "-->"
+      elsif data == ">" and @escapeFlag and 
+        [:CDATA,:RCDATA].include?(@contentModelFlag) and
+        @lastFourChars[1..-1].join('') == "-->"
          @escapeFlag = false
          @tokenQueue.push({:type => :Characters, :data => data})

@ -345,14 +366,14 @@ module HTML5lib
          @tokenQueue.push({:type => :ParseError, :data =>
            _("Expected tag name. Got '?' instead (HTML doesn't " +
            "support processing instructions).")})
-          @stream.queue.push(data)
+          @stream.unget(data)
          @state = @states[:bogusComment]
        else
          # XXX
          @tokenQueue.push({:type => :ParseError, :data =>
            _("Expected tag name. Got something else instead")})
          @tokenQueue.push({:type => :Characters, :data => "<"})
-          @stream.queue.push(data)
+          @stream.unget(data)
          @state = @states[:data]
        end
      else
@ -363,7 +384,7 @@ module HTML5lib
          @state = @states[:closeTagOpen]
        else
          @tokenQueue.push({:type => :Characters, :data => "<"})
-          @stream.queue.insert(0, data)
+          @stream.unget(data)
          @state = @states[:data]
        end
      end
@ -388,7 +409,7 @@ module HTML5lib

          # Since this is just for checking. We put the characters back on
          # the stack.
-          @stream.queue += charStack
+          @stream.unget(charStack)
        end

        if @currentToken and
@ -426,7 +447,7 @@ module HTML5lib
        # XXX data can be _'_...
        @tokenQueue.push({:type => :ParseError, :data =>
          _("Expected closing tag. Unexpected character '#{data}' found.")})
-        @stream.queue.push(data)
+        @stream.unget(data)
        @state = @states[:bogusComment]
      end

@ -556,7 +577,7 @@ module HTML5lib
        @state = @states[:attributeValueDoubleQuoted]
      elsif data == "&"
        @state = @states[:attributeValueUnQuoted]
-        @stream.queue.push(data);
+        @stream.unget(data);
      elsif data == "'"
        @state = @states[:attributeValueSingleQuoted]
      elsif data == ">"
@ -656,7 +677,7 @@ module HTML5lib
        else
          @tokenQueue.push({:type => :ParseError, :data =>
            _("Expected '--' or 'DOCTYPE'. Not found.")})
-          @stream.queue += charStack
+          @stream.unget(charStack)
          @state = @states[:bogusComment]
        end
      end
@ -771,7 +792,7 @@ module HTML5lib
      else
        @tokenQueue.push({:type => :ParseError, :data =>
          _("No space after literal string 'DOCTYPE'.")})
-        @stream.queue.push(data)
+        @stream.unget(data)
        @state = @states[:beforeDoctypeName]
      end
      return true
@ -827,7 +848,7 @@ module HTML5lib
        @state = @states[:data]
      elsif data == :EOF
        @currentToken[:data] = true
-        @stream.queue.push(data)
+        @stream.unget(data)
        @tokenQueue.push({:type => :ParseError, :data =>
          _("Unexpected end of file in DOCTYPE.")})
        @currentToken[:correct] = false
@ -842,7 +863,7 @@ module HTML5lib
        elsif token == "system"
          @state = @states[:beforeDoctypeSystemIdentifier]
        else
-          @stream.queue += charStack
+          @stream.unget(charStack)
          @tokenQueue.push({:type => :ParseError, :data =>
            _("Expected 'public' or 'system'. Got '#{charStack.join('')}'")})
          @state = @states[:bogusDoctype]
@ -1028,7 +1049,7 @@ module HTML5lib
        @state = @states[:data]
      elsif data == :EOF
        # XXX EMIT
-        @stream.queue.push(data)
+        @stream.unget(data)
        @tokenQueue.push({:type => :ParseError, :data =>
          _("Unexpected end of file in bogus doctype.")})
        @currentToken[:correct] = false
--- a/vendor/plugins/HTML5lib/lib/html5lib/treebuilders.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/treebuilders.rb
@ -1,17 +1,17 @@
-module HTML5lib
+module HTML5
  module TreeBuilders

    class << self
      def [](name)
        case name.to_s.downcase
        when 'simpletree' then
-          require 'html5lib/treebuilders/simpletree'
+          require 'html5/treebuilders/simpletree'
          SimpleTree::TreeBuilder
        when 'rexml' then
-          require 'html5lib/treebuilders/rexml'
+          require 'html5/treebuilders/rexml'
          REXML::TreeBuilder
        when 'hpricot' then
-          require 'html5lib/treebuilders/hpricot'
+          require 'html5/treebuilders/hpricot'
          Hpricot::TreeBuilder
        else
          raise "Unknown TreeBuilder #{name}"
--- a/vendor/plugins/HTML5lib/lib/html5lib/treebuilders/base.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/treebuilders/base.rb
@ -1,8 +1,8 @@
-require 'html5lib/constants'
+require 'html5/constants'

 #XXX - TODO; make the default interface more ElementTree-like rather than DOM-like

-module HTML5lib
+module HTML5

  # The scope markers are inserted when entering buttons, object elements,
  # marquees, table cells, and table captions, and are used to prevent formatting
--- a/vendor/plugins/HTML5lib/lib/html5lib/treebuilders/hpricot.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/treebuilders/hpricot.rb
@ -1,221 +1,221 @@
-require 'html5lib/treebuilders/base'
-require 'rubygems'
-require 'hpricot'
-require 'forwardable'
-
-module HTML5lib
-  module TreeBuilders
-    module Hpricot
-
-      class Node < Base::Node
-
-        extend Forwardable
-
-        def_delegators :@hpricot, :name
-
-        attr_accessor :hpricot
-
-        def initialize(name)
-          super(name)
-          @hpricot = self.class.hpricot_class.new name
-        end
-
-        def appendChild(node)
-          if node.kind_of?(TextNode) and childNodes.any? and childNodes.last.kind_of?(TextNode)
-            childNodes[-1].hpricot.content = childNodes[-1].hpricot.to_s + node.hpricot.to_s
-          else
-            childNodes << node
-            hpricot.children << node.hpricot
-          end
-          if (oldparent = node.hpricot.parent) != nil
-            oldparent.children.delete_at(oldparent.children.index(node.hpricot))
-          end
-          node.hpricot.parent = hpricot
-          node.parent = self
-        end
-
-        def removeChild(node)
-           childNodes.delete(node)
-           hpricot.children.delete_at(hpricot.children.index(node.hpricot))
-           node.hpricot.parent = nil
-           node.parent = nil
-        end
-
-        def insertText(data, before=nil)
-          if before
-            insertBefore(TextNode.new(data), before)
-          else
-            appendChild(TextNode.new(data))
-          end
-        end
-
-        def insertBefore(node, refNode)
-          index = childNodes.index(refNode)
-          if node.kind_of?(TextNode) and index > 0 and childNodes[index-1].kind_of?(TextNode)
-            childNodes[index-1].hpricot.content = childNodes[index-1].hpricot.to_s + node.hpricot.to_s
-          else
-            refNode.hpricot.parent.insert_before(node.hpricot,refNode.hpricot)
-            childNodes.insert(index, node)
-          end
-        end
-
-        def hasContent
-          childNodes.any?
-        end
-      end
-
-      class Element < Node
-        def self.hpricot_class
-          ::Hpricot::Elem
-        end
-
-        def initialize(name)
-          super(name)
-
-          @hpricot = ::Hpricot::Elem.new(::Hpricot::STag.new(name))
-        end
-
-        def name
-          @hpricot.stag.name
-        end
-
-        def cloneNode
-          attributes.inject(self.class.new(name)) do |node, (name, value)|
-            node.hpricot[name] = value
-            node
-          end
-        end
-
-        # A call to Hpricot::Elem#raw_attributes is built dynamically,
-        # so alterations to the returned value (a hash) will be lost.
-        #
-        # AttributeProxy works around this by forwarding :[]= calls
-        # to the raw_attributes accessor on the element start tag.
-        #
-        class AttributeProxy
-          def initialize(hpricot)
-            @hpricot = hpricot
-          end
-
-          def []=(k, v)
-            @hpricot.stag.send(stag_attributes_method)[k] = v
-          end
-
-          def stag_attributes_method
-            # STag#attributes changed to STag#raw_attributes after Hpricot 0.5
-            @hpricot.stag.respond_to?(:raw_attributes) ? :raw_attributes : :attributes
-          end
-
-          def method_missing(*a, &b)
-            @hpricot.attributes.send(*a, &b)
-          end
-        end
-
-        def attributes
-          AttributeProxy.new(@hpricot)
-        end
-
-        def attributes=(attrs)
-          attrs.each { |name, value| @hpricot[name] = value }
-        end
-
-        def printTree(indent=0)
-          tree = "\n|#{' ' * indent}<#{name}>"
-          indent += 2
-          attributes.each do |name, value|
-            next if name == 'xmlns'
-            tree += "\n|#{' ' * indent}#{name}=\"#{value}\""
-          end
-          childNodes.inject(tree) { |tree, child| tree + child.printTree(indent) }
-        end
-      end
-
-      class Document < Node
-        def self.hpricot_class
-          ::Hpricot::Doc
-        end
-
-        def initialize
-          super(nil)
-        end
-
-        def printTree(indent=0)
-          childNodes.inject('#document') { |tree, child| tree + child.printTree(indent + 2) }
-        end
-      end
-
-      class DocumentType < Node
-        def self.hpricot_class
-          ::Hpricot::DocType
-        end
-
-        def initialize(name)
-          begin
-            super(name)
-          rescue ArgumentError # needs 3...
-          end
-
-          @hpricot = ::Hpricot::DocType.new(name, nil, nil)
-        end
-
-        def printTree(indent=0)
-          "\n|#{' ' * indent}<!DOCTYPE #{hpricot.target}>"
-        end
-      end
-
-      class DocumentFragment < Element
-        def initialize
-          super('')
-        end
-
-        def printTree(indent=0)
-          childNodes.inject('') { |tree, child| tree + child.printTree(indent+2) }
-        end
-      end
-
-      class TextNode < Node
-        def initialize(data)
-          @hpricot = ::Hpricot::Text.new(data)
-        end
-
-        def printTree(indent=0)
-          "\n|#{' ' * indent}\"#{hpricot.content}\""
-        end
-      end
-
-      class CommentNode < Node
-        def self.hpricot_class
-          ::Hpricot::Comment
-        end
-
-        def printTree(indent=0)
-          "\n|#{' ' * indent}<!-- #{hpricot.content} -->"
-        end
-      end
-
-      class TreeBuilder < Base::TreeBuilder
-        def initialize
-          @documentClass = Document
-          @doctypeClass = DocumentType
-          @elementClass = Element
-          @commentClass = CommentNode
-          @fragmentClass = DocumentFragment
-        end
-
-        def testSerializer(node)
-          node.printTree
-        end
-
-        def getDocument
-          @document.hpricot
-        end
-
-        def getFragment
-          @document = super
-          return @document.hpricot.children
-        end
-      end
-
-    end
-  end
-end
+require 'html5/treebuilders/base'
+require 'rubygems'
+require 'hpricot'
+require 'forwardable'
+
+module HTML5
+  module TreeBuilders
+    module Hpricot
+
+      class Node < Base::Node
+
+        extend Forwardable
+
+        def_delegators :@hpricot, :name
+
+        attr_accessor :hpricot
+
+        def initialize(name)
+          super(name)
+          @hpricot = self.class.hpricot_class.new name
+        end
+
+        def appendChild(node)
+          if node.kind_of?(TextNode) and childNodes.any? and childNodes.last.kind_of?(TextNode)
+            childNodes[-1].hpricot.content = childNodes[-1].hpricot.to_s + node.hpricot.to_s
+          else
+            childNodes << node
+            hpricot.children << node.hpricot
+          end
+          if (oldparent = node.hpricot.parent) != nil
+            oldparent.children.delete_at(oldparent.children.index(node.hpricot))
+          end
+          node.hpricot.parent = hpricot
+          node.parent = self
+        end
+
+        def removeChild(node)
+           childNodes.delete(node)
+           hpricot.children.delete_at(hpricot.children.index(node.hpricot))
+           node.hpricot.parent = nil
+           node.parent = nil
+        end
+
+        def insertText(data, before=nil)
+          if before
+            insertBefore(TextNode.new(data), before)
+          else
+            appendChild(TextNode.new(data))
+          end
+        end
+
+        def insertBefore(node, refNode)
+          index = childNodes.index(refNode)
+          if node.kind_of?(TextNode) and index > 0 and childNodes[index-1].kind_of?(TextNode)
+            childNodes[index-1].hpricot.content = childNodes[index-1].hpricot.to_s + node.hpricot.to_s
+          else
+            refNode.hpricot.parent.insert_before(node.hpricot,refNode.hpricot)
+            childNodes.insert(index, node)
+          end
+        end
+
+        def hasContent
+          childNodes.any?
+        end
+      end
+
+      class Element < Node
+        def self.hpricot_class
+          ::Hpricot::Elem
+        end
+
+        def initialize(name)
+          super(name)
+
+          @hpricot = ::Hpricot::Elem.new(::Hpricot::STag.new(name))
+        end
+
+        def name
+          @hpricot.stag.name
+        end
+
+        def cloneNode
+          attributes.inject(self.class.new(name)) do |node, (name, value)|
+            node.hpricot[name] = value
+            node
+          end
+        end
+
+        # A call to Hpricot::Elem#raw_attributes is built dynamically,
+        # so alterations to the returned value (a hash) will be lost.
+        #
+        # AttributeProxy works around this by forwarding :[]= calls
+        # to the raw_attributes accessor on the element start tag.
+        #
+        class AttributeProxy
+          def initialize(hpricot)
+            @hpricot = hpricot
+          end
+
+          def []=(k, v)
+            @hpricot.stag.send(stag_attributes_method)[k] = v
+          end
+
+          def stag_attributes_method
+            # STag#attributes changed to STag#raw_attributes after Hpricot 0.5
+            @hpricot.stag.respond_to?(:raw_attributes) ? :raw_attributes : :attributes
+          end
+
+          def method_missing(*a, &b)
+            @hpricot.attributes.send(*a, &b)
+          end
+        end
+
+        def attributes
+          AttributeProxy.new(@hpricot)
+        end
+
+        def attributes=(attrs)
+          attrs.each { |name, value| @hpricot[name] = value }
+        end
+
+        def printTree(indent=0)
+          tree = "\n|#{' ' * indent}<#{name}>"
+          indent += 2
+          attributes.each do |name, value|
+            next if name == 'xmlns'
+            tree += "\n|#{' ' * indent}#{name}=\"#{value}\""
+          end
+          childNodes.inject(tree) { |tree, child| tree + child.printTree(indent) }
+        end
+      end
+
+      class Document < Node
+        def self.hpricot_class
+          ::Hpricot::Doc
+        end
+
+        def initialize
+          super(nil)
+        end
+
+        def printTree(indent=0)
+          childNodes.inject('#document') { |tree, child| tree + child.printTree(indent + 2) }
+        end
+      end
+
+      class DocumentType < Node
+        def self.hpricot_class
+          ::Hpricot::DocType
+        end
+
+        def initialize(name)
+          begin
+            super(name)
+          rescue ArgumentError # needs 3...
+          end
+
+          @hpricot = ::Hpricot::DocType.new(name, nil, nil)
+        end
+
+        def printTree(indent=0)
+          "\n|#{' ' * indent}<!DOCTYPE #{hpricot.target}>"
+        end
+      end
+
+      class DocumentFragment < Element
+        def initialize
+          super('')
+        end
+
+        def printTree(indent=0)
+          childNodes.inject('') { |tree, child| tree + child.printTree(indent+2) }
+        end
+      end
+
+      class TextNode < Node
+        def initialize(data)
+          @hpricot = ::Hpricot::Text.new(data)
+        end
+
+        def printTree(indent=0)
+          "\n|#{' ' * indent}\"#{hpricot.content}\""
+        end
+      end
+
+      class CommentNode < Node
+        def self.hpricot_class
+          ::Hpricot::Comment
+        end
+
+        def printTree(indent=0)
+          "\n|#{' ' * indent}<!-- #{hpricot.content} -->"
+        end
+      end
+
+      class TreeBuilder < Base::TreeBuilder
+        def initialize
+          @documentClass = Document
+          @doctypeClass = DocumentType
+          @elementClass = Element
+          @commentClass = CommentNode
+          @fragmentClass = DocumentFragment
+        end
+
+        def testSerializer(node)
+          node.printTree
+        end
+
+        def getDocument
+          @document.hpricot
+        end
+
+        def getFragment
+          @document = super
+          return @document.hpricot.children
+        end
+      end
+
+    end
+  end
+end
--- a/vendor/plugins/HTML5lib/lib/html5lib/treebuilders/rexml.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/treebuilders/rexml.rb
@ -1,8 +1,8 @@
-require 'html5lib/treebuilders/base'
+require 'html5/treebuilders/base'
 require 'rexml/document'
 require 'forwardable'

-module HTML5lib
+module HTML5
  module TreeBuilders
    module REXML

--- a/vendor/plugins/HTML5lib/lib/html5lib/treebuilders/simpletree.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/treebuilders/simpletree.rb
@ -1,6 +1,6 @@
-require 'html5lib/treebuilders/base'
+require 'html5/treebuilders/base'

-module HTML5lib
+module HTML5
  module TreeBuilders
    module SimpleTree

--- a/vendor/plugins/HTML5lib/lib/html5lib/treewalkers.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/treewalkers.rb
@ -1,19 +1,19 @@
-require 'html5lib/treewalkers/base'
+require 'html5/treewalkers/base'

-module HTML5lib
+module HTML5
  module TreeWalkers

    class << self
      def [](name)
        case name.to_s.downcase
        when 'simpletree' then
-          require 'html5lib/treewalkers/simpletree'
+          require 'html5/treewalkers/simpletree'
          SimpleTree::TreeWalker
        when 'rexml' then
-          require 'html5lib/treewalkers/rexml'
+          require 'html5/treewalkers/rexml'
          REXML::TreeWalker
        when 'hpricot' then
-          require 'html5lib/treewalkers/hpricot'
+          require 'html5/treewalkers/hpricot'
          Hpricot::TreeWalker
        else
          raise "Unknown TreeWalker #{name}"
--- a/vendor/plugins/HTML5lib/lib/html5lib/treewalkers/base.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/treewalkers/base.rb
@ -1,5 +1,5 @@
-require 'html5lib/constants'
-module HTML5lib
+require 'html5/constants'
+module HTML5
 module TreeWalkers

 module TokenConstructor
--- a/vendor/plugins/HTML5lib/lib/html5lib/treewalkers/hpricot.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/treewalkers/hpricot.rb
@ -1,10 +1,10 @@
-require 'html5lib/treewalkers/base'
+require 'html5/treewalkers/base'
 require 'rexml/document'

-module HTML5lib
+module HTML5
  module TreeWalkers
    module Hpricot
-      class TreeWalker < HTML5lib::TreeWalkers::NonRecursiveTreeWalker
+      class TreeWalker < HTML5::TreeWalkers::NonRecursiveTreeWalker

        def node_details(node)
          case node
--- a/vendor/plugins/HTML5lib/lib/html5lib/treewalkers/rexml.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/treewalkers/rexml.rb
@ -1,10 +1,10 @@
-require 'html5lib/treewalkers/base'
+require 'html5/treewalkers/base'
 require 'rexml/document'

-module HTML5lib
+module HTML5
  module TreeWalkers
    module REXML
-      class TreeWalker < HTML5lib::TreeWalkers::NonRecursiveTreeWalker
+      class TreeWalker < HTML5::TreeWalkers::NonRecursiveTreeWalker

        def node_details(node)
          case node
--- a/vendor/plugins/HTML5lib/lib/html5lib/treewalkers/simpletree.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/treewalkers/simpletree.rb
@ -1,10 +1,10 @@
-require 'html5lib/treewalkers/base'
+require 'html5/treewalkers/base'

-module HTML5lib
+module HTML5
  module TreeWalkers
    module SimpleTree
-      class TreeWalker < HTML5lib::TreeWalkers::Base
-        include HTML5lib::TreeBuilders::SimpleTree
+      class TreeWalker < HTML5::TreeWalkers::Base
+        include HTML5::TreeBuilders::SimpleTree

        def walk(node)
          case node
--- a/vendor/plugins/HTML5lib/lib/html5lib/constants.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/constants.rb
@ -1,708 +0,0 @@
-module HTML5lib
-
-  class EOF < Exception; end
-
-  CONTENT_MODEL_FLAGS = [
-      :PCDATA,
-      :RCDATA,
-      :CDATA,
-      :PLAINTEXT
-  ]
-
-  SCOPING_ELEMENTS = %w[
-      button
-      caption
-      html
-      marquee
-      object
-      table
-      td
-      th
-  ]
-
-  FORMATTING_ELEMENTS = %w[
-      a
-      b
-      big
-      em
-      font
-      i
-      nobr
-      s
-      small
-      strike
-      strong
-      tt
-      u
-  ]
-
-  SPECIAL_ELEMENTS = %w[
-      address
-      area
-      base
-      basefont
-      bgsound
-      blockquote
-      body
-      br
-      center
-      col
-      colgroup
-      dd
-      dir
-      div
-      dl
-      dt
-      embed
-      fieldset
-      form
-      frame
-      frameset
-      h1
-      h2
-      h3
-      h4
-      h5
-      h6
-      head
-      hr
-      iframe
-      image
-      img
-      input
-      isindex
-      li
-      link
-      listing
-      menu
-      meta
-      noembed
-      noframes
-      noscript
-      ol
-      optgroup
-      option
-      p
-      param
-      plaintext
-      pre
-      script
-      select
-      spacer
-      style
-      tbody
-      textarea
-      tfoot
-      thead
-      title
-      tr
-      ul
-      wbr
-  ]
-
-  SPACE_CHARACTERS = %W[
-      \t
-      \n
-      \x0B
-      \x0C
-      \x20
-      \r
-  ]
-
-  TABLE_INSERT_MODE_ELEMENTS = %w[
-      table
-      tbody
-      tfoot
-      thead
-      tr
-  ]
-
-  ASCII_LOWERCASE = ('a'..'z').to_a.join('')
-  ASCII_UPPERCASE = ('A'..'Z').to_a.join('')
-  ASCII_LETTERS = ASCII_LOWERCASE + ASCII_UPPERCASE
-  DIGITS = '0'..'9'
-  HEX_DIGITS = DIGITS.to_a + ('a'..'f').to_a + ('A'..'F').to_a
-
-  # Heading elements need to be ordered 
-  HEADING_ELEMENTS = %w[
-      h1
-      h2
-      h3
-      h4
-      h5
-      h6
-  ]
-
-  # XXX What about event-source and command?
-  VOID_ELEMENTS = %w[
-      base
-      link
-      meta
-      hr
-      br
-      img
-      embed
-      param
-      area
-      col
-      input
-  ]
-
-  CDATA_ELEMENTS = %w[title textarea]
-
-  RCDATA_ELEMENTS = %w[
-    style
-    script
-    xmp
-    iframe
-    noembed
-    noframes
-    noscript
-  ]
-
-  BOOLEAN_ATTRIBUTES = {
-    :global => %w[irrelevant],
-    'style' => %w[scoped],
-    'img' => %w[ismap],
-    'audio' => %w[autoplay controls],
-    'video' => %w[autoplay controls],
-    'script' => %w[defer async],
-    'details' => %w[open],
-    'datagrid' => %w[multiple disabled],
-    'command' => %w[hidden disabled checked default],
-    'menu' => %w[autosubmit],
-    'fieldset' => %w[disabled readonly],
-    'option' => %w[disabled readonly selected],
-    'optgroup' => %w[disabled readonly],
-    'button' => %w[disabled autofocus],
-    'input' => %w[disabled readonly required autofocus checked ismap],
-    'select' => %w[disabled readonly autofocus multiple],
-    'output' => %w[disabled readonly]
-  }
-
-  # entitiesWindows1252 has to be _ordered_ and needs to have an index.
-  ENTITIES_WINDOWS1252 = [
-      8364,  # 0x80  0x20AC  EURO SIGN
-      65533, # 0x81          UNDEFINED
-      8218,  # 0x82  0x201A  SINGLE LOW-9 QUOTATION MARK
-      402,   # 0x83  0x0192  LATIN SMALL LETTER F WITH HOOK
-      8222,  # 0x84  0x201E  DOUBLE LOW-9 QUOTATION MARK
-      8230,  # 0x85  0x2026  HORIZONTAL ELLIPSIS
-      8224,  # 0x86  0x2020  DAGGER
-      8225,  # 0x87  0x2021  DOUBLE DAGGER
-      710,   # 0x88  0x02C6  MODIFIER LETTER CIRCUMFLEX ACCENT
-      8240,  # 0x89  0x2030  PER MILLE SIGN
-      352,   # 0x8A  0x0160  LATIN CAPITAL LETTER S WITH CARON
-      8249,  # 0x8B  0x2039  SINGLE LEFT-POINTING ANGLE QUOTATION MARK
-      338,   # 0x8C  0x0152  LATIN CAPITAL LIGATURE OE
-      65533, # 0x8D          UNDEFINED
-      381,   # 0x8E  0x017D  LATIN CAPITAL LETTER Z WITH CARON
-      65533, # 0x8F          UNDEFINED
-      65533, # 0x90          UNDEFINED
-      8216,  # 0x91  0x2018  LEFT SINGLE QUOTATION MARK
-      8217,  # 0x92  0x2019  RIGHT SINGLE QUOTATION MARK
-      8220,  # 0x93  0x201C  LEFT DOUBLE QUOTATION MARK
-      8221,  # 0x94  0x201D  RIGHT DOUBLE QUOTATION MARK
-      8226,  # 0x95  0x2022  BULLET
-      8211,  # 0x96  0x2013  EN DASH
-      8212,  # 0x97  0x2014  EM DASH
-      732,   # 0x98  0x02DC  SMALL TILDE
-      8482,  # 0x99  0x2122  TRADE MARK SIGN
-      353,   # 0x9A  0x0161  LATIN SMALL LETTER S WITH CARON
-      8250,  # 0x9B  0x203A  SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
-      339,   # 0x9C  0x0153  LATIN SMALL LIGATURE OE
-      65533, # 0x9D          UNDEFINED
-      382,   # 0x9E  0x017E  LATIN SMALL LETTER Z WITH CARON
-      376    # 0x9F  0x0178  LATIN CAPITAL LETTER Y WITH DIAERESIS
-  ]
-
-  private
-
-    def self.U n
-      [n].pack('U')
-    end
-
-  public
-
-  ENTITIES = {
-      "AElig" => U(0xC6),
-      "Aacute" => U(0xC1),
-      "Acirc" => U(0xC2),
-      "Agrave" => U(0xC0),
-      "Alpha" => U(0x0391),
-      "Aring" => U(0xC5),
-      "Atilde" => U(0xC3),
-      "Auml" => U(0xC4),
-      "Beta" => U(0x0392),
-      "Ccedil" => U(0xC7),
-      "Chi" => U(0x03A7),
-      "Dagger" => U(0x2021),
-      "Delta" => U(0x0394),
-      "ETH" => U(0xD0),
-      "Eacute" => U(0xC9),
-      "Ecirc" => U(0xCA),
-      "Egrave" => U(0xC8),
-      "Epsilon" => U(0x0395),
-      "Eta" => U(0x0397),
-      "Euml" => U(0xCB),
-      "Gamma" => U(0x0393),
-      "Iacute" => U(0xCD),
-      "Icirc" => U(0xCE),
-      "Igrave" => U(0xCC),
-      "Iota" => U(0x0399),
-      "Iuml" => U(0xCF),
-      "Kappa" => U(0x039A),
-      "Lambda" => U(0x039B),
-      "Mu" => U(0x039C),
-      "Ntilde" => U(0xD1),
-      "Nu" => U(0x039D),
-      "OElig" => U(0x0152),
-      "Oacute" => U(0xD3),
-      "Ocirc" => U(0xD4),
-      "Ograve" => U(0xD2),
-      "Omega" => U(0x03A9),
-      "Omicron" => U(0x039F),
-      "Oslash" => U(0xD8),
-      "Otilde" => U(0xD5),
-      "Ouml" => U(0xD6),
-      "Phi" => U(0x03A6),
-      "Pi" => U(0x03A0),
-      "Prime" => U(0x2033),
-      "Psi" => U(0x03A8),
-      "Rho" => U(0x03A1),
-      "Scaron" => U(0x0160),
-      "Sigma" => U(0x03A3),
-      "THORN" => U(0xDE),
-      "Tau" => U(0x03A4),
-      "Theta" => U(0x0398),
-      "Uacute" => U(0xDA),
-      "Ucirc" => U(0xDB),
-      "Ugrave" => U(0xD9),
-      "Upsilon" => U(0x03A5),
-      "Uuml" => U(0xDC),
-      "Xi" => U(0x039E),
-      "Yacute" => U(0xDD),
-      "Yuml" => U(0x0178),
-      "Zeta" => U(0x0396),
-      "aacute" => U(0xE1),
-      "acirc" => U(0xE2),
-      "acute" => U(0xB4),
-      "aelig" => U(0xE6),
-      "agrave" => U(0xE0),
-      "alefsym" => U(0x2135),
-      "alpha" => U(0x03B1),
-      "amp" => U(0x26),
-      "AMP" => U(0x26),
-      "and" => U(0x2227),
-      "ang" => U(0x2220),
-      "apos" => U(0x27),
-      "aring" => U(0xE5),
-      "asymp" => U(0x2248),
-      "atilde" => U(0xE3),
-      "auml" => U(0xE4),
-      "bdquo" => U(0x201E),
-      "beta" => U(0x03B2),
-      "brvbar" => U(0xA6),
-      "bull" => U(0x2022),
-      "cap" => U(0x2229),
-      "ccedil" => U(0xE7),
-      "cedil" => U(0xB8),
-      "cent" => U(0xA2),
-      "chi" => U(0x03C7),
-      "circ" => U(0x02C6),
-      "clubs" => U(0x2663),
-      "cong" => U(0x2245),
-      "copy" => U(0xA9),
-      "COPY" => U(0xA9),
-      "crarr" => U(0x21B5),
-      "cup" => U(0x222A),
-      "curren" => U(0xA4),
-      "dArr" => U(0x21D3),
-      "dagger" => U(0x2020),
-      "darr" => U(0x2193),
-      "deg" => U(0xB0),
-      "delta" => U(0x03B4),
-      "diams" => U(0x2666),
-      "divide" => U(0xF7),
-      "eacute" => U(0xE9),
-      "ecirc" => U(0xEA),
-      "egrave" => U(0xE8),
-      "empty" => U(0x2205),
-      "emsp" => U(0x2003),
-      "ensp" => U(0x2002),
-      "epsilon" => U(0x03B5),
-      "equiv" => U(0x2261),
-      "eta" => U(0x03B7),
-      "eth" => U(0xF0),
-      "euml" => U(0xEB),
-      "euro" => U(0x20AC),
-      "exist" => U(0x2203),
-      "fnof" => U(0x0192),
-      "forall" => U(0x2200),
-      "frac12" => U(0xBD),
-      "frac14" => U(0xBC),
-      "frac34" => U(0xBE),
-      "frasl" => U(0x2044),
-      "gamma" => U(0x03B3),
-      "ge" => U(0x2265),
-      "gt" => U(0x3E),
-      "GT" => U(0x3E),
-      "hArr" => U(0x21D4),
-      "harr" => U(0x2194),
-      "hearts" => U(0x2665),
-      "hellip" => U(0x2026),
-      "iacute" => U(0xED),
-      "icirc" => U(0xEE),
-      "iexcl" => U(0xA1),
-      "igrave" => U(0xEC),
-      "image" => U(0x2111),
-      "infin" => U(0x221E),
-      "int" => U(0x222B),
-      "iota" => U(0x03B9),
-      "iquest" => U(0xBF),
-      "isin" => U(0x2208),
-      "iuml" => U(0xEF),
-      "kappa" => U(0x03BA),
-      "lArr" => U(0x21D0),
-      "lambda" => U(0x03BB),
-      "lang" => U(0x2329),
-      "laquo" => U(0xAB),
-      "larr" => U(0x2190),
-      "lceil" => U(0x2308),
-      "ldquo" => U(0x201C),
-      "le" => U(0x2264),
-      "lfloor" => U(0x230A),
-      "lowast" => U(0x2217),
-      "loz" => U(0x25CA),
-      "lrm" => U(0x200E),
-      "lsaquo" => U(0x2039),
-      "lsquo" => U(0x2018),
-      "lt" => U(0x3C),
-      "LT" => U(0x3C),
-      "macr" => U(0xAF),
-      "mdash" => U(0x2014),
-      "micro" => U(0xB5),
-      "middot" => U(0xB7),
-      "minus" => U(0x2212),
-      "mu" => U(0x03BC),
-      "nabla" => U(0x2207),
-      "nbsp" => U(0xA0),
-      "ndash" => U(0x2013),
-      "ne" => U(0x2260),
-      "ni" => U(0x220B),
-      "not" => U(0xAC),
-      "notin" => U(0x2209),
-      "nsub" => U(0x2284),
-      "ntilde" => U(0xF1),
-      "nu" => U(0x03BD),
-      "oacute" => U(0xF3),
-      "ocirc" => U(0xF4),
-      "oelig" => U(0x0153),
-      "ograve" => U(0xF2),
-      "oline" => U(0x203E),
-      "omega" => U(0x03C9),
-      "omicron" => U(0x03BF),
-      "oplus" => U(0x2295),
-      "or" => U(0x2228),
-      "ordf" => U(0xAA),
-      "ordm" => U(0xBA),
-      "oslash" => U(0xF8),
-      "otilde" => U(0xF5),
-      "otimes" => U(0x2297),
-      "ouml" => U(0xF6),
-      "para" => U(0xB6),
-      "part" => U(0x2202),
-      "permil" => U(0x2030),
-      "perp" => U(0x22A5),
-      "phi" => U(0x03C6),
-      "pi" => U(0x03C0),
-      "piv" => U(0x03D6),
-      "plusmn" => U(0xB1),
-      "pound" => U(0xA3),
-      "prime" => U(0x2032),
-      "prod" => U(0x220F),
-      "prop" => U(0x221D),
-      "psi" => U(0x03C8),
-      "quot" => U(0x22),
-      "QUOT" => U(0x22),
-      "rArr" => U(0x21D2),
-      "radic" => U(0x221A),
-      "rang" => U(0x232A),
-      "raquo" => U(0xBB),
-      "rarr" => U(0x2192),
-      "rceil" => U(0x2309),
-      "rdquo" => U(0x201D),
-      "real" => U(0x211C),
-      "reg" => U(0xAE),
-      "REG" => U(0xAE),
-      "rfloor" => U(0x230B),
-      "rho" => U(0x03C1),
-      "rlm" => U(0x200F),
-      "rsaquo" => U(0x203A),
-      "rsquo" => U(0x2019),
-      "sbquo" => U(0x201A),
-      "scaron" => U(0x0161),
-      "sdot" => U(0x22C5),
-      "sect" => U(0xA7),
-      "shy" => U(0xAD),
-      "sigma" => U(0x03C3),
-      "sigmaf" => U(0x03C2),
-      "sim" => U(0x223C),
-      "spades" => U(0x2660),
-      "sub" => U(0x2282),
-      "sube" => U(0x2286),
-      "sum" => U(0x2211),
-      "sup" => U(0x2283),
-      "sup1" => U(0xB9),
-      "sup2" => U(0xB2),
-      "sup3" => U(0xB3),
-      "supe" => U(0x2287),
-      "szlig" => U(0xDF),
-      "tau" => U(0x03C4),
-      "there4" => U(0x2234),
-      "theta" => U(0x03B8),
-      "thetasym" => U(0x03D1),
-      "thinsp" => U(0x2009),
-      "thorn" => U(0xFE),
-      "tilde" => U(0x02DC),
-      "times" => U(0xD7),
-      "trade" => U(0x2122),
-      "uArr" => U(0x21D1),
-      "uacute" => U(0xFA),
-      "uarr" => U(0x2191),
-      "ucirc" => U(0xFB),
-      "ugrave" => U(0xF9),
-      "uml" => U(0xA8),
-      "upsih" => U(0x03D2),
-      "upsilon" => U(0x03C5),
-      "uuml" => U(0xFC),
-      "weierp" => U(0x2118),
-      "xi" => U(0x03BE),
-      "yacute" => U(0xFD),
-      "yen" => U(0xA5),
-      "yuml" => U(0xFF),
-      "zeta" => U(0x03B6),
-      "zwj" => U(0x200D),
-      "zwnj" => U(0x200C)
-  }
-
-  ENCODINGS = %w[
-      ansi_x3.4-1968
-      iso-ir-6
-      ansi_x3.4-1986
-      iso_646.irv:1991
-      ascii
-      iso646-us
-      us-ascii
-      us
-      ibm367
-      cp367
-      csascii
-      ks_c_5601-1987
-      korean
-      iso-2022-kr
-      csiso2022kr
-      euc-kr
-      iso-2022-jp
-      csiso2022jp
-      iso-2022-jp-2
-      iso-ir-58
-      chinese
-      csiso58gb231280
-      iso_8859-1:1987
-      iso-ir-100
-      iso_8859-1
-      iso-8859-1
-      latin1
-      l1
-      ibm819
-      cp819
-      csisolatin1
-      iso_8859-2:1987
-      iso-ir-101
-      iso_8859-2
-      iso-8859-2
-      latin2
-      l2
-      csisolatin2
-      iso_8859-3:1988
-      iso-ir-109
-      iso_8859-3
-      iso-8859-3
-      latin3
-      l3
-      csisolatin3
-      iso_8859-4:1988
-      iso-ir-110
-      iso_8859-4
-      iso-8859-4
-      latin4
-      l4
-      csisolatin4
-      iso_8859-6:1987
-      iso-ir-127
-      iso_8859-6
-      iso-8859-6
-      ecma-114
-      asmo-708
-      arabic
-      csisolatinarabic
-      iso_8859-7:1987
-      iso-ir-126
-      iso_8859-7
-      iso-8859-7
-      elot_928
-      ecma-118
-      greek
-      greek8
-      csisolatingreek
-      iso_8859-8:1988
-      iso-ir-138
-      iso_8859-8
-      iso-8859-8
-      hebrew
-      csisolatinhebrew
-      iso_8859-5:1988
-      iso-ir-144
-      iso_8859-5
-      iso-8859-5
-      cyrillic
-      csisolatincyrillic
-      iso_8859-9:1989
-      iso-ir-148
-      iso_8859-9
-      iso-8859-9
-      latin5
-      l5
-      csisolatin5
-      iso-8859-10
-      iso-ir-157
-      l6
-      iso_8859-10:1992
-      csisolatin6
-      latin6
-      hp-roman8
-      roman8
-      r8
-      ibm037
-      cp037
-      csibm037
-      ibm424
-      cp424
-      csibm424
-      ibm437
-      cp437
-      437
-      cspc8codepage437
-      ibm500
-      cp500
-      csibm500
-      ibm775
-      cp775
-      cspc775baltic
-      ibm850
-      cp850
-      850
-      cspc850multilingual
-      ibm852
-      cp852
-      852
-      cspcp852
-      ibm855
-      cp855
-      855
-      csibm855
-      ibm857
-      cp857
-      857
-      csibm857
-      ibm860
-      cp860
-      860
-      csibm860
-      ibm861
-      cp861
-      861
-      cp-is
-      csibm861
-      ibm862
-      cp862
-      862
-      cspc862latinhebrew
-      ibm863
-      cp863
-      863
-      csibm863
-      ibm864
-      cp864
-      csibm864
-      ibm865
-      cp865
-      865
-      csibm865
-      ibm866
-      cp866
-      866
-      csibm866
-      ibm869
-      cp869
-      869
-      cp-gr
-      csibm869
-      ibm1026
-      cp1026
-      csibm1026
-      koi8-r
-      cskoi8r
-      koi8-u
-      big5-hkscs
-      ptcp154
-      csptcp154
-      pt154
-      cp154
-      utf-7
-      utf-16be
-      utf-16le
-      utf-16
-      utf-8
-      iso-8859-13
-      iso-8859-14
-      iso-ir-199
-      iso_8859-14:1998
-      iso_8859-14
-      latin8
-      iso-celtic
-      l8
-      iso-8859-15
-      iso_8859-15
-      iso-8859-16
-      iso-ir-226
-      iso_8859-16:2001
-      iso_8859-16
-      latin10
-      l10
-      gbk
-      cp936
-      ms936
-      gb18030
-      shift_jis
-      ms_kanji
-      csshiftjis
-      euc-jp
-      gb2312
-      big5
-      csbig5
-      windows-1250
-      windows-1251
-      windows-1252
-      windows-1253
-      windows-1254
-      windows-1255
-      windows-1256
-      windows-1257
-      windows-1258
-      tis-620
-      hz-gb-2312
-  ]
-
-end
--- a/vendor/plugins/HTML5lib/lib/html5lib/filters.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/filters.rb
@ -1 +0,0 @@
-require 'html5lib/filters/optionaltags'
--- a/vendor/plugins/HTML5lib/lib/html5lib/serializer.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/serializer.rb
@ -1,2 +0,0 @@
-require 'html5lib/serializer/htmlserializer'
-require 'html5lib/serializer/xhtmlserializer'