Sync with latest HTML5lib and latest Maruku

2007-07-04 17:36:59 -05:00 · 2007-07-04 17:36:59 -05:00 · 8ccaad85a5
commit 8ccaad85a5
parent 8e92e4a3ab
71 changed files with 1974 additions and 1621 deletions
--- a/lib/sanitize.rb
+++ b/lib/sanitize.rb
@ -25,14 +25,14 @@

 module Sanitize

-  require 'html5lib/html5parser'
-  require 'html5lib/liberalxmlparser'
-  require 'html5lib/treewalkers'
-  require 'html5lib/treebuilders'
-  require 'html5lib/serializer'
-  require 'html5lib/sanitizer'
+  require 'html5/html5parser'
+  require 'html5/liberalxmlparser'
+  require 'html5/treewalkers'
+  require 'html5/treebuilders'
+  require 'html5/serializer'
+  require 'html5/sanitizer'

-  include HTML5lib
+  include HTML5

 # Sanitize a string, parsed using XHTML parsing rules.
 #
--- a/vendor/plugins/HTML5lib/lib/html5lib.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib.rb
@ -1,11 +1,11 @@
-require 'html5lib/html5parser'
-
-module HTML5lib
-    def self.parse(stream, options={})
-        HTMLParser.parse(stream, options)
-    end
-
-    def self.parseFragment(stream, options={})
-        HTMLParser.parse(stream, options)
-    end
-end
+require 'html5/html5parser'
+
+module HTML5
+    def self.parse(stream, options={})
+        HTMLParser.parse(stream, options)
+    end
+
+    def self.parseFragment(stream, options={})
+        HTMLParser.parse(stream, options)
+    end
+end
--- a/vendor/plugins/HTML5lib/lib/html5/constants.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/constants.rb
@ -0,0 +1,817 @@
+module HTML5
+
+  class EOF < Exception; end
+
+  CONTENT_MODEL_FLAGS = [
+      :PCDATA,
+      :RCDATA,
+      :CDATA,
+      :PLAINTEXT
+  ]
+
+  SCOPING_ELEMENTS = %w[
+      button
+      caption
+      html
+      marquee
+      object
+      table
+      td
+      th
+  ]
+
+  FORMATTING_ELEMENTS = %w[
+      a
+      b
+      big
+      em
+      font
+      i
+      nobr
+      s
+      small
+      strike
+      strong
+      tt
+      u
+  ]
+
+  SPECIAL_ELEMENTS = %w[
+      address
+      area
+      base
+      basefont
+      bgsound
+      blockquote
+      body
+      br
+      center
+      col
+      colgroup
+      dd
+      dir
+      div
+      dl
+      dt
+      embed
+      fieldset
+      form
+      frame
+      frameset
+      h1
+      h2
+      h3
+      h4
+      h5
+      h6
+      head
+      hr
+      iframe
+      image
+      img
+      input
+      isindex
+      li
+      link
+      listing
+      menu
+      meta
+      noembed
+      noframes
+      noscript
+      ol
+      optgroup
+      option
+      p
+      param
+      plaintext
+      pre
+      script
+      select
+      spacer
+      style
+      tbody
+      textarea
+      tfoot
+      thead
+      title
+      tr
+      ul
+      wbr
+  ]
+
+  SPACE_CHARACTERS = %W[
+      \t
+      \n
+      \x0B
+      \x0C
+      \x20
+      \r
+  ]
+
+  TABLE_INSERT_MODE_ELEMENTS = %w[
+      table
+      tbody
+      tfoot
+      thead
+      tr
+  ]
+
+  ASCII_LOWERCASE = ('a'..'z').to_a.join('')
+  ASCII_UPPERCASE = ('A'..'Z').to_a.join('')
+  ASCII_LETTERS = ASCII_LOWERCASE + ASCII_UPPERCASE
+  DIGITS = '0'..'9'
+  HEX_DIGITS = DIGITS.to_a + ('a'..'f').to_a + ('A'..'F').to_a
+
+  # Heading elements; the list must stay ordered (h1 through h6).
+  HEADING_ELEMENTS = %w[
+      h1
+      h2
+      h3
+      h4
+      h5
+      h6
+  ]
+
+  # XXX What about event-source and command?
+  VOID_ELEMENTS = %w[
+      base
+      link
+      meta
+      hr
+      br
+      img
+      embed
+      param
+      area
+      col
+      input
+  ]
+
+  CDATA_ELEMENTS = %w[title textarea]
+
+  RCDATA_ELEMENTS = %w[
+    style
+    script
+    xmp
+    iframe
+    noembed
+    noframes
+    noscript
+  ]
+
+  BOOLEAN_ATTRIBUTES = {
+    :global => %w[irrelevant],
+    'style' => %w[scoped],
+    'img' => %w[ismap],
+    'audio' => %w[autoplay controls],
+    'video' => %w[autoplay controls],
+    'script' => %w[defer async],
+    'details' => %w[open],
+    'datagrid' => %w[multiple disabled],
+    'command' => %w[hidden disabled checked default],
+    'menu' => %w[autosubmit],
+    'fieldset' => %w[disabled readonly],
+    'option' => %w[disabled readonly selected],
+    'optgroup' => %w[disabled readonly],
+    'button' => %w[disabled autofocus],
+    'input' => %w[disabled readonly required autofocus checked ismap],
+    'select' => %w[disabled readonly autofocus multiple],
+    'output' => %w[disabled readonly]
+  }
+
+  # ENTITIES_WINDOWS1252 has to be _ordered_ and indexable: entry 0
+  # corresponds to byte 0x80 and the last entry to byte 0x9F.
+  ENTITIES_WINDOWS1252 = [
+      8364,  # 0x80  0x20AC  EURO SIGN
+      65533, # 0x81          UNDEFINED
+      8218,  # 0x82  0x201A  SINGLE LOW-9 QUOTATION MARK
+      402,   # 0x83  0x0192  LATIN SMALL LETTER F WITH HOOK
+      8222,  # 0x84  0x201E  DOUBLE LOW-9 QUOTATION MARK
+      8230,  # 0x85  0x2026  HORIZONTAL ELLIPSIS
+      8224,  # 0x86  0x2020  DAGGER
+      8225,  # 0x87  0x2021  DOUBLE DAGGER
+      710,   # 0x88  0x02C6  MODIFIER LETTER CIRCUMFLEX ACCENT
+      8240,  # 0x89  0x2030  PER MILLE SIGN
+      352,   # 0x8A  0x0160  LATIN CAPITAL LETTER S WITH CARON
+      8249,  # 0x8B  0x2039  SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+      338,   # 0x8C  0x0152  LATIN CAPITAL LIGATURE OE
+      65533, # 0x8D          UNDEFINED
+      381,   # 0x8E  0x017D  LATIN CAPITAL LETTER Z WITH CARON
+      65533, # 0x8F          UNDEFINED
+      65533, # 0x90          UNDEFINED
+      8216,  # 0x91  0x2018  LEFT SINGLE QUOTATION MARK
+      8217,  # 0x92  0x2019  RIGHT SINGLE QUOTATION MARK
+      8220,  # 0x93  0x201C  LEFT DOUBLE QUOTATION MARK
+      8221,  # 0x94  0x201D  RIGHT DOUBLE QUOTATION MARK
+      8226,  # 0x95  0x2022  BULLET
+      8211,  # 0x96  0x2013  EN DASH
+      8212,  # 0x97  0x2014  EM DASH
+      732,   # 0x98  0x02DC  SMALL TILDE
+      8482,  # 0x99  0x2122  TRADE MARK SIGN
+      353,   # 0x9A  0x0161  LATIN SMALL LETTER S WITH CARON
+      8250,  # 0x9B  0x203A  SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+      339,   # 0x9C  0x0153  LATIN SMALL LIGATURE OE
+      65533, # 0x9D          UNDEFINED
+      382,   # 0x9E  0x017E  LATIN SMALL LETTER Z WITH CARON
+      376    # 0x9F  0x0178  LATIN CAPITAL LETTER Y WITH DIAERESIS
+  ]
+
+  # ENTITIES was generated from Python using the following code:
+  #
+  # import constants
+  # entities = constants.entities.items()
+  # entities.sort()
+  # list = [ ' '.join([repr(entity), '=>', ord(value)<128 and 
+  #   repr(str(value)) or repr(value.encode('utf-8')).replace("'",'"')])
+  #   for entity, value in entities]
+  # print '  ENTITIES = {\n    ' + ',\n    '.join(list) + '\n  }'
+
+  ENTITIES = {
+    'AElig' => "\xc3\x86",
+    'AElig;' => "\xc3\x86",
+    'AMP' => '&',
+    'AMP;' => '&',
+    'Aacute' => "\xc3\x81",
+    'Aacute;' => "\xc3\x81",
+    'Acirc' => "\xc3\x82",
+    'Acirc;' => "\xc3\x82",
+    'Agrave' => "\xc3\x80",
+    'Agrave;' => "\xc3\x80",
+    'Alpha;' => "\xce\x91",
+    'Aring' => "\xc3\x85",
+    'Aring;' => "\xc3\x85",
+    'Atilde' => "\xc3\x83",
+    'Atilde;' => "\xc3\x83",
+    'Auml' => "\xc3\x84",
+    'Auml;' => "\xc3\x84",
+    'Beta;' => "\xce\x92",
+    'COPY' => "\xc2\xa9",
+    'COPY;' => "\xc2\xa9",
+    'Ccedil' => "\xc3\x87",
+    'Ccedil;' => "\xc3\x87",
+    'Chi;' => "\xce\xa7",
+    'Dagger;' => "\xe2\x80\xa1",
+    'Delta;' => "\xce\x94",
+    'ETH' => "\xc3\x90",
+    'ETH;' => "\xc3\x90",
+    'Eacute' => "\xc3\x89",
+    'Eacute;' => "\xc3\x89",
+    'Ecirc' => "\xc3\x8a",
+    'Ecirc;' => "\xc3\x8a",
+    'Egrave' => "\xc3\x88",
+    'Egrave;' => "\xc3\x88",
+    'Epsilon;' => "\xce\x95",
+    'Eta;' => "\xce\x97",
+    'Euml' => "\xc3\x8b",
+    'Euml;' => "\xc3\x8b",
+    'GT' => '>',
+    'GT;' => '>',
+    'Gamma;' => "\xce\x93",
+    'Iacute' => "\xc3\x8d",
+    'Iacute;' => "\xc3\x8d",
+    'Icirc' => "\xc3\x8e",
+    'Icirc;' => "\xc3\x8e",
+    'Igrave' => "\xc3\x8c",
+    'Igrave;' => "\xc3\x8c",
+    'Iota;' => "\xce\x99",
+    'Iuml' => "\xc3\x8f",
+    'Iuml;' => "\xc3\x8f",
+    'Kappa;' => "\xce\x9a",
+    'LT' => '<',
+    'LT;' => '<',
+    'Lambda;' => "\xce\x9b",
+    'Mu;' => "\xce\x9c",
+    'Ntilde' => "\xc3\x91",
+    'Ntilde;' => "\xc3\x91",
+    'Nu;' => "\xce\x9d",
+    'OElig;' => "\xc5\x92",
+    'Oacute' => "\xc3\x93",
+    'Oacute;' => "\xc3\x93",
+    'Ocirc' => "\xc3\x94",
+    'Ocirc;' => "\xc3\x94",
+    'Ograve' => "\xc3\x92",
+    'Ograve;' => "\xc3\x92",
+    'Omega;' => "\xce\xa9",
+    'Omicron;' => "\xce\x9f",
+    'Oslash' => "\xc3\x98",
+    'Oslash;' => "\xc3\x98",
+    'Otilde' => "\xc3\x95",
+    'Otilde;' => "\xc3\x95",
+    'Ouml' => "\xc3\x96",
+    'Ouml;' => "\xc3\x96",
+    'Phi;' => "\xce\xa6",
+    'Pi;' => "\xce\xa0",
+    'Prime;' => "\xe2\x80\xb3",
+    'Psi;' => "\xce\xa8",
+    'QUOT' => '"',
+    'QUOT;' => '"',
+    'REG' => "\xc2\xae",
+    'REG;' => "\xc2\xae",
+    'Rho;' => "\xce\xa1",
+    'Scaron;' => "\xc5\xa0",
+    'Sigma;' => "\xce\xa3",
+    'THORN' => "\xc3\x9e",
+    'THORN;' => "\xc3\x9e",
+    'TRADE;' => "\xe2\x84\xa2",
+    'Tau;' => "\xce\xa4",
+    'Theta;' => "\xce\x98",
+    'Uacute' => "\xc3\x9a",
+    'Uacute;' => "\xc3\x9a",
+    'Ucirc' => "\xc3\x9b",
+    'Ucirc;' => "\xc3\x9b",
+    'Ugrave' => "\xc3\x99",
+    'Ugrave;' => "\xc3\x99",
+    'Upsilon;' => "\xce\xa5",
+    'Uuml' => "\xc3\x9c",
+    'Uuml;' => "\xc3\x9c",
+    'Xi;' => "\xce\x9e",
+    'Yacute' => "\xc3\x9d",
+    'Yacute;' => "\xc3\x9d",
+    'Yuml;' => "\xc5\xb8",
+    'Zeta;' => "\xce\x96",
+    'aacute' => "\xc3\xa1",
+    'aacute;' => "\xc3\xa1",
+    'acirc' => "\xc3\xa2",
+    'acirc;' => "\xc3\xa2",
+    'acute' => "\xc2\xb4",
+    'acute;' => "\xc2\xb4",
+    'aelig' => "\xc3\xa6",
+    'aelig;' => "\xc3\xa6",
+    'agrave' => "\xc3\xa0",
+    'agrave;' => "\xc3\xa0",
+    'alefsym;' => "\xe2\x84\xb5",
+    'alpha;' => "\xce\xb1",
+    'amp' => '&',
+    'amp;' => '&',
+    'and;' => "\xe2\x88\xa7",
+    'ang;' => "\xe2\x88\xa0",
+    'apos;' => "'",
+    'aring' => "\xc3\xa5",
+    'aring;' => "\xc3\xa5",
+    'asymp;' => "\xe2\x89\x88",
+    'atilde' => "\xc3\xa3",
+    'atilde;' => "\xc3\xa3",
+    'auml' => "\xc3\xa4",
+    'auml;' => "\xc3\xa4",
+    'bdquo;' => "\xe2\x80\x9e",
+    'beta;' => "\xce\xb2",
+    'brvbar' => "\xc2\xa6",
+    'brvbar;' => "\xc2\xa6",
+    'bull;' => "\xe2\x80\xa2",
+    'cap;' => "\xe2\x88\xa9",
+    'ccedil' => "\xc3\xa7",
+    'ccedil;' => "\xc3\xa7",
+    'cedil' => "\xc2\xb8",
+    'cedil;' => "\xc2\xb8",
+    'cent' => "\xc2\xa2",
+    'cent;' => "\xc2\xa2",
+    'chi;' => "\xcf\x87",
+    'circ;' => "\xcb\x86",
+    'clubs;' => "\xe2\x99\xa3",
+    'cong;' => "\xe2\x89\x85",
+    'copy' => "\xc2\xa9",
+    'copy;' => "\xc2\xa9",
+    'crarr;' => "\xe2\x86\xb5",
+    'cup;' => "\xe2\x88\xaa",
+    'curren' => "\xc2\xa4",
+    'curren;' => "\xc2\xa4",
+    'dArr;' => "\xe2\x87\x93",
+    'dagger;' => "\xe2\x80\xa0",
+    'darr;' => "\xe2\x86\x93",
+    'deg' => "\xc2\xb0",
+    'deg;' => "\xc2\xb0",
+    'delta;' => "\xce\xb4",
+    'diams;' => "\xe2\x99\xa6",
+    'divide' => "\xc3\xb7",
+    'divide;' => "\xc3\xb7",
+    'eacute' => "\xc3\xa9",
+    'eacute;' => "\xc3\xa9",
+    'ecirc' => "\xc3\xaa",
+    'ecirc;' => "\xc3\xaa",
+    'egrave' => "\xc3\xa8",
+    'egrave;' => "\xc3\xa8",
+    'empty;' => "\xe2\x88\x85",
+    'emsp;' => "\xe2\x80\x83",
+    'ensp;' => "\xe2\x80\x82",
+    'epsilon;' => "\xce\xb5",
+    'equiv;' => "\xe2\x89\xa1",
+    'eta;' => "\xce\xb7",
+    'eth' => "\xc3\xb0",
+    'eth;' => "\xc3\xb0",
+    'euml' => "\xc3\xab",
+    'euml;' => "\xc3\xab",
+    'euro;' => "\xe2\x82\xac",
+    'exist;' => "\xe2\x88\x83",
+    'fnof;' => "\xc6\x92",
+    'forall;' => "\xe2\x88\x80",
+    'frac12' => "\xc2\xbd",
+    'frac12;' => "\xc2\xbd",
+    'frac14' => "\xc2\xbc",
+    'frac14;' => "\xc2\xbc",
+    'frac34' => "\xc2\xbe",
+    'frac34;' => "\xc2\xbe",
+    'frasl;' => "\xe2\x81\x84",
+    'gamma;' => "\xce\xb3",
+    'ge;' => "\xe2\x89\xa5",
+    'gt' => '>',
+    'gt;' => '>',
+    'hArr;' => "\xe2\x87\x94",
+    'harr;' => "\xe2\x86\x94",
+    'hearts;' => "\xe2\x99\xa5",
+    'hellip;' => "\xe2\x80\xa6",
+    'iacute' => "\xc3\xad",
+    'iacute;' => "\xc3\xad",
+    'icirc' => "\xc3\xae",
+    'icirc;' => "\xc3\xae",
+    'iexcl' => "\xc2\xa1",
+    'iexcl;' => "\xc2\xa1",
+    'igrave' => "\xc3\xac",
+    'igrave;' => "\xc3\xac",
+    'image;' => "\xe2\x84\x91",
+    'infin;' => "\xe2\x88\x9e",
+    'int;' => "\xe2\x88\xab",
+    'iota;' => "\xce\xb9",
+    'iquest' => "\xc2\xbf",
+    'iquest;' => "\xc2\xbf",
+    'isin;' => "\xe2\x88\x88",
+    'iuml' => "\xc3\xaf",
+    'iuml;' => "\xc3\xaf",
+    'kappa;' => "\xce\xba",
+    'lArr;' => "\xe2\x87\x90",
+    'lambda;' => "\xce\xbb",
+    'lang;' => "\xe3\x80\x88",
+    'laquo' => "\xc2\xab",
+    'laquo;' => "\xc2\xab",
+    'larr;' => "\xe2\x86\x90",
+    'lceil;' => "\xe2\x8c\x88",
+    'ldquo;' => "\xe2\x80\x9c",
+    'le;' => "\xe2\x89\xa4",
+    'lfloor;' => "\xe2\x8c\x8a",
+    'lowast;' => "\xe2\x88\x97",
+    'loz;' => "\xe2\x97\x8a",
+    'lrm;' => "\xe2\x80\x8e",
+    'lsaquo;' => "\xe2\x80\xb9",
+    'lsquo;' => "\xe2\x80\x98",
+    'lt' => '<',
+    'lt;' => '<',
+    'macr' => "\xc2\xaf",
+    'macr;' => "\xc2\xaf",
+    'mdash;' => "\xe2\x80\x94",
+    'micro' => "\xc2\xb5",
+    'micro;' => "\xc2\xb5",
+    'middot' => "\xc2\xb7",
+    'middot;' => "\xc2\xb7",
+    'minus;' => "\xe2\x88\x92",
+    'mu;' => "\xce\xbc",
+    'nabla;' => "\xe2\x88\x87",
+    'nbsp' => "\xc2\xa0",
+    'nbsp;' => "\xc2\xa0",
+    'ndash;' => "\xe2\x80\x93",
+    'ne;' => "\xe2\x89\xa0",
+    'ni;' => "\xe2\x88\x8b",
+    'not' => "\xc2\xac",
+    'not;' => "\xc2\xac",
+    'notin;' => "\xe2\x88\x89",
+    'nsub;' => "\xe2\x8a\x84",
+    'ntilde' => "\xc3\xb1",
+    'ntilde;' => "\xc3\xb1",
+    'nu;' => "\xce\xbd",
+    'oacute' => "\xc3\xb3",
+    'oacute;' => "\xc3\xb3",
+    'ocirc' => "\xc3\xb4",
+    'ocirc;' => "\xc3\xb4",
+    'oelig;' => "\xc5\x93",
+    'ograve' => "\xc3\xb2",
+    'ograve;' => "\xc3\xb2",
+    'oline;' => "\xe2\x80\xbe",
+    'omega;' => "\xcf\x89",
+    'omicron;' => "\xce\xbf",
+    'oplus;' => "\xe2\x8a\x95",
+    'or;' => "\xe2\x88\xa8",
+    'ordf' => "\xc2\xaa",
+    'ordf;' => "\xc2\xaa",
+    'ordm' => "\xc2\xba",
+    'ordm;' => "\xc2\xba",
+    'oslash' => "\xc3\xb8",
+    'oslash;' => "\xc3\xb8",
+    'otilde' => "\xc3\xb5",
+    'otilde;' => "\xc3\xb5",
+    'otimes;' => "\xe2\x8a\x97",
+    'ouml' => "\xc3\xb6",
+    'ouml;' => "\xc3\xb6",
+    'para' => "\xc2\xb6",
+    'para;' => "\xc2\xb6",
+    'part;' => "\xe2\x88\x82",
+    'permil;' => "\xe2\x80\xb0",
+    'perp;' => "\xe2\x8a\xa5",
+    'phi;' => "\xcf\x86",
+    'pi;' => "\xcf\x80",
+    'piv;' => "\xcf\x96",
+    'plusmn' => "\xc2\xb1",
+    'plusmn;' => "\xc2\xb1",
+    'pound' => "\xc2\xa3",
+    'pound;' => "\xc2\xa3",
+    'prime;' => "\xe2\x80\xb2",
+    'prod;' => "\xe2\x88\x8f",
+    'prop;' => "\xe2\x88\x9d",
+    'psi;' => "\xcf\x88",
+    'quot' => '"',
+    'quot;' => '"',
+    'rArr;' => "\xe2\x87\x92",
+    'radic;' => "\xe2\x88\x9a",
+    'rang;' => "\xe3\x80\x89",
+    'raquo' => "\xc2\xbb",
+    'raquo;' => "\xc2\xbb",
+    'rarr;' => "\xe2\x86\x92",
+    'rceil;' => "\xe2\x8c\x89",
+    'rdquo;' => "\xe2\x80\x9d",
+    'real;' => "\xe2\x84\x9c",
+    'reg' => "\xc2\xae",
+    'reg;' => "\xc2\xae",
+    'rfloor;' => "\xe2\x8c\x8b",
+    'rho;' => "\xcf\x81",
+    'rlm;' => "\xe2\x80\x8f",
+    'rsaquo;' => "\xe2\x80\xba",
+    'rsquo;' => "\xe2\x80\x99",
+    'sbquo;' => "\xe2\x80\x9a",
+    'scaron;' => "\xc5\xa1",
+    'sdot;' => "\xe2\x8b\x85",
+    'sect' => "\xc2\xa7",
+    'sect;' => "\xc2\xa7",
+    'shy' => "\xc2\xad",
+    'shy;' => "\xc2\xad",
+    'sigma;' => "\xcf\x83",
+    'sigmaf;' => "\xcf\x82",
+    'sim;' => "\xe2\x88\xbc",
+    'spades;' => "\xe2\x99\xa0",
+    'sub;' => "\xe2\x8a\x82",
+    'sube;' => "\xe2\x8a\x86",
+    'sum;' => "\xe2\x88\x91",
+    'sup1' => "\xc2\xb9",
+    'sup1;' => "\xc2\xb9",
+    'sup2' => "\xc2\xb2",
+    'sup2;' => "\xc2\xb2",
+    'sup3' => "\xc2\xb3",
+    'sup3;' => "\xc2\xb3",
+    'sup;' => "\xe2\x8a\x83",
+    'supe;' => "\xe2\x8a\x87",
+    'szlig' => "\xc3\x9f",
+    'szlig;' => "\xc3\x9f",
+    'tau;' => "\xcf\x84",
+    'there4;' => "\xe2\x88\xb4",
+    'theta;' => "\xce\xb8",
+    'thetasym;' => "\xcf\x91",
+    'thinsp;' => "\xe2\x80\x89",
+    'thorn' => "\xc3\xbe",
+    'thorn;' => "\xc3\xbe",
+    'tilde;' => "\xcb\x9c",
+    'times' => "\xc3\x97",
+    'times;' => "\xc3\x97",
+    'trade;' => "\xe2\x84\xa2",
+    'uArr;' => "\xe2\x87\x91",
+    'uacute' => "\xc3\xba",
+    'uacute;' => "\xc3\xba",
+    'uarr;' => "\xe2\x86\x91",
+    'ucirc' => "\xc3\xbb",
+    'ucirc;' => "\xc3\xbb",
+    'ugrave' => "\xc3\xb9",
+    'ugrave;' => "\xc3\xb9",
+    'uml' => "\xc2\xa8",
+    'uml;' => "\xc2\xa8",
+    'upsih;' => "\xcf\x92",
+    'upsilon;' => "\xcf\x85",
+    'uuml' => "\xc3\xbc",
+    'uuml;' => "\xc3\xbc",
+    'weierp;' => "\xe2\x84\x98",
+    'xi;' => "\xce\xbe",
+    'yacute' => "\xc3\xbd",
+    'yacute;' => "\xc3\xbd",
+    'yen' => "\xc2\xa5",
+    'yen;' => "\xc2\xa5",
+    'yuml' => "\xc3\xbf",
+    'yuml;' => "\xc3\xbf",
+    'zeta;' => "\xce\xb6",
+    'zwj;' => "\xe2\x80\x8d",
+    'zwnj;' => "\xe2\x80\x8c"
+  }
+
+  ENCODINGS = %w[
+      ansi_x3.4-1968
+      iso-ir-6
+      ansi_x3.4-1986
+      iso_646.irv:1991
+      ascii
+      iso646-us
+      us-ascii
+      us
+      ibm367
+      cp367
+      csascii
+      ks_c_5601-1987
+      korean
+      iso-2022-kr
+      csiso2022kr
+      euc-kr
+      iso-2022-jp
+      csiso2022jp
+      iso-2022-jp-2
+      iso-ir-58
+      chinese
+      csiso58gb231280
+      iso_8859-1:1987
+      iso-ir-100
+      iso_8859-1
+      iso-8859-1
+      latin1
+      l1
+      ibm819
+      cp819
+      csisolatin1
+      iso_8859-2:1987
+      iso-ir-101
+      iso_8859-2
+      iso-8859-2
+      latin2
+      l2
+      csisolatin2
+      iso_8859-3:1988
+      iso-ir-109
+      iso_8859-3
+      iso-8859-3
+      latin3
+      l3
+      csisolatin3
+      iso_8859-4:1988
+      iso-ir-110
+      iso_8859-4
+      iso-8859-4
+      latin4
+      l4
+      csisolatin4
+      iso_8859-6:1987
+      iso-ir-127
+      iso_8859-6
+      iso-8859-6
+      ecma-114
+      asmo-708
+      arabic
+      csisolatinarabic
+      iso_8859-7:1987
+      iso-ir-126
+      iso_8859-7
+      iso-8859-7
+      elot_928
+      ecma-118
+      greek
+      greek8
+      csisolatingreek
+      iso_8859-8:1988
+      iso-ir-138
+      iso_8859-8
+      iso-8859-8
+      hebrew
+      csisolatinhebrew
+      iso_8859-5:1988
+      iso-ir-144
+      iso_8859-5
+      iso-8859-5
+      cyrillic
+      csisolatincyrillic
+      iso_8859-9:1989
+      iso-ir-148
+      iso_8859-9
+      iso-8859-9
+      latin5
+      l5
+      csisolatin5
+      iso-8859-10
+      iso-ir-157
+      l6
+      iso_8859-10:1992
+      csisolatin6
+      latin6
+      hp-roman8
+      roman8
+      r8
+      ibm037
+      cp037
+      csibm037
+      ibm424
+      cp424
+      csibm424
+      ibm437
+      cp437
+      437
+      cspc8codepage437
+      ibm500
+      cp500
+      csibm500
+      ibm775
+      cp775
+      cspc775baltic
+      ibm850
+      cp850
+      850
+      cspc850multilingual
+      ibm852
+      cp852
+      852
+      cspcp852
+      ibm855
+      cp855
+      855
+      csibm855
+      ibm857
+      cp857
+      857
+      csibm857
+      ibm860
+      cp860
+      860
+      csibm860
+      ibm861
+      cp861
+      861
+      cp-is
+      csibm861
+      ibm862
+      cp862
+      862
+      cspc862latinhebrew
+      ibm863
+      cp863
+      863
+      csibm863
+      ibm864
+      cp864
+      csibm864
+      ibm865
+      cp865
+      865
+      csibm865
+      ibm866
+      cp866
+      866
+      csibm866
+      ibm869
+      cp869
+      869
+      cp-gr
+      csibm869
+      ibm1026
+      cp1026
+      csibm1026
+      koi8-r
+      cskoi8r
+      koi8-u
+      big5-hkscs
+      ptcp154
+      csptcp154
+      pt154
+      cp154
+      utf-7
+      utf-16be
+      utf-16le
+      utf-16
+      utf-8
+      iso-8859-13
+      iso-8859-14
+      iso-ir-199
+      iso_8859-14:1998
+      iso_8859-14
+      latin8
+      iso-celtic
+      l8
+      iso-8859-15
+      iso_8859-15
+      iso-8859-16
+      iso-ir-226
+      iso_8859-16:2001
+      iso_8859-16
+      latin10
+      l10
+      gbk
+      cp936
+      ms936
+      gb18030
+      shift_jis
+      ms_kanji
+      csshiftjis
+      euc-jp
+      gb2312
+      big5
+      csbig5
+      windows-1250
+      windows-1251
+      windows-1252
+      windows-1253
+      windows-1254
+      windows-1255
+      windows-1256
+      windows-1257
+      windows-1258
+      tis-620
+      hz-gb-2312
+  ]
+
+end
--- a/vendor/plugins/HTML5lib/lib/html5/filters.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/filters.rb
@ -0,0 +1 @@
+require 'html5/filters/optionaltags'
--- a/vendor/plugins/HTML5lib/lib/html5lib/filters/base.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/filters/base.rb
@ -1,7 +1,7 @@
 require 'delegate'
 require 'enumerator'

-module HTML5lib
+module HTML5
  module Filters
    class Base < SimpleDelegator
      include Enumerable
--- a/vendor/plugins/HTML5lib/lib/html5lib/filters/inject_meta_charset.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/filters/inject_meta_charset.rb
@ -1,6 +1,6 @@
-require 'html5lib/filters/base'
+require 'html5/filters/base'

-module HTML5lib
+module HTML5
  module Filters
    class InjectMetaCharset < Base
      def initialize(source, encoding)
--- a/vendor/plugins/HTML5lib/lib/html5lib/filters/optionaltags.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/filters/optionaltags.rb
@ -1,7 +1,7 @@
-require 'html5lib/constants'
-require 'html5lib/filters/base'
+require 'html5/constants'
+require 'html5/filters/base'

-module HTML5lib
+module HTML5
  module Filters

    class OptionalTagFilter < Base
--- a/vendor/plugins/HTML5lib/lib/html5lib/filters/sanitizer.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/filters/sanitizer.rb
@ -1,7 +1,7 @@
-require 'html5lib/filters/base'
-require 'html5lib/sanitizer'
+require 'html5/filters/base'
+require 'html5/sanitizer'

-module HTML5lib
+module HTML5
  module Filters
    class HTMLSanitizeFilter < Base
      include HTMLSanitizeModule
--- a/vendor/plugins/HTML5lib/lib/html5lib/filters/whitespace.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/filters/whitespace.rb
@ -1,7 +1,7 @@
-require 'html5lib/constants'
-require 'html5lib/filters/base'
+require 'html5/constants'
+require 'html5/filters/base'

-module HTML5lib
+module HTML5
  module Filters
    class WhitespaceFilter < Base

--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser.rb
@ -1,246 +1,246 @@
-require 'html5lib/constants'
-require 'html5lib/tokenizer'
-require 'html5lib/treebuilders/rexml'
-
-Dir.glob(File.join(File.dirname(__FILE__), 'html5parser', '*_phase.rb')).each do |path|
-  require 'html5lib/html5parser/' + File.basename(path)
-end
-
-module HTML5lib
-
-  # Error in parsed document
-  class ParseError < Exception; end
-  class AssertionError < Exception; end
-
-  # HTML parser. Generates a tree structure from a stream of (possibly malformed) HTML
-  #
-  class HTMLParser
-
-    attr_accessor :phase, :firstStartTag, :innerHTML, :lastPhase, :insertFromTable
-
-    attr_reader :phases, :tokenizer, :tree, :errors
-
-    def self.parse(stream, options = {})
-      encoding = options.delete(:encoding)
-      new(options).parse(stream,encoding)
-    end
-
-    def self.parseFragment(stream, options = {})
-      container = options.delete(:container) || 'div'
-      encoding = options.delete(:encoding)
-      new(options).parseFragment(stream,container,encoding)
-    end
-
-    @@phases = %w( initial rootElement beforeHead inHead afterHead inBody inTable inCaption
-      inColumnGroup inTableBody inRow inCell inSelect afterBody inFrameset afterFrameset trailingEnd )
-
-    # :strict - raise an exception when a parse error is encountered
-    # :tree - a treebuilder class controlling the type of tree that will be
-    # returned. Built in treebuilders can be accessed through
-    # HTML5lib::TreeBuilders[treeType]
-    def initialize(options = {})
-      @strict = false
-      @errors = []
-     
-      @tokenizer =  HTMLTokenizer
-      @tree = TreeBuilders::REXML::TreeBuilder
- 
-      options.each { |name, value| instance_variable_set("@#{name}", value) }
-
-      @tree = @tree.new
-
-      @phases = @@phases.inject({}) do |phases, phase_name|
-        phase_class_name = phase_name.sub(/(.)/) { $1.upcase } + 'Phase'
-        phases[phase_name.to_sym] = HTML5lib.const_get(phase_class_name).new(self, @tree)
-        phases 
-      end
-    end
-
-    def _parse(stream, innerHTML, encoding, container = 'div')
-      @tree.reset
-      @firstStartTag = false
-      @errors = []
-
-      @tokenizer = @tokenizer.class unless Class === @tokenizer
-      @tokenizer = @tokenizer.new(stream, :encoding => encoding,
-        :parseMeta => !innerHTML)
-
-      if innerHTML
-        case @innerHTML = container.downcase
-          when 'title', 'textarea'
-            @tokenizer.contentModelFlag = :RCDATA
-          when 'style', 'script', 'xmp', 'iframe', 'noembed', 'noframes', 'noscript'
-            @tokenizer.contentModelFlag = :CDATA
-          when 'plaintext'
-            @tokenizer.contentModelFlag = :PLAINTEXT
-          else
-          # contentModelFlag already is PCDATA
-          #@tokenizer.contentModelFlag = :PCDATA
-        end
-      
-        @phase = @phases[:rootElement]
-        @phase.insertHtmlElement
-        resetInsertionMode
-      else
-        @innerHTML = false
-        @phase = @phases[:initial]
-      end
-
-      # We only seem to have InBodyPhase testcases where the following is
-      # relevant ... need others too
-      @lastPhase = nil
-
-      # XXX This is temporary for the moment so there isn't any other
-      # changes needed for the parser to work with the iterable tokenizer
-      @tokenizer.each do |token|
-        token = normalizeToken(token)
-
-        method = 'process%s' % token[:type]
-
-        case token[:type]
-          when :Characters, :SpaceCharacters, :Comment
-            @phase.send method, token[:data]
-          when :StartTag
-            @phase.send method, token[:name], token[:data]
-          when :EndTag
-            @phase.send method, token[:name]
-          when :Doctype
-            @phase.send method, token[:name], token[:publicId],
-              token[:systemId], token[:correct]
-          else
-            parseError(token[:data])
-        end
-      end
-
-      # When the loop finishes it's EOF
-      @phase.processEOF
-    end
-
-    # Parse a HTML document into a well-formed tree
-    #
-    # stream - a filelike object or string containing the HTML to be parsed
-    #
-    # The optional encoding parameter must be a string that indicates
-    # the encoding.  If specified, that encoding will be used,
-    # regardless of any BOM or later declaration (such as in a meta
-    # element)
-    def parse(stream, encoding=nil)
-      _parse(stream, false, encoding)
-      return @tree.getDocument
-    end
-  
-    # Parse a HTML fragment into a well-formed tree fragment
-    
-    # container - name of the element we're setting the innerHTML property
-    # if set to nil, default to 'div'
-    #
-    # stream - a filelike object or string containing the HTML to be parsed
-    #
-    # The optional encoding parameter must be a string that indicates
-    # the encoding.  If specified, that encoding will be used,
-    # regardless of any BOM or later declaration (such as in a meta
-    # element)
-    def parseFragment(stream, container='div', encoding=nil)
-      _parse(stream, true, encoding, container)
-      return @tree.getFragment
-    end
-
-    def parseError(data = 'XXX ERROR MESSAGE NEEDED')
-      # XXX The idea is to make data mandatory.
-      @errors.push([@tokenizer.stream.position, data])
-      raise ParseError if @strict
-    end
-
-    # HTML5 specific normalizations to the token stream
-    def normalizeToken(token)
-
-      if token[:type] == :EmptyTag
-        # When a solidus (/) is encountered within a tag name what happens
-        # depends on whether the current tag name matches that of a void
-        # element.  If it matches a void element atheists did the wrong
-        # thing and if it doesn't it's wrong for everyone.
-
-        unless VOID_ELEMENTS.include?(token[:name])
-          parseError(_('Solidus (/) incorrectly placed in tag.'))
-        end
-
-        token[:type] = :StartTag
-      end
-
-      if token[:type] == :StartTag
-        token[:name] = token[:name].tr(ASCII_UPPERCASE,ASCII_LOWERCASE)
-
-        # We need to remove the duplicate attributes and convert attributes
-        # to a dict so that [["x", "y"], ["x", "z"]] becomes {"x": "y"}
-
-        unless token[:data].empty?
-          data = token[:data].reverse.map { |attr, value| [attr.tr(ASCII_UPPERCASE, ASCII_LOWERCASE), value] }
-          token[:data] = Hash[*data.flatten]
-        end
-
-      elsif token[:type] == :EndTag
-        parseError(_('End tag contains unexpected attributes.')) unless token[:data].empty?
-        token[:name] = token[:name].downcase
-      end
-
-      return token
-    end
-
-    @@new_modes = {
-      'select' => :inSelect,
-      'td' => :inCell,
-      'th' => :inCell,
-      'tr' => :inRow,
-      'tbody' => :inTableBody,
-      'thead' => :inTableBody,
-      'tfoot' => :inTableBody,
-      'caption' => :inCaption,
-      'colgroup' => :inColumnGroup,
-      'table' => :inTable,
-      'head' => :inBody,
-      'body' => :inBody,
-      'frameset' => :inFrameset
-    }
-
-    def resetInsertionMode
-      # The name of this method is mostly historical. (It's also used in the
-      # specification.)
-      last = false
-
-      @tree.openElements.reverse.each do |node|
-        nodeName = node.name
-
-        if node == @tree.openElements[0]
-          last = true
-          unless ['td', 'th'].include?(nodeName)
-            # XXX
-            # assert @innerHTML
-            nodeName = @innerHTML
-          end
-        end
-
-        # Check for conditions that should only happen in the innerHTML
-        # case
-        if ['select', 'colgroup', 'head', 'frameset'].include?(nodeName)
-          # XXX
-          # assert @innerHTML
-        end
-
-        if @@new_modes.has_key?(nodeName)
-          @phase = @phases[@@new_modes[nodeName]]
-        elsif nodeName == 'html'
-          @phase = @phases[@tree.headPointer.nil?? :beforeHead : :afterHead]
-        elsif last
-          @phase = @phases[:inBody]
-        else
-          next
-        end
-
-        break
-      end
-    end
-
-    def _(string); string; end
-  end
-
-end
+require 'html5/constants'
+require 'html5/tokenizer'
+require 'html5/treebuilders/rexml'
+
+Dir.glob(File.join(File.dirname(__FILE__), 'html5parser', '*_phase.rb')).each do |path|
+  require 'html5/html5parser/' + File.basename(path)
+end
+
+module HTML5
+
+  # Error in parsed document
+  class ParseError < Exception; end
+  class AssertionError < Exception; end
+
+  # HTML parser. Generates a tree structure from a stream of (possibly malformed) HTML
+  #
+  class HTMLParser
+
+    attr_accessor :phase, :firstStartTag, :innerHTML, :lastPhase, :insertFromTable
+
+    attr_reader :phases, :tokenizer, :tree, :errors
+
+    def self.parse(stream, options = {})
+      encoding = options.delete(:encoding)
+      new(options).parse(stream,encoding)
+    end
+
+    def self.parseFragment(stream, options = {})
+      container = options.delete(:container) || 'div'
+      encoding = options.delete(:encoding)
+      new(options).parseFragment(stream,container,encoding)
+    end
+
+    @@phases = %w( initial rootElement beforeHead inHead afterHead inBody inTable inCaption
+      inColumnGroup inTableBody inRow inCell inSelect afterBody inFrameset afterFrameset trailingEnd )
+
+    # :strict - raise an exception when a parse error is encountered
+    # :tree - a treebuilder class controlling the type of tree that will be
+    # returned. Built in treebuilders can be accessed through
+    # HTML5::TreeBuilders[treeType]
+    def initialize(options = {})
+      @strict = false
+      @errors = []
+     
+      @tokenizer =  HTMLTokenizer
+      @tree = TreeBuilders::REXML::TreeBuilder
+ 
+      options.each { |name, value| instance_variable_set("@#{name}", value) }
+
+      @tree = @tree.new
+
+      @phases = @@phases.inject({}) do |phases, phase_name|
+        phase_class_name = phase_name.sub(/(.)/) { $1.upcase } + 'Phase'
+        phases[phase_name.to_sym] = HTML5.const_get(phase_class_name).new(self, @tree)
+        phases 
+      end
+    end
+
+    def _parse(stream, innerHTML, encoding, container = 'div')
+      @tree.reset
+      @firstStartTag = false
+      @errors = []
+
+      @tokenizer = @tokenizer.class unless Class === @tokenizer
+      @tokenizer = @tokenizer.new(stream, :encoding => encoding,
+        :parseMeta => !innerHTML)
+
+      if innerHTML
+        case @innerHTML = container.downcase
+          when 'title', 'textarea'
+            @tokenizer.contentModelFlag = :RCDATA
+          when 'style', 'script', 'xmp', 'iframe', 'noembed', 'noframes', 'noscript'
+            @tokenizer.contentModelFlag = :CDATA
+          when 'plaintext'
+            @tokenizer.contentModelFlag = :PLAINTEXT
+          else
+          # contentModelFlag already is PCDATA
+          #@tokenizer.contentModelFlag = :PCDATA
+        end
+      
+        @phase = @phases[:rootElement]
+        @phase.insertHtmlElement
+        resetInsertionMode
+      else
+        @innerHTML = false
+        @phase = @phases[:initial]
+      end
+
+      # We only seem to have InBodyPhase testcases where the following is
+      # relevant ... need others too
+      @lastPhase = nil
+
+      # XXX This is temporary for the moment so there aren't any other
+      # changes needed for the parser to work with the iterable tokenizer
+      @tokenizer.each do |token|
+        token = normalizeToken(token)
+
+        method = 'process%s' % token[:type]
+
+        case token[:type]
+          when :Characters, :SpaceCharacters, :Comment
+            @phase.send method, token[:data]
+          when :StartTag
+            @phase.send method, token[:name], token[:data]
+          when :EndTag
+            @phase.send method, token[:name]
+          when :Doctype
+            @phase.send method, token[:name], token[:publicId],
+              token[:systemId], token[:correct]
+          else
+            parseError(token[:data])
+        end
+      end
+
+      # When the loop finishes it's EOF
+      @phase.processEOF
+    end
+
+    # Parse a HTML document into a well-formed tree
+    #
+    # stream - a filelike object or string containing the HTML to be parsed
+    #
+    # The optional encoding parameter must be a string that indicates
+    # the encoding.  If specified, that encoding will be used,
+    # regardless of any BOM or later declaration (such as in a meta
+    # element)
+    def parse(stream, encoding=nil)
+      _parse(stream, false, encoding)
+      return @tree.getDocument
+    end
+  
+    # Parse a HTML fragment into a well-formed tree fragment
+    #
+    # container - name of the element whose innerHTML property is being set;
+    # defaults to 'div' when omitted
+    #
+    # stream - a filelike object or string containing the HTML to be parsed
+    #
+    # The optional encoding parameter must be a string that indicates
+    # the encoding.  If specified, that encoding will be used,
+    # regardless of any BOM or later declaration (such as in a meta
+    # element)
+    def parseFragment(stream, container='div', encoding=nil)
+      _parse(stream, true, encoding, container)
+      return @tree.getFragment
+    end
+
+    def parseError(data = 'XXX ERROR MESSAGE NEEDED')
+      # XXX The idea is to make data mandatory.
+      @errors.push([@tokenizer.stream.position, data])
+      raise ParseError if @strict
+    end
+
+    # HTML5 specific normalizations to the token stream
+    def normalizeToken(token)
+
+      if token[:type] == :EmptyTag
+        # When a solidus (/) is encountered within a tag name what happens
+        # depends on whether the current tag name matches that of a void
+        # element.  If it matches a void element the solidus is tolerated
+        # and no error is reported; if it doesn't, it is a parse error.
+
+        unless VOID_ELEMENTS.include?(token[:name])
+          parseError(_('Solidus (/) incorrectly placed in tag.'))
+        end
+
+        token[:type] = :StartTag
+      end
+
+      if token[:type] == :StartTag
+        token[:name] = token[:name].tr(ASCII_UPPERCASE,ASCII_LOWERCASE)
+
+        # We need to remove the duplicate attributes and convert attributes
+        # to a Hash so that [["x", "y"], ["x", "z"]] becomes {"x" => "y"}
+
+        unless token[:data].empty?
+          data = token[:data].reverse.map { |attr, value| [attr.tr(ASCII_UPPERCASE, ASCII_LOWERCASE), value] }
+          token[:data] = Hash[*data.flatten]
+        end
+
+      elsif token[:type] == :EndTag
+        parseError(_('End tag contains unexpected attributes.')) unless token[:data].empty?
+        token[:name] = token[:name].downcase
+      end
+
+      return token
+    end
+
+    @@new_modes = {
+      'select' => :inSelect,
+      'td' => :inCell,
+      'th' => :inCell,
+      'tr' => :inRow,
+      'tbody' => :inTableBody,
+      'thead' => :inTableBody,
+      'tfoot' => :inTableBody,
+      'caption' => :inCaption,
+      'colgroup' => :inColumnGroup,
+      'table' => :inTable,
+      'head' => :inBody,
+      'body' => :inBody,
+      'frameset' => :inFrameset
+    }
+
+    def resetInsertionMode
+      # The name of this method is mostly historical. (It's also used in the
+      # specification.)
+      last = false
+
+      @tree.openElements.reverse.each do |node|
+        nodeName = node.name
+
+        if node == @tree.openElements[0]
+          last = true
+          unless ['td', 'th'].include?(nodeName)
+            # XXX
+            # assert @innerHTML
+            nodeName = @innerHTML
+          end
+        end
+
+        # Check for conditions that should only happen in the innerHTML
+        # case
+        if ['select', 'colgroup', 'head', 'frameset'].include?(nodeName)
+          # XXX
+          # assert @innerHTML
+        end
+
+        if @@new_modes.has_key?(nodeName)
+          @phase = @phases[@@new_modes[nodeName]]
+        elsif nodeName == 'html'
+          @phase = @phases[@tree.headPointer.nil?? :beforeHead : :afterHead]
+        elsif last
+          @phase = @phases[:inBody]
+        else
+          next
+        end
+
+        break
+      end
+    end
+
+    def _(string); string; end
+  end
+
+end
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_body_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_body_phase.rb
@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class AfterBodyPhase < Phase

    handle_end 'html'
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_frameset_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_frameset_phase.rb
@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class AfterFramesetPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#after3
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_head_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_head_phase.rb
@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class AfterHeadPhase < Phase
  
    handle_start 'html', 'body', 'frameset', %w( base link meta script style title ) => 'FromHead'
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/before_head_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/before_head_phase.rb
@ -1,11 +1,11 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class BeforeHeadPhase < Phase

    handle_start 'html', 'head'

-    handle_end %w( html head body br ) => 'ImplyHead'
+    handle_end %w( html head body br p ) => 'ImplyHead'

    def processEOF
      startTagHead('head', {})
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_body_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_body_phase.rb
@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class InBodyPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-body
@ -112,7 +112,7 @@ module HTML5lib

    def startTagForm(name, attributes)
      if @tree.formPointer
-        @parser.parseError('Unexpected start tag (form). Ignored.')
+        @parser.parseError(_('Unexpected start tag (form). Ignored.'))
      else
        endTagP('p') if in_scope?('p')
        @tree.insertElement(name, attributes)
@ -129,9 +129,9 @@ module HTML5lib
        if stopName.include?(node.name)
          poppedNodes = (0..i).collect { @tree.openElements.pop }
          if i >= 1
-            @parser.parseError("Missing end tag%s (%s)" % [
+            @parser.parseError(_("Missing end tag%s (%s)" % [
              (i>1 ? 's' : ''),
-              poppedNodes.reverse.map {|item| item.name}.join(', ')])
+              poppedNodes.reverse.map {|item| item.name}.join(', ')]))
          end
          break
        end
@ -251,7 +251,7 @@ module HTML5lib
    end

    def startTagIsindex(name, attributes)
-      @parser.parseError("Unexpected start tag isindex. Don't use it!")
+      @parser.parseError(_("Unexpected start tag isindex. Don't use it!"))
      return if @tree.formPointer
      processStartTag('form', {})
      processStartTag('hr', {})
@ -311,8 +311,13 @@ module HTML5lib

    def endTagP(name)
      @tree.generateImpliedEndTags('p') if in_scope?('p')
-      @parser.parseError('Unexpected end tag (p).') unless @tree.openElements[-1].name == 'p'
-      @tree.openElements.pop while in_scope?('p')
+      @parser.parseError(_('Unexpected end tag (p).')) unless @tree.openElements[-1].name == 'p'
+      if in_scope?('p')
+        @tree.openElements.pop while in_scope?('p')
+      else
+        startTagCloseP('p', {})
+        endTagP('p')
+      end
    end

    def endTagBody(name)
@ -342,7 +347,7 @@ module HTML5lib
      @tree.generateImpliedEndTags if in_scope?(name)

      unless @tree.openElements[-1].name == name
-        @parser.parseError(("End tag (#{name}) seen too early. Expected other end tag."))
+        @parser.parseError(_("End tag (#{name}) seen too early. Expected other end tag."))
      end

      if in_scope?(name)
@ -351,7 +356,14 @@ module HTML5lib
    end

    def endTagForm(name)
-      endTagBlock(name)
+      if in_scope?(name)
+        @tree.generateImpliedEndTags
+      end
+      if @tree.openElements[-1].name != name
+        @parser.parseError(_("End tag (form) seen too early. Ignored."))
+      else
+        @tree.openElements.pop
+      end
      @tree.formPointer = nil
    end

@ -361,7 +373,7 @@ module HTML5lib
        @tree.generateImpliedEndTags(name)

        unless @tree.openElements[-1].name == name
-          @parser.parseError(("End tag (#{name}) seen too early. Expected other end tag."))
+          @parser.parseError(_("End tag (#{name}) seen too early. Expected other end tag."))
        end
      end

@ -377,7 +389,7 @@ module HTML5lib
      end

      unless @tree.openElements[-1].name == name
-        @parser.parseError(("Unexpected end tag (#{name}). Expected other end tag."))
+        @parser.parseError(_("Unexpected end tag (#{name}). Expected other end tag."))
      end

      HEADING_ELEMENTS.each do |element|
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_caption_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_caption_phase.rb
@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class InCaptionPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-caption
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_cell_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_cell_phase.rb
@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class InCellPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-cell
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_column_group_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_column_group_phase.rb
@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class InColumnGroupPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-column
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_frameset_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_frameset_phase.rb
@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class InFramesetPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-frameset
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_head_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_head_phase.rb
@ -1,12 +1,12 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class InHeadPhase < Phase

    handle_start 'html', 'head', 'title', 'style', 'script', %w( base link meta )

    handle_end 'head'
-    handle_end %w( html body br ) => 'ImplyAfterHead'
+    handle_end %w( html body br p ) => 'ImplyAfterHead'
    handle_end %w( title style script )

    def processEOF
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_row_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_row_phase.rb
@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class InRowPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-row
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_select_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_select_phase.rb
@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class InSelectPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-select
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_table_body_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_table_body_phase.rb
@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class InTableBodyPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-table0
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_table_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_table_phase.rb
@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class InTablePhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-table
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/initial_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/initial_phase.rb
@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class InitialPhase < Phase

    # This phase deals with error handling as well which is currently not
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/phase.rb
@ -1,4 +1,4 @@
-module HTML5lib
+module HTML5
  # Base class for helper objects that implement each phase of processing.
  #
  # Handler methods should be in the following order (they can be omitted):
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/root_element_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/root_element_phase.rb
@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class RootElementPhase < Phase

    def processEOF
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/trailing_end_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/trailing_end_phase.rb
@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class TrailingEndPhase < Phase

    def processEOF
--- a/vendor/plugins/HTML5lib/lib/html5lib/inputstream.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/inputstream.rb
@ -1,7 +1,7 @@
 require 'stringio'
-require 'html5lib/constants'
+require 'html5/constants'

-module HTML5lib
+module HTML5

  # Provides a unicode stream of characters to the HTMLTokenizer.

@ -10,7 +10,7 @@ module HTML5lib

  class HTMLInputStream

-    attr_accessor :queue, :char_encoding
+    attr_accessor :queue, :char_encoding, :errors

    # Initialises the HTMLInputStream.
    # 
@ -40,25 +40,31 @@ module HTML5lib
      #Number of bytes to use when looking for a meta element with
      #encoding information
      @NUM_BYTES_META = 512
+      #Number of bytes to read at a time when detecting encoding using chardet
+      @NUM_BYTES_CHARDET = 256
+      #Number of bytes to use when reading content
+      @NUM_BYTES_BUFFER = 1024
+
      #Encoding to use if no other information can be found
      @DEFAULT_ENCODING = 'windows-1252'
    
      #Detect encoding iff no explicit "transport level" encoding is supplied
-      if @encoding.nil? or not HTML5lib.is_valid_encoding(@encoding)
+      if @encoding.nil? or not HTML5.is_valid_encoding(@encoding)
        @char_encoding = detect_encoding
      else
        @char_encoding = @encoding
      end

      # Read bytes from stream decoding them into Unicode
-      uString = @raw_stream.read
+      @buffer = @raw_stream.read(@NUM_BYTES_BUFFER) || ''
      if @char_encoding == 'windows-1252'
        @win1252 = true
      elsif @char_encoding != 'utf-8'
        begin
          require 'iconv'
          begin
-            uString = Iconv.iconv('utf-8', @char_encoding, uString).first
+            @buffer << @raw_stream.read unless @raw_stream.eof?
+            @buffer = Iconv.iconv('utf-8', @char_encoding, @buffer).first
          rescue
            @win1252 = true
          end
@ -67,10 +73,8 @@ module HTML5lib
        end
      end

-      # Convert the unicode string into a list to be used as the data stream
-      @data_stream = uString
-
      @queue = []
+      @errors = []

      # Reset position in the list to read from
      @tell = 0
@ -109,9 +113,22 @@ module HTML5lib
        begin
          require 'rubygems'
          require 'UniversalDetector' # gem install chardet
-          buffer = @raw_stream.read
-          encoding = UniversalDetector::chardet(buffer)['encoding']
-          seek(buffer, 0)
+          buffers = []
+          detector = UniversalDetector::Detector.instance
+          detector.reset
+          until @raw_stream.eof?
+            buffer = @raw_stream.read(@NUM_BYTES_CHARDET)
+            break if !buffer or buffer.empty?
+            buffers << buffer
+            detector.feed(buffer)
+            break if detector.instance_eval {@done}
+            detector.instance_eval {
+              @_mLastChar = @_mLastChar.chr if Fixnum === @_mLastChar
+            }
+          end
+          detector.close
+          encoding = detector.result['encoding']
+          seek(buffers*'', 0)
        rescue LoadError
        end
      end
@ -242,14 +259,20 @@ module HTML5lib
      unless @queue.empty?
        return @queue.shift
      else
-        c = @data_stream[@tell]
+        if @tell + 3 > @buffer.length and !@raw_stream.eof?
+          # read next block
+          @buffer = @buffer[@tell .. -1] + @raw_stream.read(@NUM_BYTES_BUFFER)
+          @tell = 0
+        end
+
+        c = @buffer[@tell]
        @tell += 1

        case c
        when 0x01 .. 0x7F
          if c == 0x0D
            # normalize newlines
-            @tell += 1 if @data_stream[@tell] == 0x0A
+            @tell += 1 if @buffer[@tell] == 0x0A
            c = 0x0A
          end

@ -276,7 +299,7 @@ module HTML5lib
        when 0xC0 .. 0xFF
          if @win1252
            "\xC3" + (c-64).chr # convert to utf-8
-          elsif @data_stream[@tell-1 .. -1] =~ /^
+          elsif @buffer[@tell-1 .. @tell+3] =~ /^
                ( [\xC2-\xDF][\x80-\xBF]             # non-overlong 2-byte
                |  \xE0[\xA0-\xBF][\x80-\xBF]        # excluding overlongs
                | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}  # straight 3-byte
@ -292,6 +315,8 @@ module HTML5lib
          end

        when 0x00
+          @errors.push('null character found in input stream, ' +
+            'replaced with U+FFFD')
          [0xFFFD].pack('U') # null characters are invalid

        else
@ -317,6 +342,10 @@ module HTML5lib
      @queue.insert(0, c) unless c == :EOF
      return char_stack.join('')
    end
+
+    def unget(characters)
+      @queue.unshift(*characters.to_a) unless characters == :EOF
+    end
  end

  # String-like object with an associated position and various extra methods
@ -433,14 +462,14 @@ module HTML5lib
        
        if attr[0] == 'charset'
          tentative_encoding = attr[1]
-          if HTML5lib.is_valid_encoding(tentative_encoding)
+          if HTML5.is_valid_encoding(tentative_encoding)
            @encoding = tentative_encoding  
            return false
          end
        elsif attr[0] == 'content'
          content_parser = ContentAttrParser.new(EncodingBytes.new(attr[1]))
          tentative_encoding = content_parser.parse
-          if HTML5lib.is_valid_encoding(tentative_encoding)
+          if HTML5.is_valid_encoding(tentative_encoding)
            @encoding = tentative_encoding
            return false
          end
--- a/vendor/plugins/HTML5lib/lib/html5lib/liberalxmlparser.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/liberalxmlparser.rb
@ -11,10 +11,10 @@
 # 
 # @@TODO:
 # * Selectively lowercase only XHTML, but not foreign markup
-require 'html5lib/html5parser'
-require 'html5lib/constants'
+require 'html5/html5parser'
+require 'html5/constants'

-module HTML5lib
+module HTML5

  # liberal XML parser
  class XMLParser < HTMLParser
@ -25,25 +25,35 @@ module HTML5lib
    end

    def normalizeToken(token)
-      if token[:type] == :StartTag or token[:type] == :EmptyTag
+      case token[:type]
+      when :StartTag, :EmptyTag
        # We need to remove the duplicate attributes and convert attributes
-        # to a dict so that [["x", "y"], ["x", "z"]] becomes {"x": "y"}
+        # to a Hash so that [["x", "y"], ["x", "z"]] becomes {"x" => "y"}

        token[:data] = Hash[*token[:data].reverse.flatten]

        # For EmptyTags, process both a Start and an End tag
        if token[:type] == :EmptyTag
+          save = @tokenizer.contentModelFlag
          @phase.processStartTag(token[:name], token[:data])
+          @tokenizer.contentModelFlag = save
          token[:data] = {}
          token[:type] = :EndTag
        end

-      elsif token[:type] == :EndTag
+      when :Characters
+        # un-escape RCDATA_ELEMENTS (e.g. style, script)
+        if @tokenizer.contentModelFlag == :CDATA
+          token[:data] = token[:data].
+            gsub('&lt;','<').gsub('&gt;','>').gsub('&amp;','&')
+        end
+
+      when :EndTag
        if token[:data]
           parseError(_("End tag contains unexpected attributes."))
        end

-      elsif token[:type] == :Comment
+      when :Comment
        # Rescue CDATA from the comments
        if token[:data][0..6] == "[CDATA[" and token[:data][-2..-1] == "]]"
          token[:type] = :Characters
--- a/vendor/plugins/HTML5lib/lib/html5lib/sanitizer.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/sanitizer.rb
@ -1,6 +1,7 @@
 require 'cgi'
+require 'html5/tokenizer'

-module HTML5lib
+module HTML5

 # This module provides sanitization of XHTML+MathML+SVG
 # and of inline style attributes.
--- a/vendor/plugins/HTML5lib/lib/html5/serializer.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/serializer.rb
@ -0,0 +1,2 @@
+require 'html5/serializer/htmlserializer'
+require 'html5/serializer/xhtmlserializer'
--- a/vendor/plugins/HTML5lib/lib/html5lib/serializer/htmlserializer.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/serializer/htmlserializer.rb
@ -1,6 +1,6 @@
-require 'html5lib/constants'
+require 'html5/constants'

-module HTML5lib
+module HTML5

  class HTMLSerializer

@ -21,6 +21,7 @@ module HTML5lib
      @use_trailing_solidus = false
      @space_before_trailing_solidus = true
      @escape_lt_in_attrs = false
+      @escape_rcdata = false

      @omit_optional_tags = true
      @sanitize = false
@ -43,22 +44,22 @@ module HTML5lib
      @errors = []

      if encoding and @inject_meta_charset
-        require 'html5lib/filters/inject_meta_charset'
+        require 'html5/filters/inject_meta_charset'
        treewalker = Filters::InjectMetaCharset.new(treewalker, encoding)
      end

      if @strip_whitespace
-        require 'html5lib/filters/whitespace'
+        require 'html5/filters/whitespace'
        treewalker = Filters::WhitespaceFilter.new(treewalker)
      end

      if @sanitize
-        require 'html5lib/filters/sanitizer'
+        require 'html5/filters/sanitizer'
        treewalker = Filters::HTMLSanitizeFilter.new(treewalker)
      end

      if @omit_optional_tags
-        require 'html5lib/filters/optionaltags'
+        require 'html5/filters/optionaltags'
        treewalker = Filters::OptionalTagFilter.new(treewalker)
      end

@ -81,7 +82,7 @@ module HTML5lib

        elsif [:StartTag, :EmptyTag].include? type
          name = token[:name]
-          if RCDATA_ELEMENTS.include?(name)
+          if RCDATA_ELEMENTS.include?(name) and not @escape_rcdata
            in_cdata = true
          elsif in_cdata
            serializeError(_("Unexpected child element of a CDATA element"))
--- a/vendor/plugins/HTML5lib/lib/html5lib/serializer/xhtmlserializer.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/serializer/xhtmlserializer.rb
@ -1,6 +1,6 @@
-require 'html5lib/serializer/htmlserializer'
+require 'html5/serializer/htmlserializer'

-module HTML5lib
+module HTML5

  class XHTMLSerializer < HTMLSerializer
    DEFAULTS = {
@ -8,7 +8,8 @@ module HTML5lib
      :minimize_boolean_attributes => false,
      :use_trailing_solidus => true,
      :escape_lt_in_attrs => true,
-      :omit_optional_tags => false
+      :omit_optional_tags => false,
+      :escape_rcdata => true
    }

    def initialize(options={})
--- a/vendor/plugins/HTML5lib/lib/html5lib/tokenizer.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/tokenizer.rb
@ -1,7 +1,7 @@
-require 'html5lib/constants'
-require 'html5lib/inputstream'
+require 'html5/constants'
+require 'html5/inputstream'

-module HTML5lib
+module HTML5

  # This class takes care of tokenizing HTML.
  #
@ -84,9 +84,9 @@ module HTML5lib
      # Start processing. When EOF is reached @state will return false
      # instead of true and the loop will terminate.
      while send @state
-        while not @tokenQueue.empty?
-          yield @tokenQueue.shift
-        end
+        yield :type => :ParseError, :data => @stream.errors.shift until
+          @stream.errors.empty?
+        yield @tokenQueue.shift until @tokenQueue.empty?
      end
    end

@ -109,7 +109,7 @@ module HTML5lib

      # The character we just consumed needs to be put back on the stack so it
      # doesn't get lost...
-      @stream.queue.push(data)
+      @stream.unget(data)
    end

    # This function returns either U+FFFD or the character based on the
@ -128,7 +128,6 @@ module HTML5lib
        radix = 16
      end

-      char = [0xFFFD].pack('U')
      charStack = []

      # Consume all the characters that are in range while making sure we
@ -142,17 +141,25 @@ module HTML5lib
      # Convert the set of characters consumed to an int.
      charAsInt = charStack.join('').to_i(radix)

-      # If the integer is between 127 and 160 (so 128 and bigger and 159 and
-      # smaller) we need to do the "windows trick".
-      if (127...160).include? charAsInt
+      if charAsInt == 13
+        @tokenQueue.push({:type => :ParseError, :data =>
+          _("Incorrect CR newline entity. Replaced with LF.")})
+        charAsInt = 10
+      elsif (128..159).include? charAsInt
+        # If the integer is between 127 and 160 (so 128 and bigger and 159
+        # and smaller) we need to do the "windows trick".
        @tokenQueue.push({:type => :ParseError, :data =>
          _("Entity used with illegal number (windows-1252 reference).")})

        charAsInt = ENTITIES_WINDOWS1252[charAsInt - 128]
      end

-      if charAsInt > 0 and charAsInt <= 1114111
+      if 0 < charAsInt and charAsInt <= 1114111 and not (55296 <= charAsInt and charAsInt <= 57343)
        char = [charAsInt].pack('U')
+      else
+        char = [0xFFFD].pack('U')
+        @tokenQueue.push({:type => :ParseError, :data =>
+          _("Numeric entity represents an illegal codepoint.")})
      end

      # Discard the ; if present. Otherwise, put it back on the queue and
@ -160,18 +167,18 @@ module HTML5lib
      if c != ";"
        @tokenQueue.push({:type => :ParseError, :data =>
          _("Numeric entity didn't end with ';'.")})
-        @stream.queue.push(c)
+        @stream.unget(c)
      end

      return char
    end

-    def consumeEntity
+    def consumeEntity(from_attribute=false)
      char = nil
      charStack = [@stream.char]
      if SPACE_CHARACTERS.include?(charStack[0]) or 
        [:EOF, '<', '&'].include?(charStack[0])
-        @stream.queue+= charStack
+        @stream.unget(charStack)
      elsif charStack[0] == "#"
        # We might have a number entity here.
        charStack += [@stream.char, @stream.char]
@ -179,22 +186,22 @@ module HTML5lib
          # If we reach the end of the file put everything up to :EOF
          # back in the queue
          charStack = charStack[0...charStack.index(:EOF)]
-          @stream.queue+= charStack
+          @stream.unget(charStack)
          @tokenQueue.push({:type => :ParseError, :data =>
            _("Numeric entity expected. Got end of file instead.")})
        else
          if charStack[1].downcase == "x" \
            and HEX_DIGITS.include? charStack[2]
            # Hexadecimal entity detected.
-            @stream.queue.push(charStack[2])
+            @stream.unget(charStack[2])
            char = consumeNumberEntity(true)
          elsif DIGITS.include? charStack[1]
            # Decimal entity detected.
-            @stream.queue += charStack[1..-1]
+            @stream.unget(charStack[1..-1])
            char = consumeNumberEntity(false)
          else
            # No number entity detected.
-            @stream.queue += charStack
+            @stream.unget(charStack)
            @tokenQueue.push({:type => :ParseError, :data =>
              _("Numeric entity expected but none found.")})
          end
@ -209,6 +216,8 @@ module HTML5lib
        filteredEntityList.reject! {|e| e[0].chr != charStack[0]}
        entityName = nil

+        # Try to find the longest entity the string will match to take care
+        # of &noti for instance.
        while charStack[-1] != :EOF
          name = charStack.join('')
          if filteredEntityList.any? {|e| e[0...name.length] == name}
@ -220,6 +229,7 @@ module HTML5lib

          if ENTITIES.include? name
            entityName = name
+            break if entityName[-1] == ';'
          end
        end

@ -228,15 +238,23 @@ module HTML5lib

          # Check whether or not the last character returned can be
          # discarded or needs to be put back.
-          if not charStack[-1] == ";"
+          if entityName[-1] != ?;
            @tokenQueue.push({:type => :ParseError, :data =>
              _("Named entity didn't end with ';'.")})
-            @stream.queue += charStack[entityName.length..-1]
+          end
+
+          if charStack[-1] != ";" and from_attribute and
+             (ASCII_LETTERS.include?(charStack[entityName.length]) or
+              DIGITS.include?(charStack[entityName.length]))
+            @stream.unget(charStack)
+            char = '&'
+          else
+            @stream.unget(charStack[entityName.length..-1])
          end
        else
          @tokenQueue.push({:type => :ParseError, :data =>
            _("Named entity expected. Got none.")})
-          @stream.queue += charStack
+          @stream.unget(charStack)
        end
      end
      return char
@ -244,7 +262,7 @@ module HTML5lib

    # This method replaces the need for "entityInAttributeValueState".
    def processEntityInAttribute
-      entity = consumeEntity
+      entity = consumeEntity(true)
      if entity
        @currentToken[:data][-1][1] += entity
      else
@ -274,20 +292,23 @@ module HTML5lib
        @lastFourChars.shift if @lastFourChars.length > 4
      end

-      if data == "&" and [:PCDATA,:RCDATA].include?(@contentModelFlag)
-        @state = @states[:entityData]
+      if data == "&" and !@escapeFlag and
+        [:PCDATA,:RCDATA].include?(@contentModelFlag)
+          @state = @states[:entityData]

-      elsif data == "-" and [:CDATA,:RCDATA].include?(@contentModelFlag) and
-        @escapeFlag == false and @lastFourChars.join('') == "<!--"
+      elsif data == "-" and !@escapeFlag and
+        [:CDATA,:RCDATA].include?(@contentModelFlag) and
+        @lastFourChars.join('') == "<!--"
          @escapeFlag = true
          @tokenQueue.push({:type => :Characters, :data => data})

-      elsif data == "<" and @escapeFlag == false and
+      elsif data == "<" and !@escapeFlag and
        [:PCDATA,:CDATA,:RCDATA].include?(@contentModelFlag)
          @state = @states[:tagOpen]

-      elsif data == ">" and [:CDATA,:RCDATA].include?(@contentModelFlag) and
-        @escapeFlag == true and @lastFourChars[1..-1].join('') == "-->"
+      elsif data == ">" and @escapeFlag and 
+        [:CDATA,:RCDATA].include?(@contentModelFlag) and
+        @lastFourChars[1..-1].join('') == "-->"
          @escapeFlag = false
          @tokenQueue.push({:type => :Characters, :data => data})

@ -345,14 +366,14 @@ module HTML5lib
          @tokenQueue.push({:type => :ParseError, :data =>
            _("Expected tag name. Got '?' instead (HTML doesn't " +
            "support processing instructions).")})
-          @stream.queue.push(data)
+          @stream.unget(data)
          @state = @states[:bogusComment]
        else
          # XXX
          @tokenQueue.push({:type => :ParseError, :data =>
            _("Expected tag name. Got something else instead")})
          @tokenQueue.push({:type => :Characters, :data => "<"})
-          @stream.queue.push(data)
+          @stream.unget(data)
          @state = @states[:data]
        end
      else
@ -363,7 +384,7 @@ module HTML5lib
          @state = @states[:closeTagOpen]
        else
          @tokenQueue.push({:type => :Characters, :data => "<"})
-          @stream.queue.insert(0, data)
+          @stream.unget(data)
          @state = @states[:data]
        end
      end
@ -388,7 +409,7 @@ module HTML5lib

          # Since this is just for checking. We put the characters back on
          # the stack.
-          @stream.queue += charStack
+          @stream.unget(charStack)
        end

        if @currentToken and
@ -426,7 +447,7 @@ module HTML5lib
        # XXX data can be _'_...
        @tokenQueue.push({:type => :ParseError, :data =>
          _("Expected closing tag. Unexpected character '#{data}' found.")})
-        @stream.queue.push(data)
+        @stream.unget(data)
        @state = @states[:bogusComment]
      end

@ -556,7 +577,7 @@ module HTML5lib
        @state = @states[:attributeValueDoubleQuoted]
      elsif data == "&"
        @state = @states[:attributeValueUnQuoted]
-        @stream.queue.push(data);
+        @stream.unget(data);
      elsif data == "'"
        @state = @states[:attributeValueSingleQuoted]
      elsif data == ">"
@ -656,7 +677,7 @@ module HTML5lib
        else
          @tokenQueue.push({:type => :ParseError, :data =>
            _("Expected '--' or 'DOCTYPE'. Not found.")})
-          @stream.queue += charStack
+          @stream.unget(charStack)
          @state = @states[:bogusComment]
        end
      end
@ -771,7 +792,7 @@ module HTML5lib
      else
        @tokenQueue.push({:type => :ParseError, :data =>
          _("No space after literal string 'DOCTYPE'.")})
-        @stream.queue.push(data)
+        @stream.unget(data)
        @state = @states[:beforeDoctypeName]
      end
      return true
@ -827,7 +848,7 @@ module HTML5lib
        @state = @states[:data]
      elsif data == :EOF
        @currentToken[:data] = true
-        @stream.queue.push(data)
+        @stream.unget(data)
        @tokenQueue.push({:type => :ParseError, :data =>
          _("Unexpected end of file in DOCTYPE.")})
        @currentToken[:correct] = false
@ -842,7 +863,7 @@ module HTML5lib
        elsif token == "system"
          @state = @states[:beforeDoctypeSystemIdentifier]
        else
-          @stream.queue += charStack
+          @stream.unget(charStack)
          @tokenQueue.push({:type => :ParseError, :data =>
            _("Expected 'public' or 'system'. Got '#{charStack.join('')}'")})
          @state = @states[:bogusDoctype]
@ -1028,7 +1049,7 @@ module HTML5lib
        @state = @states[:data]
      elsif data == :EOF
        # XXX EMIT
-        @stream.queue.push(data)
+        @stream.unget(data)
        @tokenQueue.push({:type => :ParseError, :data =>
          _("Unexpected end of file in bogus doctype.")})
        @currentToken[:correct] = false
--- a/vendor/plugins/HTML5lib/lib/html5lib/treebuilders.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/treebuilders.rb
@ -1,17 +1,17 @@
-module HTML5lib
+module HTML5
  module TreeBuilders

    class << self
      def [](name)
        case name.to_s.downcase
        when 'simpletree' then
-          require 'html5lib/treebuilders/simpletree'
+          require 'html5/treebuilders/simpletree'
          SimpleTree::TreeBuilder
        when 'rexml' then
-          require 'html5lib/treebuilders/rexml'
+          require 'html5/treebuilders/rexml'
          REXML::TreeBuilder
        when 'hpricot' then
-          require 'html5lib/treebuilders/hpricot'
+          require 'html5/treebuilders/hpricot'
          Hpricot::TreeBuilder
        else
          raise "Unknown TreeBuilder #{name}"
--- a/vendor/plugins/HTML5lib/lib/html5lib/treebuilders/base.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/treebuilders/base.rb
@ -1,8 +1,8 @@
-require 'html5lib/constants'
+require 'html5/constants'

 #XXX - TODO; make the default interface more ElementTree-like rather than DOM-like

-module HTML5lib
+module HTML5

  # The scope markers are inserted when entering buttons, object elements,
  # marquees, table cells, and table captions, and are used to prevent formatting
--- a/vendor/plugins/HTML5lib/lib/html5lib/treebuilders/hpricot.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/treebuilders/hpricot.rb
@ -1,221 +1,221 @@
-require 'html5lib/treebuilders/base'
-require 'rubygems'
-require 'hpricot'
-require 'forwardable'
-
-module HTML5lib
-  module TreeBuilders
-    module Hpricot
-
-      class Node < Base::Node
-
-        extend Forwardable
-
-        def_delegators :@hpricot, :name
-
-        attr_accessor :hpricot
-
-        def initialize(name)
-          super(name)
-          @hpricot = self.class.hpricot_class.new name
-        end
-
-        def appendChild(node)
-          if node.kind_of?(TextNode) and childNodes.any? and childNodes.last.kind_of?(TextNode)
-            childNodes[-1].hpricot.content = childNodes[-1].hpricot.to_s + node.hpricot.to_s
-          else
-            childNodes << node
-            hpricot.children << node.hpricot
-          end
-          if (oldparent = node.hpricot.parent) != nil
-            oldparent.children.delete_at(oldparent.children.index(node.hpricot))
-          end
-          node.hpricot.parent = hpricot
-          node.parent = self
-        end
-
-        def removeChild(node)
-           childNodes.delete(node)
-           hpricot.children.delete_at(hpricot.children.index(node.hpricot))
-           node.hpricot.parent = nil
-           node.parent = nil
-        end
-
-        def insertText(data, before=nil)
-          if before
-            insertBefore(TextNode.new(data), before)
-          else
-            appendChild(TextNode.new(data))
-          end
-        end
-
-        def insertBefore(node, refNode)
-          index = childNodes.index(refNode)
-          if node.kind_of?(TextNode) and index > 0 and childNodes[index-1].kind_of?(TextNode)
-            childNodes[index-1].hpricot.content = childNodes[index-1].hpricot.to_s + node.hpricot.to_s
-          else
-            refNode.hpricot.parent.insert_before(node.hpricot,refNode.hpricot)
-            childNodes.insert(index, node)
-          end
-        end
-
-        def hasContent
-          childNodes.any?
-        end
-      end
-
-      class Element < Node
-        def self.hpricot_class
-          ::Hpricot::Elem
-        end
-
-        def initialize(name)
-          super(name)
-
-          @hpricot = ::Hpricot::Elem.new(::Hpricot::STag.new(name))
-        end
-
-        def name
-          @hpricot.stag.name
-        end
-
-        def cloneNode
-          attributes.inject(self.class.new(name)) do |node, (name, value)|
-            node.hpricot[name] = value
-            node
-          end
-        end
-
-        # A call to Hpricot::Elem#raw_attributes is built dynamically,
-        # so alterations to the returned value (a hash) will be lost.
-        #
-        # AttributeProxy works around this by forwarding :[]= calls
-        # to the raw_attributes accessor on the element start tag.
-        #
-        class AttributeProxy
-          def initialize(hpricot)
-            @hpricot = hpricot
-          end
-
-          def []=(k, v)
-            @hpricot.stag.send(stag_attributes_method)[k] = v
-          end
-
-          def stag_attributes_method
-            # STag#attributes changed to STag#raw_attributes after Hpricot 0.5
-            @hpricot.stag.respond_to?(:raw_attributes) ? :raw_attributes : :attributes
-          end
-
-          def method_missing(*a, &b)
-            @hpricot.attributes.send(*a, &b)
-          end
-        end
-
-        def attributes
-          AttributeProxy.new(@hpricot)
-        end
-
-        def attributes=(attrs)
-          attrs.each { |name, value| @hpricot[name] = value }
-        end
-
-        def printTree(indent=0)
-          tree = "\n|#{' ' * indent}<#{name}>"
-          indent += 2
-          attributes.each do |name, value|
-            next if name == 'xmlns'
-            tree += "\n|#{' ' * indent}#{name}=\"#{value}\""
-          end
-          childNodes.inject(tree) { |tree, child| tree + child.printTree(indent) }
-        end
-      end
-
-      class Document < Node
-        def self.hpricot_class
-          ::Hpricot::Doc
-        end
-
-        def initialize
-          super(nil)
-        end
-
-        def printTree(indent=0)
-          childNodes.inject('#document') { |tree, child| tree + child.printTree(indent + 2) }
-        end
-      end
-
-      class DocumentType < Node
-        def self.hpricot_class
-          ::Hpricot::DocType
-        end
-
-        def initialize(name)
-          begin
-            super(name)
-          rescue ArgumentError # needs 3...
-          end
-
-          @hpricot = ::Hpricot::DocType.new(name, nil, nil)
-        end
-
-        def printTree(indent=0)
-          "\n|#{' ' * indent}<!DOCTYPE #{hpricot.target}>"
-        end
-      end
-
-      class DocumentFragment < Element
-        def initialize
-          super('')
-        end
-
-        def printTree(indent=0)
-          childNodes.inject('') { |tree, child| tree + child.printTree(indent+2) }
-        end
-      end
-
-      class TextNode < Node
-        def initialize(data)
-          @hpricot = ::Hpricot::Text.new(data)
-        end
-
-        def printTree(indent=0)
-          "\n|#{' ' * indent}\"#{hpricot.content}\""
-        end
-      end
-
-      class CommentNode < Node
-        def self.hpricot_class
-          ::Hpricot::Comment
-        end
-
-        def printTree(indent=0)
-          "\n|#{' ' * indent}<!-- #{hpricot.content} -->"
-        end
-      end
-
-      class TreeBuilder < Base::TreeBuilder
-        def initialize
-          @documentClass = Document
-          @doctypeClass = DocumentType
-          @elementClass = Element
-          @commentClass = CommentNode
-          @fragmentClass = DocumentFragment
-        end
-
-        def testSerializer(node)
-          node.printTree
-        end
-
-        def getDocument
-          @document.hpricot
-        end
-
-        def getFragment
-          @document = super
-          return @document.hpricot.children
-        end
-      end
-
-    end
-  end
-end
+require 'html5/treebuilders/base'
+require 'rubygems'
+require 'hpricot'
+require 'forwardable'
+
+module HTML5
+  module TreeBuilders
+    module Hpricot
+
+      # Shared behaviour for the Hpricot-backed tree nodes.  Each wrapper keeps
+      # the Hpricot object it represents in @hpricot and mirrors changes to its
+      # child list (childNodes, presumably supplied by Base::Node) onto that
+      # object's children.
+      class Node < Base::Node
+
+        extend Forwardable
+
+        # The wrapped Hpricot object.
+        attr_accessor :hpricot
+
+        # +name+ is answered by the wrapped Hpricot object.
+        def_delegators :@hpricot, :name
+
+        # Subclasses provide hpricot_class; it is instantiated with +name+.
+        def initialize(name)
+          super(name)
+          @hpricot = self.class.hpricot_class.new(name)
+        end
+
+        # Adds +node+ as the last child.  A text node that arrives right after
+        # another text node is folded into that existing node instead of being
+        # stored.  In either case +node+ is then removed from the children of
+        # its previous Hpricot parent (when it has one) and both of its parent
+        # pointers are set to this node.
+        def appendChild(node)
+          if node.kind_of?(TextNode) && childNodes.any? && childNodes.last.kind_of?(TextNode)
+            tail = childNodes.last.hpricot
+            tail.content = tail.to_s + node.hpricot.to_s
+          else
+            childNodes << node
+            hpricot.children << node.hpricot
+          end
+
+          former_parent = node.hpricot.parent
+          unless former_parent.nil?
+            siblings = former_parent.children
+            siblings.delete_at(siblings.index(node.hpricot))
+          end
+
+          node.hpricot.parent = hpricot
+          node.parent = self
+        end
+
+        # Drops +node+ from both child lists and clears its parent pointers.
+        def removeChild(node)
+          childNodes.delete(node)
+          position = hpricot.children.index(node.hpricot)
+          hpricot.children.delete_at(position)
+          node.hpricot.parent = nil
+          node.parent = nil
+        end
+
+        # Wraps +data+ in a TextNode and places it before +before+ when that
+        # is given, otherwise at the end of the children.
+        def insertText(data, before=nil)
+          text = TextNode.new(data)
+          before ? insertBefore(text, before) : appendChild(text)
+        end
+
+        # Inserts +node+ immediately ahead of +refNode+.  A text node landing
+        # directly after an existing text node is appended to that node's
+        # content rather than inserted as a separate child.
+        def insertBefore(node, refNode)
+          index = childNodes.index(refNode)
+          if node.kind_of?(TextNode) && index > 0 && childNodes[index - 1].kind_of?(TextNode)
+            preceding = childNodes[index - 1].hpricot
+            preceding.content = preceding.to_s + node.hpricot.to_s
+          else
+            refNode.hpricot.parent.insert_before(node.hpricot, refNode.hpricot)
+            childNodes.insert(index, node)
+          end
+        end
+
+        # Result of childNodes.any?.
+        def hasContent
+          childNodes.any?
+        end
+      end
+
+      # Element node backed by an Hpricot::Elem.
+      class Element < Node
+        def self.hpricot_class
+          ::Hpricot::Elem
+        end
+
+        # After the inherited setup, @hpricot is replaced by an Elem built
+        # from a proper start tag for +name+.
+        def initialize(name)
+          super(name)
+
+          start_tag = ::Hpricot::STag.new(name)
+          @hpricot = ::Hpricot::Elem.new(start_tag)
+        end
+
+        # The tag name, read from the element's start tag.
+        def name
+          @hpricot.stag.name
+        end
+
+        # Returns a new node of the same class and name carrying a copy of
+        # this element's attributes.
+        def cloneNode
+          copy = self.class.new(name)
+          attributes.each { |key, value| copy.hpricot[key] = value }
+          copy
+        end
+
+        # Per the original note: the value returned for
+        # Hpricot::Elem#raw_attributes is built dynamically, so changes made
+        # to that returned hash would be lost.
+        #
+        # AttributeProxy therefore sends :[]= to the attribute hash of the
+        # element's start tag, and hands every other message to the element's
+        # own attributes.
+        #
+        class AttributeProxy
+          def initialize(hpricot)
+            @hpricot = hpricot
+          end
+
+          # Writes straight into the start tag's attribute hash.
+          def []=(k, v)
+            start_tag = @hpricot.stag
+            start_tag.send(stag_attributes_method)[k] = v
+          end
+
+          # STag#attributes changed to STag#raw_attributes after Hpricot 0.5,
+          # so pick whichever accessor this start tag responds to.
+          def stag_attributes_method
+            if @hpricot.stag.respond_to?(:raw_attributes)
+              :raw_attributes
+            else
+              :attributes
+            end
+          end
+
+          # Anything else is forwarded to the element's attributes.
+          def method_missing(*a, &b)
+            @hpricot.attributes.send(*a, &b)
+          end
+        end
+
+        # A fresh AttributeProxy around the wrapped element on every call.
+        def attributes
+          AttributeProxy.new(@hpricot)
+        end
+
+        # Sets each name/value pair of +attrs+ on the wrapped element.
+        def attributes=(attrs)
+          attrs.each do |key, value|
+            @hpricot[key] = value
+          end
+        end
+
+        # Renders this element as text: the tag line first, then one line per
+        # attribute (xmlns is left out), then each child, all but the tag line
+        # indented two columns further.
+        def printTree(indent=0)
+          output = "\n|#{' ' * indent}<#{name}>"
+          inner = indent + 2
+          attributes.each do |key, value|
+            output += "\n|#{' ' * inner}#{key}=\"#{value}\"" unless key == 'xmlns'
+          end
+          childNodes.each { |child| output += child.printTree(inner) }
+          output
+        end
+      end
+
+      # Document root, backed by an Hpricot::Doc.
+      class Document < Node
+        def self.hpricot_class
+          ::Hpricot::Doc
+        end
+
+        # A document has no name, so nil is passed up to Node#initialize.
+        def initialize
+          super(nil)
+        end
+
+        # '#document' followed by every child rendered two columns deeper.
+        def printTree(indent=0)
+          output = '#document'
+          childNodes.each { |child| output += child.printTree(indent + 2) }
+          output
+        end
+      end
+
+      # Doctype node, backed by an Hpricot::DocType.
+      class DocumentType < Node
+        def self.hpricot_class
+          ::Hpricot::DocType
+        end
+
+        def initialize(name)
+          begin
+            super
+          rescue ArgumentError
+            # Node#initialize instantiates hpricot_class with a single
+            # argument; the original note here says DocType "needs 3", so the
+            # error is ignored on purpose and the object is built just below.
+          end
+
+          @hpricot = ::Hpricot::DocType.new(name, nil, nil)
+        end
+
+        # One indented line of the form <!DOCTYPE target>.
+        def printTree(indent=0)
+          padding = ' ' * indent
+          "\n|#{padding}<!DOCTYPE #{hpricot.target}>"
+        end
+      end
+
+      # Fragment container: an Element created with an empty tag name.
+      class DocumentFragment < Element
+        def initialize
+          super('')
+        end
+
+        # Only the children are rendered (two columns deeper); the fragment
+        # contributes no line of its own.
+        def printTree(indent=0)
+          output = ''
+          childNodes.each { |child| output += child.printTree(indent + 2) }
+          output
+        end
+      end
+
+      # Text node, backed by an Hpricot::Text.
+      class TextNode < Node
+        # Only wraps +data+ in an Hpricot::Text; super is not called here.
+        def initialize(data)
+          @hpricot = ::Hpricot::Text.new(data)
+        end
+
+        # One indented line holding the text content in double quotes.
+        def printTree(indent=0)
+          padding = ' ' * indent
+          "\n|#{padding}\"#{hpricot.content}\""
+        end
+      end
+
+      # Comment node, backed by an Hpricot::Comment.
+      class CommentNode < Node
+        def self.hpricot_class
+          ::Hpricot::Comment
+        end
+
+        # One indented line of the form <!-- content -->.
+        def printTree(indent=0)
+          padding = ' ' * indent
+          "\n|#{padding}<!-- #{hpricot.content} -->"
+        end
+      end
+
+      # Tree builder that produces the Hpricot-backed node classes defined in
+      # this module.
+      class TreeBuilder < Base::TreeBuilder
+        # Records which node class to use for each kind of node (presumably
+        # read by Base::TreeBuilder -- confirm there).  super is not called.
+        def initialize
+          @documentClass = Document
+          @doctypeClass  = DocumentType
+          @elementClass  = Element
+          @commentClass  = CommentNode
+          @fragmentClass = DocumentFragment
+        end
+
+        # Text rendering of +node+, as produced by its printTree.
+        def testSerializer(node)
+          node.printTree
+        end
+
+        # The Hpricot object wrapped by the current document node.
+        def getDocument
+          @document.hpricot
+        end
+
+        # Stores the fragment returned by the superclass in @document and
+        # answers the children of its wrapped Hpricot object.
+        def getFragment
+          fragment = super
+          @document = fragment
+          fragment.hpricot.children
+        end
+      end
+
+    end
+  end
+end
--- a/vendor/plugins/HTML5lib/lib/html5lib/treebuilders/rexml.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/treebuilders/rexml.rb
@ -1,8 +1,8 @@
-require 'html5lib/treebuilders/base'
+require 'html5/treebuilders/base'
 require 'rexml/document'
 require 'forwardable'

-module HTML5lib
+module HTML5
  module TreeBuilders
    module REXML

--- a/vendor/plugins/HTML5lib/lib/html5lib/treebuilders/simpletree.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/treebuilders/simpletree.rb
@ -1,6 +1,6 @@
-require 'html5lib/treebuilders/base'
+require 'html5/treebuilders/base'

-module HTML5lib
+module HTML5
  module TreeBuilders
    module SimpleTree

--- a/vendor/plugins/HTML5lib/lib/html5lib/treewalkers.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/treewalkers.rb
@ -1,19 +1,19 @@
-require 'html5lib/treewalkers/base'
+require 'html5/treewalkers/base'

-module HTML5lib
+module HTML5
  module TreeWalkers

    class << self
      def [](name)
        case name.to_s.downcase
        when 'simpletree' then
-          require 'html5lib/treewalkers/simpletree'
+          require 'html5/treewalkers/simpletree'
          SimpleTree::TreeWalker
        when 'rexml' then
-          require 'html5lib/treewalkers/rexml'
+          require 'html5/treewalkers/rexml'
          REXML::TreeWalker
        when 'hpricot' then
-          require 'html5lib/treewalkers/hpricot'
+          require 'html5/treewalkers/hpricot'
          Hpricot::TreeWalker
        else
          raise "Unknown TreeWalker #{name}"
--- a/vendor/plugins/HTML5lib/lib/html5lib/treewalkers/base.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/treewalkers/base.rb
@ -1,5 +1,5 @@
-require 'html5lib/constants'
-module HTML5lib
+require 'html5/constants'
+module HTML5
 module TreeWalkers

 module TokenConstructor
--- a/vendor/plugins/HTML5lib/lib/html5lib/treewalkers/hpricot.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/treewalkers/hpricot.rb
@ -1,10 +1,10 @@
-require 'html5lib/treewalkers/base'
+require 'html5/treewalkers/base'
 require 'rexml/document'

-module HTML5lib
+module HTML5
  module TreeWalkers
    module Hpricot
-      class TreeWalker < HTML5lib::TreeWalkers::NonRecursiveTreeWalker
+      class TreeWalker < HTML5::TreeWalkers::NonRecursiveTreeWalker

        def node_details(node)
          case node
--- a/vendor/plugins/HTML5lib/lib/html5lib/treewalkers/rexml.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/treewalkers/rexml.rb
@ -1,10 +1,10 @@
-require 'html5lib/treewalkers/base'
+require 'html5/treewalkers/base'
 require 'rexml/document'

-module HTML5lib
+module HTML5
  module TreeWalkers
    module REXML
-      class TreeWalker < HTML5lib::TreeWalkers::NonRecursiveTreeWalker
+      class TreeWalker < HTML5::TreeWalkers::NonRecursiveTreeWalker

        def node_details(node)
          case node
--- a/vendor/plugins/HTML5lib/lib/html5lib/treewalkers/simpletree.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/treewalkers/simpletree.rb
@ -1,10 +1,10 @@
-require 'html5lib/treewalkers/base'
+require 'html5/treewalkers/base'

-module HTML5lib
+module HTML5
  module TreeWalkers
    module SimpleTree
-      class TreeWalker < HTML5lib::TreeWalkers::Base
-        include HTML5lib::TreeBuilders::SimpleTree
+      class TreeWalker < HTML5::TreeWalkers::Base
+        include HTML5::TreeBuilders::SimpleTree

        def walk(node)
          case node
--- a/vendor/plugins/HTML5lib/lib/html5lib/constants.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/constants.rb
@ -1,708 +0,0 @@
-module HTML5lib
-
-  class EOF < Exception; end
-
-  CONTENT_MODEL_FLAGS = [
-      :PCDATA,
-      :RCDATA,
-      :CDATA,
-      :PLAINTEXT
-  ]
-
-  SCOPING_ELEMENTS = %w[
-      button
-      caption
-      html
-      marquee
-      object
-      table
-      td
-      th
-  ]
-
-  FORMATTING_ELEMENTS = %w[
-      a
-      b
-      big
-      em
-      font
-      i
-      nobr
-      s
-      small
-      strike
-      strong
-      tt
-      u
-  ]
-
-  SPECIAL_ELEMENTS = %w[
-      address
-      area
-      base
-      basefont
-      bgsound
-      blockquote
-      body
-      br
-      center
-      col
-      colgroup
-      dd
-      dir
-      div
-      dl
-      dt
-      embed
-      fieldset
-      form
-      frame
-      frameset
-      h1
-      h2
-      h3
-      h4
-      h5
-      h6
-      head
-      hr
-      iframe
-      image
-      img
-      input
-      isindex
-      li
-      link
-      listing
-      menu
-      meta
-      noembed
-      noframes
-      noscript
-      ol
-      optgroup
-      option
-      p
-      param
-      plaintext
-      pre
-      script
-      select
-      spacer
-      style
-      tbody
-      textarea
-      tfoot
-      thead
-      title
-      tr
-      ul
-      wbr
-  ]
-
-  SPACE_CHARACTERS = %W[
-      \t
-      \n
-      \x0B
-      \x0C
-      \x20
-      \r
-  ]
-
-  TABLE_INSERT_MODE_ELEMENTS = %w[
-      table
-      tbody
-      tfoot
-      thead
-      tr
-  ]
-
-  ASCII_LOWERCASE = ('a'..'z').to_a.join('')
-  ASCII_UPPERCASE = ('A'..'Z').to_a.join('')
-  ASCII_LETTERS = ASCII_LOWERCASE + ASCII_UPPERCASE
-  DIGITS = '0'..'9'
-  HEX_DIGITS = DIGITS.to_a + ('a'..'f').to_a + ('A'..'F').to_a
-
-  # Heading elements need to be ordered 
-  HEADING_ELEMENTS = %w[
-      h1
-      h2
-      h3
-      h4
-      h5
-      h6
-  ]
-
-  # XXX What about event-source and command?
-  VOID_ELEMENTS = %w[
-      base
-      link
-      meta
-      hr
-      br
-      img
-      embed
-      param
-      area
-      col
-      input
-  ]
-
-  CDATA_ELEMENTS = %w[title textarea]
-
-  RCDATA_ELEMENTS = %w[
-    style
-    script
-    xmp
-    iframe
-    noembed
-    noframes
-    noscript
-  ]
-
-  BOOLEAN_ATTRIBUTES = {
-    :global => %w[irrelevant],
-    'style' => %w[scoped],
-    'img' => %w[ismap],
-    'audio' => %w[autoplay controls],
-    'video' => %w[autoplay controls],
-    'script' => %w[defer async],
-    'details' => %w[open],
-    'datagrid' => %w[multiple disabled],
-    'command' => %w[hidden disabled checked default],
-    'menu' => %w[autosubmit],
-    'fieldset' => %w[disabled readonly],
-    'option' => %w[disabled readonly selected],
-    'optgroup' => %w[disabled readonly],
-    'button' => %w[disabled autofocus],
-    'input' => %w[disabled readonly required autofocus checked ismap],
-    'select' => %w[disabled readonly autofocus multiple],
-    'output' => %w[disabled readonly]
-  }
-
-  # entitiesWindows1252 has to be _ordered_ and needs to have an index.
-  ENTITIES_WINDOWS1252 = [
-      8364,  # 0x80  0x20AC  EURO SIGN
-      65533, # 0x81          UNDEFINED
-      8218,  # 0x82  0x201A  SINGLE LOW-9 QUOTATION MARK
-      402,   # 0x83  0x0192  LATIN SMALL LETTER F WITH HOOK
-      8222,  # 0x84  0x201E  DOUBLE LOW-9 QUOTATION MARK
-      8230,  # 0x85  0x2026  HORIZONTAL ELLIPSIS
-      8224,  # 0x86  0x2020  DAGGER
-      8225,  # 0x87  0x2021  DOUBLE DAGGER
-      710,   # 0x88  0x02C6  MODIFIER LETTER CIRCUMFLEX ACCENT
-      8240,  # 0x89  0x2030  PER MILLE SIGN
-      352,   # 0x8A  0x0160  LATIN CAPITAL LETTER S WITH CARON
-      8249,  # 0x8B  0x2039  SINGLE LEFT-POINTING ANGLE QUOTATION MARK
-      338,   # 0x8C  0x0152  LATIN CAPITAL LIGATURE OE
-      65533, # 0x8D          UNDEFINED
-      381,   # 0x8E  0x017D  LATIN CAPITAL LETTER Z WITH CARON
-      65533, # 0x8F          UNDEFINED
-      65533, # 0x90          UNDEFINED
-      8216,  # 0x91  0x2018  LEFT SINGLE QUOTATION MARK
-      8217,  # 0x92  0x2019  RIGHT SINGLE QUOTATION MARK
-      8220,  # 0x93  0x201C  LEFT DOUBLE QUOTATION MARK
-      8221,  # 0x94  0x201D  RIGHT DOUBLE QUOTATION MARK
-      8226,  # 0x95  0x2022  BULLET
-      8211,  # 0x96  0x2013  EN DASH
-      8212,  # 0x97  0x2014  EM DASH
-      732,   # 0x98  0x02DC  SMALL TILDE
-      8482,  # 0x99  0x2122  TRADE MARK SIGN
-      353,   # 0x9A  0x0161  LATIN SMALL LETTER S WITH CARON
-      8250,  # 0x9B  0x203A  SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
-      339,   # 0x9C  0x0153  LATIN SMALL LIGATURE OE
-      65533, # 0x9D          UNDEFINED
-      382,   # 0x9E  0x017E  LATIN SMALL LETTER Z WITH CARON
-      376    # 0x9F  0x0178  LATIN CAPITAL LETTER Y WITH DIAERESIS
-  ]
-
-  private
-
-    def self.U n
-      [n].pack('U')
-    end
-
-  public
-
-  ENTITIES = {
-      "AElig" => U(0xC6),
-      "Aacute" => U(0xC1),
-      "Acirc" => U(0xC2),
-      "Agrave" => U(0xC0),
-      "Alpha" => U(0x0391),
-      "Aring" => U(0xC5),
-      "Atilde" => U(0xC3),
-      "Auml" => U(0xC4),
-      "Beta" => U(0x0392),
-      "Ccedil" => U(0xC7),
-      "Chi" => U(0x03A7),
-      "Dagger" => U(0x2021),
-      "Delta" => U(0x0394),
-      "ETH" => U(0xD0),
-      "Eacute" => U(0xC9),
-      "Ecirc" => U(0xCA),
-      "Egrave" => U(0xC8),
-      "Epsilon" => U(0x0395),
-      "Eta" => U(0x0397),
-      "Euml" => U(0xCB),
-      "Gamma" => U(0x0393),
-      "Iacute" => U(0xCD),
-      "Icirc" => U(0xCE),
-      "Igrave" => U(0xCC),
-      "Iota" => U(0x0399),
-      "Iuml" => U(0xCF),
-      "Kappa" => U(0x039A),
-      "Lambda" => U(0x039B),
-      "Mu" => U(0x039C),
-      "Ntilde" => U(0xD1),
-      "Nu" => U(0x039D),
-      "OElig" => U(0x0152),
-      "Oacute" => U(0xD3),
-      "Ocirc" => U(0xD4),
-      "Ograve" => U(0xD2),
-      "Omega" => U(0x03A9),
-      "Omicron" => U(0x039F),
-      "Oslash" => U(0xD8),
-      "Otilde" => U(0xD5),
-      "Ouml" => U(0xD6),
-      "Phi" => U(0x03A6),
-      "Pi" => U(0x03A0),
-      "Prime" => U(0x2033),
-      "Psi" => U(0x03A8),
-      "Rho" => U(0x03A1),
-      "Scaron" => U(0x0160),
-      "Sigma" => U(0x03A3),
-      "THORN" => U(0xDE),
-      "Tau" => U(0x03A4),
-      "Theta" => U(0x0398),
-      "Uacute" => U(0xDA),
-      "Ucirc" => U(0xDB),
-      "Ugrave" => U(0xD9),
-      "Upsilon" => U(0x03A5),
-      "Uuml" => U(0xDC),
-      "Xi" => U(0x039E),
-      "Yacute" => U(0xDD),
-      "Yuml" => U(0x0178),
-      "Zeta" => U(0x0396),
-      "aacute" => U(0xE1),
-      "acirc" => U(0xE2),
-      "acute" => U(0xB4),
-      "aelig" => U(0xE6),
-      "agrave" => U(0xE0),
-      "alefsym" => U(0x2135),
-      "alpha" => U(0x03B1),
-      "amp" => U(0x26),
-      "AMP" => U(0x26),
-      "and" => U(0x2227),
-      "ang" => U(0x2220),
-      "apos" => U(0x27),
-      "aring" => U(0xE5),
-      "asymp" => U(0x2248),
-      "atilde" => U(0xE3),
-      "auml" => U(0xE4),
-      "bdquo" => U(0x201E),
-      "beta" => U(0x03B2),
-      "brvbar" => U(0xA6),
-      "bull" => U(0x2022),
-      "cap" => U(0x2229),
-      "ccedil" => U(0xE7),
-      "cedil" => U(0xB8),
-      "cent" => U(0xA2),
-      "chi" => U(0x03C7),
-      "circ" => U(0x02C6),
-      "clubs" => U(0x2663),
-      "cong" => U(0x2245),
-      "copy" => U(0xA9),
-      "COPY" => U(0xA9),
-      "crarr" => U(0x21B5),
-      "cup" => U(0x222A),
-      "curren" => U(0xA4),
-      "dArr" => U(0x21D3),
-      "dagger" => U(0x2020),
-      "darr" => U(0x2193),
-      "deg" => U(0xB0),
-      "delta" => U(0x03B4),
-      "diams" => U(0x2666),
-      "divide" => U(0xF7),
-      "eacute" => U(0xE9),
-      "ecirc" => U(0xEA),
-      "egrave" => U(0xE8),
-      "empty" => U(0x2205),
-      "emsp" => U(0x2003),
-      "ensp" => U(0x2002),
-      "epsilon" => U(0x03B5),
-      "equiv" => U(0x2261),
-      "eta" => U(0x03B7),
-      "eth" => U(0xF0),
-      "euml" => U(0xEB),
-      "euro" => U(0x20AC),
-      "exist" => U(0x2203),
-      "fnof" => U(0x0192),
-      "forall" => U(0x2200),
-      "frac12" => U(0xBD),
-      "frac14" => U(0xBC),
-      "frac34" => U(0xBE),
-      "frasl" => U(0x2044),
-      "gamma" => U(0x03B3),
-      "ge" => U(0x2265),
-      "gt" => U(0x3E),
-      "GT" => U(0x3E),
-      "hArr" => U(0x21D4),
-      "harr" => U(0x2194),
-      "hearts" => U(0x2665),
-      "hellip" => U(0x2026),
-      "iacute" => U(0xED),
-      "icirc" => U(0xEE),
-      "iexcl" => U(0xA1),
-      "igrave" => U(0xEC),
-      "image" => U(0x2111),
-      "infin" => U(0x221E),
-      "int" => U(0x222B),
-      "iota" => U(0x03B9),
-      "iquest" => U(0xBF),
-      "isin" => U(0x2208),
-      "iuml" => U(0xEF),
-      "kappa" => U(0x03BA),
-      "lArr" => U(0x21D0),
-      "lambda" => U(0x03BB),
-      "lang" => U(0x2329),
-      "laquo" => U(0xAB),
-      "larr" => U(0x2190),
-      "lceil" => U(0x2308),
-      "ldquo" => U(0x201C),
-      "le" => U(0x2264),
-      "lfloor" => U(0x230A),
-      "lowast" => U(0x2217),
-      "loz" => U(0x25CA),
-      "lrm" => U(0x200E),
-      "lsaquo" => U(0x2039),
-      "lsquo" => U(0x2018),
-      "lt" => U(0x3C),
-      "LT" => U(0x3C),
-      "macr" => U(0xAF),
-      "mdash" => U(0x2014),
-      "micro" => U(0xB5),
-      "middot" => U(0xB7),
-      "minus" => U(0x2212),
-      "mu" => U(0x03BC),
-      "nabla" => U(0x2207),
-      "nbsp" => U(0xA0),
-      "ndash" => U(0x2013),
-      "ne" => U(0x2260),
-      "ni" => U(0x220B),
-      "not" => U(0xAC),
-      "notin" => U(0x2209),
-      "nsub" => U(0x2284),
-      "ntilde" => U(0xF1),
-      "nu" => U(0x03BD),
-      "oacute" => U(0xF3),
-      "ocirc" => U(0xF4),
-      "oelig" => U(0x0153),
-      "ograve" => U(0xF2),
-      "oline" => U(0x203E),
-      "omega" => U(0x03C9),
-      "omicron" => U(0x03BF),
-      "oplus" => U(0x2295),
-      "or" => U(0x2228),
-      "ordf" => U(0xAA),
-      "ordm" => U(0xBA),
-      "oslash" => U(0xF8),
-      "otilde" => U(0xF5),
-      "otimes" => U(0x2297),
-      "ouml" => U(0xF6),
-      "para" => U(0xB6),
-      "part" => U(0x2202),
-      "permil" => U(0x2030),
-      "perp" => U(0x22A5),
-      "phi" => U(0x03C6),
-      "pi" => U(0x03C0),
-      "piv" => U(0x03D6),
-      "plusmn" => U(0xB1),
-      "pound" => U(0xA3),
-      "prime" => U(0x2032),
-      "prod" => U(0x220F),
-      "prop" => U(0x221D),
-      "psi" => U(0x03C8),
-      "quot" => U(0x22),
-      "QUOT" => U(0x22),
-      "rArr" => U(0x21D2),
-      "radic" => U(0x221A),
-      "rang" => U(0x232A),
-      "raquo" => U(0xBB),
-      "rarr" => U(0x2192),
-      "rceil" => U(0x2309),
-      "rdquo" => U(0x201D),
-      "real" => U(0x211C),
-      "reg" => U(0xAE),
-      "REG" => U(0xAE),
-      "rfloor" => U(0x230B),
-      "rho" => U(0x03C1),
-      "rlm" => U(0x200F),
-      "rsaquo" => U(0x203A),
-      "rsquo" => U(0x2019),
-      "sbquo" => U(0x201A),
-      "scaron" => U(0x0161),
-      "sdot" => U(0x22C5),
-      "sect" => U(0xA7),
-      "shy" => U(0xAD),
-      "sigma" => U(0x03C3),
-      "sigmaf" => U(0x03C2),
-      "sim" => U(0x223C),
-      "spades" => U(0x2660),
-      "sub" => U(0x2282),
-      "sube" => U(0x2286),
-      "sum" => U(0x2211),
-      "sup" => U(0x2283),
-      "sup1" => U(0xB9),
-      "sup2" => U(0xB2),
-      "sup3" => U(0xB3),
-      "supe" => U(0x2287),
-      "szlig" => U(0xDF),
-      "tau" => U(0x03C4),
-      "there4" => U(0x2234),
-      "theta" => U(0x03B8),
-      "thetasym" => U(0x03D1),
-      "thinsp" => U(0x2009),
-      "thorn" => U(0xFE),
-      "tilde" => U(0x02DC),
-      "times" => U(0xD7),
-      "trade" => U(0x2122),
-      "uArr" => U(0x21D1),
-      "uacute" => U(0xFA),
-      "uarr" => U(0x2191),
-      "ucirc" => U(0xFB),
-      "ugrave" => U(0xF9),
-      "uml" => U(0xA8),
-      "upsih" => U(0x03D2),
-      "upsilon" => U(0x03C5),
-      "uuml" => U(0xFC),
-      "weierp" => U(0x2118),
-      "xi" => U(0x03BE),
-      "yacute" => U(0xFD),
-      "yen" => U(0xA5),
-      "yuml" => U(0xFF),
-      "zeta" => U(0x03B6),
-      "zwj" => U(0x200D),
-      "zwnj" => U(0x200C)
-  }
-
-  ENCODINGS = %w[
-      ansi_x3.4-1968
-      iso-ir-6
-      ansi_x3.4-1986
-      iso_646.irv:1991
-      ascii
-      iso646-us
-      us-ascii
-      us
-      ibm367
-      cp367
-      csascii
-      ks_c_5601-1987
-      korean
-      iso-2022-kr
-      csiso2022kr
-      euc-kr
-      iso-2022-jp
-      csiso2022jp
-      iso-2022-jp-2
-      iso-ir-58
-      chinese
-      csiso58gb231280
-      iso_8859-1:1987
-      iso-ir-100
-      iso_8859-1
-      iso-8859-1
-      latin1
-      l1
-      ibm819
-      cp819
-      csisolatin1
-      iso_8859-2:1987
-      iso-ir-101
-      iso_8859-2
-      iso-8859-2
-      latin2
-      l2
-      csisolatin2
-      iso_8859-3:1988
-      iso-ir-109
-      iso_8859-3
-      iso-8859-3
-      latin3
-      l3
-      csisolatin3
-      iso_8859-4:1988
-      iso-ir-110
-      iso_8859-4
-      iso-8859-4
-      latin4
-      l4
-      csisolatin4
-      iso_8859-6:1987
-      iso-ir-127
-      iso_8859-6
-      iso-8859-6
-      ecma-114
-      asmo-708
-      arabic
-      csisolatinarabic
-      iso_8859-7:1987
-      iso-ir-126
-      iso_8859-7
-      iso-8859-7
-      elot_928
-      ecma-118
-      greek
-      greek8
-      csisolatingreek
-      iso_8859-8:1988
-      iso-ir-138
-      iso_8859-8
-      iso-8859-8
-      hebrew
-      csisolatinhebrew
-      iso_8859-5:1988
-      iso-ir-144
-      iso_8859-5
-      iso-8859-5
-      cyrillic
-      csisolatincyrillic
-      iso_8859-9:1989
-      iso-ir-148
-      iso_8859-9
-      iso-8859-9
-      latin5
-      l5
-      csisolatin5
-      iso-8859-10
-      iso-ir-157
-      l6
-      iso_8859-10:1992
-      csisolatin6
-      latin6
-      hp-roman8
-      roman8
-      r8
-      ibm037
-      cp037
-      csibm037
-      ibm424
-      cp424
-      csibm424
-      ibm437
-      cp437
-      437
-      cspc8codepage437
-      ibm500
-      cp500
-      csibm500
-      ibm775
-      cp775
-      cspc775baltic
-      ibm850
-      cp850
-      850
-      cspc850multilingual
-      ibm852
-      cp852
-      852
-      cspcp852
-      ibm855
-      cp855
-      855
-      csibm855
-      ibm857
-      cp857
-      857
-      csibm857
-      ibm860
-      cp860
-      860
-      csibm860
-      ibm861
-      cp861
-      861
-      cp-is
-      csibm861
-      ibm862
-      cp862
-      862
-      cspc862latinhebrew
-      ibm863
-      cp863
-      863
-      csibm863
-      ibm864
-      cp864
-      csibm864
-      ibm865
-      cp865
-      865
-      csibm865
-      ibm866
-      cp866
-      866
-      csibm866
-      ibm869
-      cp869
-      869
-      cp-gr
-      csibm869
-      ibm1026
-      cp1026
-      csibm1026
-      koi8-r
-      cskoi8r
-      koi8-u
-      big5-hkscs
-      ptcp154
-      csptcp154
-      pt154
-      cp154
-      utf-7
-      utf-16be
-      utf-16le
-      utf-16
-      utf-8
-      iso-8859-13
-      iso-8859-14
-      iso-ir-199
-      iso_8859-14:1998
-      iso_8859-14
-      latin8
-      iso-celtic
-      l8
-      iso-8859-15
-      iso_8859-15
-      iso-8859-16
-      iso-ir-226
-      iso_8859-16:2001
-      iso_8859-16
-      latin10
-      l10
-      gbk
-      cp936
-      ms936
-      gb18030
-      shift_jis
-      ms_kanji
-      csshiftjis
-      euc-jp
-      gb2312
-      big5
-      csbig5
-      windows-1250
-      windows-1251
-      windows-1252
-      windows-1253
-      windows-1254
-      windows-1255
-      windows-1256
-      windows-1257
-      windows-1258
-      tis-620
-      hz-gb-2312
-  ]
-
-end
--- a/vendor/plugins/HTML5lib/lib/html5lib/filters.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/filters.rb
@ -1 +0,0 @@
-require 'html5lib/filters/optionaltags'
--- a/vendor/plugins/HTML5lib/lib/html5lib/serializer.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/serializer.rb
@ -1,2 +0,0 @@
-require 'html5lib/serializer/htmlserializer'
-require 'html5lib/serializer/xhtmlserializer'
--- a/vendor/plugins/HTML5lib/parse.rb
+++ b/vendor/plugins/HTML5lib/parse.rb
@ -26,15 +26,15 @@ def parse(opts, args)
    exit(1)
  end

-  require 'html5lib/treebuilders'
-  treebuilder = HTML5lib::TreeBuilders[opts.treebuilder]
+  require 'html5/treebuilders'
+  treebuilder = HTML5::TreeBuilders[opts.treebuilder]

  if opts.output == :xml
-    require 'html5lib/liberalxmlparser'
-    p = HTML5lib::XHTMLParser.new(:tree=>treebuilder)
+    require 'html5/liberalxmlparser'
+    p = HTML5::XHTMLParser.new(:tree=>treebuilder)
  else
-    require 'html5lib/html5parser'
-    p = HTML5lib::HTMLParser.new(:tree=>treebuilder)
+    require 'html5/html5parser'
+    p = HTML5::HTMLParser.new(:tree=>treebuilder)
  end

  if opts.parsemethod == :parse
@ -70,10 +70,10 @@ def printOutput(parser, document, opts)
  when :xml
    print document
  when :html
-    require 'html5lib/treewalkers'
-    tokens = HTML5lib::TreeWalkers[opts.treebuilder].new(document)
-    require 'html5lib/serializer'
-    puts HTML5lib::HTMLSerializer.serialize(tokens, opts.serializer)
+    require 'html5/treewalkers'
+    tokens = HTML5::TreeWalkers[opts.treebuilder].new(document)
+    require 'html5/serializer'
+    puts HTML5::HTMLSerializer.serialize(tokens, opts.serializer)
  when :hilite
    print document.hilite
  when :tree
@ -188,6 +188,10 @@ opts = OptionParser.new do |opts|
    options.serializer[:escape_lt_in_attrs] = lt
  end

+  opts.on("--[no-]escape-rcdata", "Escape rcdata element values") do |rcdata|
+    options.serializer[:escape_rcdata] = rcdata
+  end
+
  opts.separator ""
  opts.separator "Other Options:"

--- a/vendor/plugins/HTML5lib/testdata/encoding/tests2.dat
+++ b/vendor/plugins/HTML5lib/testdata/encoding/tests2.dat
@ -33,7 +33,6 @@ EUC-jp
 #encoding
 EUC-jp

-
 #data
 <!-- -->
 <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
--- a/vendor/plugins/HTML5lib/testdata/serializer/core.test
+++ b/vendor/plugins/HTML5lib/testdata/serializer/core.test
@ -92,7 +92,8 @@

 {"description": "rcdata",
 "input": [["StartTag", "script", {}], ["Characters", "a<b>c&d"]],
- "expected": ["<script>a<b>c&d"]
+ "expected": ["<script>a<b>c&d"],
+ "xhtml": ["<script>a&lt;b&gt;c&amp;d"]
 },

 {"description": "doctype",
--- a/vendor/plugins/HTML5lib/testdata/serializer/options.test
+++ b/vendor/plugins/HTML5lib/testdata/serializer/options.test
@ -49,6 +49,12 @@
 "options": {"escape_lt_in_attrs": true},
 "input": [["StartTag", "a", {"title": "a<b>c&d"}]],
 "expected": ["<a title=\"a&lt;b>c&amp;d\">"]
+},
+
+{"description": "rcdata",
+ "options": {"escape_rcdata": true},
+ "input": [["StartTag", "script", {}], ["Characters", "a<b>c&d"]],
+ "expected": ["<script>a&lt;b&gt;c&amp;d"]
 }

 ]}
--- a/vendor/plugins/HTML5lib/testdata/tokenizer/test1.test
+++ b/vendor/plugins/HTML5lib/testdata/tokenizer/test1.test
@ -135,7 +135,7 @@

 {"description":"Entity without trailing semicolon (2)",
 "input":"I'm &notin",
-"output":[["Character","I'm "], "ParseError", ["Character", "∉"]]},
+"output":[["Character","I'm "], "ParseError", ["Character", "¬in"]]},

 {"description":"Partial entity match at end of file",
 "input":"I'm &no",
@ -151,6 +151,18 @@

 {"description":"Hexadecimal entity in attribute",
 "input":"<h a='&#x3f;'></h>",
-"output":[["StartTag", "h", {"a":"?"}], ["EndTag", "h"]]}
+"output":[["StartTag", "h", {"a":"?"}], ["EndTag", "h"]]},
+
+{"description":"Entity in attribute without semicolon ending in x",
+"input":"<h a='&notx'>",
+"output":["ParseError", ["StartTag", "h", {"a":"&notx"}]]},
+
+{"description":"Entity in attribute without semicolon ending in 1",
+"input":"<h a='&not1'>",
+"output":["ParseError", ["StartTag", "h", {"a":"&not1"}]]},
+
+{"description":"Entity in attribute without semicolon",
+"input":"<h a='&COPY'>",
+"output":["ParseError", ["StartTag", "h", {"a":"©"}]]}

 ]}
--- a/vendor/plugins/HTML5lib/testdata/tokenizer/test2.test
+++ b/vendor/plugins/HTML5lib/testdata/tokenizer/test2.test
@ -42,19 +42,23 @@

 {"description":"Numeric entity representing the NUL character",
 "input":"&#0000;",
-"output":[["Character", "\uFFFD"]]},
+"output":["ParseError", ["Character", "\uFFFD"]]},

 {"description":"Hexadecimal entity representing the NUL character",
 "input":"&#x0000;",
-"output":[["Character", "\uFFFD"]]},
+"output":["ParseError", ["Character", "\uFFFD"]]},

 {"description":"Numeric entity representing a codepoint after 1114111 (U+10FFFF)",
 "input":"&#2225222;",
-"output":[["Character", "\uFFFD"]]},
+"output":["ParseError", ["Character", "\uFFFD"]]},

 {"description":"Hexadecimal entity representing a codepoint after 1114111 (U+10FFFF)",
 "input":"&#x1010FFFF;",
-"output":[["Character", "\uFFFD"]]},
+"output":["ParseError", ["Character", "\uFFFD"]]},
+
+{"description":"Hexadecimal entity pair representing a surrogate pair",
+"input":"&#xD869;&#xDED6;",
+"output":["ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"]]},

 {"description":"Numeric entity representing a Windows-1252 'codepoint'",
 "input":"&#137;",
@ -118,7 +122,7 @@

 {"description":"Null Byte Replacement",
 "input":"\u0000",
-"output":[["Character", "\ufffd"]]}
+"output":["ParseError", ["Character", "\ufffd"]]}

 ]}

--- a/vendor/plugins/HTML5lib/testdata/tree-construction/tests1.dat
+++ b/vendor/plugins/HTML5lib/testdata/tree-construction/tests1.dat
@ -285,6 +285,7 @@ Line1<br>Line2<br>Line3<br>Line4
 |     <div>
 |       <b>
 |         <marquee>
+|           <p>
 |           "X"

 #data
@ -330,6 +331,7 @@ Unexpected end of file
 |   <body>
 |     <p>
 |     <hr>
+|     <p>

 #data
 <select><b><option><select><option></b></select>X
@ -1369,13 +1371,14 @@ unexpected EOF
 <head></p><meta><p>
 #errors
 6: missing document type declaration
-10: unexpected p element end tag in head
+10: unexpected p element end tag
 #document
 | <html>
 |   <head>
-|     <meta>
 |   <body>
 |     <p>
+|     <meta>
+|     <p>

 #data
 <head></html><meta><p>
@ -1485,6 +1488,7 @@ unexpected EOF
 |     <div>
 |       <b>
 |         <marquee>
+|           <p>

 #data
 <script></script></div><title></title><p><p>
@ -1511,6 +1515,7 @@ unexpected EOF
 |   <body>
 |     <p>
 |     <hr>
+|     <p>

 #data
 <select><b><option><select><option></b></select>
@ -1807,6 +1812,7 @@ Unexpected EOF
 |   <head>
 |   <body>
 |     <br>
+|     <p>

 #data
 <table><tr></strong></b></em></i></u></strike></s></blink></tt></pre></big></small></font></select></h1></h2></h3></h4></h5></h6></body></br></a></img></title></span></style></script></table></th></td></tr></frame></area></link></param></hr></input></col></base></meta></basefont></bgsound></embed></spacer></p></dd></dt></caption></colgroup></tbody></tfoot></thead></address></blockquote></center></dir></div></dl></fieldset></listing></menu></ol></ul></li></nobr></wbr></form></button></marquee></object></html></frameset></head></iframe></image></isindex></noembed></noframes></noscript></optgroup></option></plaintext></textarea>
@ -1928,3 +1934,4 @@ Unexpected EOF
 |     <table>
 |       <tbody>
 |         <tr>
+|     <p>
--- a/vendor/plugins/HTML5lib/testdata/tree-construction/tests2.dat
+++ b/vendor/plugins/HTML5lib/testdata/tree-construction/tests2.dat
@ -777,3 +777,4 @@ Unexpected </p> end tag.
 |       <tbody>
 |         <tr>
 |           <td>
+|             <p>
--- a/vendor/plugins/HTML5lib/testdata/tree-construction/tests3.dat
+++ b/vendor/plugins/HTML5lib/testdata/tree-construction/tests3.dat
@ -61,7 +61,6 @@ No DOCTYPE

 #data
 <!DOCTYPE htML><html><head></head><body><pre>
-
 foo</pre></body></html>
 #errors
 #document
@ -72,10 +71,22 @@ foo</pre></body></html>
 |     <pre>
 |       "foo"

-
 #data
 <!DOCTYPE htML><html><head></head><body><pre>

+foo</pre></body></html>
+#errors
+#document
+| <!DOCTYPE htML>
+| <html>
+|   <head>
+|   <body>
+|     <pre>
+|       "
+foo"
+
+#data
+<!DOCTYPE htML><html><head></head><body><pre>
 foo
 </pre></body></html>
 #errors
@ -183,7 +194,6 @@ y</pre></body></html>

 #data
 <!DOCTYPE htML><textarea>
-
 foo</textarea>
 #errors
 #document
@ -194,6 +204,20 @@ foo</textarea>
 |     <textarea>
 |       "foo"

+#data
+<!DOCTYPE htML><textarea>
+
+foo</textarea>
+#errors
+#document
+| <!DOCTYPE htML>
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "
+foo"
+
 #data
 <!DOCTYPE htML><html><head></head><body><ul><li><div><p><li></ul></body></html>
 #errors
--- a/vendor/plugins/HTML5lib/testdata/tree-construction/tests4.dat
+++ b/vendor/plugins/HTML5lib/testdata/tree-construction/tests4.dat
@ -1,37 +1,49 @@
 #data
 direct div content
 #errors
-#document-fragment div
+#document-fragment
+div
+#document
 | "direct div content"

 #data
 direct textarea content
 #errors
-#document-fragment textarea
+#document-fragment
+textarea
+#document
 | "direct textarea content"

 #data
 textarea content with <em>pseudo</em> <foo>markup
 #errors
-#document-fragment textarea
+#document-fragment
+textarea
+#document
 | "textarea content with <em>pseudo</em> <foo>markup"

 #data
 this is &#x0043;DATA inside a <style> element
 #errors
-#document-fragment style
+#document-fragment
+style
+#document
 | "this is &#x0043;DATA inside a <style> element"

 #data
 </plaintext>
 #errors
-#document-fragment plaintext
+#document-fragment
+plaintext
+#document
 | "</plaintext>"

 #data
 setting html's innerHTML
 #errors
-#document-fragment html
+#document-fragment
+html
+#document
 | <head>
 | <body>
 |   "setting html's innerHTML"
@ -39,6 +51,8 @@ setting html's innerHTML
 #data
 <title>setting head's innerHTML</title>
 #errors
-#document-fragment head
+#document-fragment
+head
+#document
 | <title>
 |   "setting head's innerHTML"
--- a/vendor/plugins/HTML5lib/testdata/tree-construction/tests6.dat
+++ b/vendor/plugins/HTML5lib/testdata/tree-construction/tests6.dat
@ -27,3 +27,41 @@
 |   <head>
 |   <body>
 |     <meta>
+
+#data
+<!doctype HTml><form><div></form><div>
+#errors
+Form end tag ignored.
+Unexpected end of file.
+#document
+| <!DOCTYPE HTml>
+| <html>
+|   <head>
+|   <body>
+|     <form>
+|       <div>
+|         <div>
+
+#data
+<!doctype HTml><title>&amp;</title>
+#errors
+Unexpected end of file.
+#document
+| <!DOCTYPE HTml>
+| <html>
+|   <head>
+|     <title>
+|       "&"
+|   <body>
+
+#data
+<!doctype HTml><title><!--&amp;--></title>
+#errors
+Unexpected end of file.
+#document
+| <!DOCTYPE HTml>
+| <html>
+|   <head>
+|     <title>
+|       "<!--&amp;-->"
+|   <body>
--- a/vendor/plugins/HTML5lib/tests/preamble.rb
+++ b/vendor/plugins/HTML5lib/tests/preamble.rb
@ -1,81 +1,81 @@
-require 'test/unit'
-
-HTML5LIB_BASE = File.dirname(File.dirname(File.dirname(File.expand_path(__FILE__)))) 
-
-if File.exists?(File.join(HTML5LIB_BASE, 'testdata'))
-  TESTDATA_DIR = File.join(HTML5LIB_BASE, 'testdata')
-else
-  TESTDATA_DIR = File.join(File.dirname(File.dirname(File.expand_path(__FILE__))), 'testdata')
-end
-
-$:.unshift File.join(File.dirname(File.dirname(__FILE__)),'lib')
-
-$:.unshift File.dirname(__FILE__)
-
-def html5lib_test_files(subdirectory)
-  Dir[File.join(TESTDATA_DIR, subdirectory, '*.*')]
-end
-
-begin
-  require 'rubygems'
-  require 'json'
-rescue LoadError
-  class JSON
-    def self.parse json
-      json.gsub!(/"\s*:/, '"=>')
-      json.gsub!(/\\u[0-9a-fA-F]{4}/) {|x| [x[2..-1].to_i(16)].pack('U')}
-      null = nil
-      eval json
-    end
-  end
-end
-
-module HTML5lib
-  module TestSupport
-    def self.startswith?(a, b)
-      b[0... a.length] == a
-    end
-
-    def self.parseTestcase(data)
-      innerHTML = nil
-      input = []
-      output = []
-      errors = []
-      currentList = input
-      data.split(/\n/).each do |line|
-        if !line.empty? and !startswith?("#errors", line) and
-          !startswith?("#document", line) and
-          !startswith?("#data", line) and
-          !startswith?("#document-fragment", line)
-
-          if currentList == output and startswith?("|", line)
-            currentList.push(line[2..-1])
-          else
-            currentList.push(line)
-          end
-        elsif line == "#errors"
-          currentList = errors
-        elsif line == "#document" or startswith?("#document-fragment", line)
-          if startswith?("#document-fragment", line)
-            innerHTML = line[19..-1]
-            raise AssertionError unless innerHTML
-          end
-          currentList = output
-        end
-      end
-      return innerHTML, input.join("\n"), output.join("\n"), errors
-    end
-
-    # convert the output of str(document) to the format used in the testcases
-    def convertTreeDump(treedump)
-      treedump.split(/\n/)[1..-1].map { |line| (line.length > 2 and line[0] == ?|) ? line[3..-1] : line }.join("\n")
-    end
-
-    def sortattrs(output)
-      output.gsub(/^(\s+)\w+=.*(\n\1\w+=.*)+/) do |match|
-         match.split("\n").sort.join("\n")
-      end
-    end
-
-  end
-end
+require 'test/unit'
+
+# HTML5_BASE is the directory two levels above the one containing this file.
+HTML5_BASE = File.dirname(File.dirname(File.dirname(File.expand_path(__FILE__))))
+
+# Use HTML5_BASE/testdata when it exists; otherwise fall back to the
+# 'testdata' directory that sits beside the directory containing this file.
+# File.exist? is used because File.exists? is deprecated and no longer
+# available in Ruby 3.2 and later.
+if File.exist?(File.join(HTML5_BASE, 'testdata'))
+  TESTDATA_DIR = File.join(HTML5_BASE, 'testdata')
+else
+  TESTDATA_DIR = File.join(File.dirname(File.dirname(File.expand_path(__FILE__))), 'testdata')
+end
+
+$:.unshift File.join(File.dirname(File.dirname(__FILE__)),'lib')
+
+$:.unshift File.dirname(__FILE__)
+
+# Returns the paths of all files whose name contains a dot in the given
+# subdirectory of TESTDATA_DIR.
+def html5_test_files(subdirectory)
+  pattern = File.join(TESTDATA_DIR, subdirectory, '*.*')
+  Dir.glob(pattern)
+end
+
+# Prefer the real json library. When rubygems or json cannot be loaded, define
+# a minimal JSON.parse stand-in instead.
+begin
+  require 'rubygems'
+  require 'json'
+rescue LoadError
+  class JSON
+    # Turns the JSON text into Ruby literal syntax and evaluates it.
+    # NOTE(review): this mutates +json+ in place and passes it to eval, so it
+    # must only ever be given trusted input.
+    def self.parse json
+      # Rewrite every '"' followed by optional whitespace and ':' into '"=>'
+      # so that JSON objects read as Ruby hashes. This also rewrites such a
+      # sequence if it occurs inside a string value.
+      json.gsub!(/"\s*:/, '"=>')
+      # Replace each \uXXXX escape with the UTF-8 encoding of that code point.
+      json.gsub!(/\\u[0-9a-fA-F]{4}/) {|x| [x[2..-1].to_i(16)].pack('U')}
+      # Local variable so that a bare 'null' in the evaluated text yields nil.
+      null = nil
+      eval json
+    end
+  end
+end
+
+module HTML5
+  # Helpers shared by the test cases.
+  module TestSupport
+    # Converts a serialized tree dump into the layout of the expected output in
+    # the test-data files: the first line is dropped, and the leading three
+    # characters are removed from every line longer than two characters that
+    # starts with '|'.
+    def convertTreeDump(treedump)
+      treedump.split(/\n/)[1..-1].map { |line| (line.length > 2 and line[0] == ?|) ? line[3..-1] : line }.join("\n")
+    end
+
+    # Sorts the lines within each run of two or more consecutive "name=..."
+    # lines that share the same leading whitespace, so that the order of those
+    # lines does not affect a comparison.
+    def sortattrs(output)
+      output.gsub(/^(\s+)\w+=.*(\n\1\w+=.*)+/) do |match|
+         match.split("\n").sort.join("\n")
+      end
+    end
+
+    # Reads a test-data file made of "#heading" lines followed by content and
+    # yields one array per test case. The array holds the content of each
+    # requested section, in the order given by +sections+, with nil for a
+    # section the test case does not have. A new test case starts whenever the
+    # first section heading is seen again.
+    class TestData
+      include Enumerable
+
+      # filename:: path of the test-data file
+      # sections:: heading names (without the leading '#') to collect; the
+      #            first one marks the start of a test case
+      def initialize(filename, sections)
+        @filename = filename
+        @sections = sections
+      end
+
+      # Yields the section contents of every test case in the file. The file
+      # is opened for the duration of the call and closed afterwards, so no
+      # handle is leaked and the object can be enumerated more than once.
+      def each
+        data = {}
+        key = nil
+        File.open(@filename) do |f|
+          f.each_line do |line|
+            if line[0] == ?# and @sections.include?(line[1..-2])
+              heading = line[1..-2]
+              if data.any? and heading == @sections[0]
+                # The last section of a test case loses one more trailing
+                # newline than normaliseOutput removes (presumably the blank
+                # line separating test cases).
+                data[key].chomp!
+                yield normaliseOutput(data)
+                data = {}
+              end
+              key = heading
+              data[key] = ""
+            elsif key
+              data[key] += line
+            end
+          end
+        end
+        # An empty Hash is truthy in Ruby, so test for emptiness explicitly;
+        # otherwise a file without any section would yield an array of nils.
+        yield normaliseOutput(data) unless data.empty?
+      end
+
+      # Removes one trailing newline from every collected section and returns
+      # the sections as an array ordered like +sections+.
+      def normaliseOutput(data)
+        data.keys.each { |key| data[key].chomp! }
+        @sections.map { |heading| data[heading] }
+      end
+    end
+  end
+end
--- a/vendor/plugins/HTML5lib/tests/test_encoding.rb
+++ b/vendor/plugins/HTML5lib/tests/test_encoding.rb
@ -1,8 +1,10 @@
 require File.join(File.dirname(__FILE__), 'preamble')

-require 'html5lib/inputstream'
+require 'html5/inputstream'

 class Html5EncodingTestCase < Test::Unit::TestCase
+  include HTML5
+  include TestSupport

  begin
    require 'rubygems'
@ -10,23 +12,21 @@ class Html5EncodingTestCase < Test::Unit::TestCase

    def test_chardet
      file = File.open(File.join(TESTDATA_DIR, 'encoding', 'chardet', 'test_big5.txt'), 'r')
-      stream = HTML5lib::HTMLInputStream.new(file, :chardet => true)
+      stream = HTML5::HTMLInputStream.new(file, :chardet => true)
      assert_equal 'big5', stream.char_encoding.downcase
    rescue LoadError
      puts "chardet not found, skipping chardet tests"
    end
  end

-  html5lib_test_files('encoding').each do |test_file|        
+  html5_test_files('encoding').each do |test_file|        
    test_name = File.basename(test_file).sub('.dat', '').tr('-', '')

-    File.read(test_file).split("#data\n").each_with_index do |data, index|
-      next if data.empty?
-      input, encoding = data.split(/\n#encoding\s+/, 2)
-      encoding = encoding.split[0]
+    TestData.new(test_file, %w(data encoding)).
+      each_with_index do |(input, encoding), index|

      define_method 'test_%s_%d' % [ test_name, index + 1 ] do
-        stream = HTML5lib::HTMLInputStream.new(input, :chardet => false)
+        stream = HTML5::HTMLInputStream.new(input, :chardet => false)
        assert_equal encoding.downcase, stream.char_encoding.downcase, input
      end
    end
--- a/vendor/plugins/HTML5lib/tests/test_lxp.rb
+++ b/vendor/plugins/HTML5lib/tests/test_lxp.rb
@ -1,23 +1,23 @@
 require File.join(File.dirname(__FILE__), 'preamble')

-require 'html5lib/liberalxmlparser'
+require 'html5/liberalxmlparser'

 XMLELEM = /<(\w+\s*)((?:[-:\w]+="[^"]*"\s*)+)(\/?)>/
-SORTATTRS = '<#{$1+$2.split.sort.join(' ')+$3}>'

-def assert_xml_equal(input, expected=nil, parser=HTML5lib::XMLParser)
+def assert_xml_equal(input, expected=nil, parser=HTML5::XMLParser)
+  sortattrs = proc {"<#{$1+$2.split.sort.join(' ')+$3}>"}
  document = parser.parse(input.chomp).root
  if not expected
-    expected = input.chomp.gsub(XMLELEM,SORTATTRS)
+    expected = input.chomp.gsub(XMLELEM,&sortattrs)
    expected = expected.gsub(/&#(\d+);/) {[$1.to_i].pack('U')}
-    output = document.to_s.gsub(/'/,'"').gsub(XMLELEM,SORTATTRS)
+    output = document.to_s.gsub(/'/,'"').gsub(XMLELEM,&sortattrs)
    assert_equal(expected, output)
  else
    assert_equal(expected, document.to_s.gsub(/'/,'"'))
  end
 end

-def assert_xhtml_equal(input, expected=nil, parser=HTML5lib::XHTMLParser)
+def assert_xhtml_equal(input, expected=nil, parser=HTML5::XHTMLParser)
  assert_xml_equal(input, expected, parser)
 end

@ -34,10 +34,10 @@ class BasicXhtml5Test < Test::Unit::TestCase

  def test_title_body_named_charref
    assert_xhtml_equal(
-      '<title>mdash</title>A &mdash B',
+      '<title>ntilde</title>A &ntilde B',
      '<html xmlns="http://www.w3.org/1999/xhtml">' +
-      '<head><title>mdash</title></head>' + 
-      '<body>A '+ [0x2014].pack('U') + ' B</body>' +
+      '<head><title>ntilde</title></head>' + 
+      '<body>A '+ [0xF1].pack('U') + ' B</body>' +
      '</html>')
  end
 end
@ -193,20 +193,71 @@ EOX
  def test_br
    assert_xhtml_equal <<EOX1
 <html xmlns="http://www.w3.org/1999/xhtml">
-<head><title>XLINK</title></head>
+<head><title>BR</title></head>
 <body>
 <br/>
 </body></html>
 EOX1
  end

-  def xtest_strong
+  def test_strong
    assert_xhtml_equal <<EOX
 <html xmlns="http://www.w3.org/1999/xhtml">
-<head><title>XLINK</title></head>
+<head><title>STRONG</title></head>
 <body>
 <strong></strong>
 </body></html>
 EOX
  end
+
+  # A <script> element whose text contains the escaped characters &lt; and
+  # &amp;. No expected value is passed, so the output is compared against the
+  # input itself.
+  def test_script
+    assert_xhtml_equal <<EOX
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head><title>SCRIPT</title></head>
+<body>
+<script>1 &lt; 2 &amp; 3</script>
+</body></html>
+EOX
+  end
+
+  # The input has a self-closing <script src="..."/> in the head; the expected
+  # output has the same element with an explicit </script> end tag. The rest
+  # of the document is identical in both.
+  def test_script_src
+    assert_xhtml_equal <<EOX1, <<EOX2.strip
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head><title>SCRIPT</title><script src="http://example.com"/></head>
+<body>
+<script>1 &lt; 2 &amp; 3</script>
+</body></html>
+EOX1
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head><title>SCRIPT</title><script src="http://example.com"></script></head>
+<body>
+<script>1 &lt; 2 &amp; 3</script>
+</body></html>
+EOX2
+  end
+
+  # A <title> element whose text contains the escaped characters &lt; and
+  # &amp;. No expected value is passed, so the output is compared against the
+  # input itself.
+  def test_title
+    assert_xhtml_equal <<EOX
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head><title>1 &lt; 2 &amp; 3</title></head>
+<body>
+</body></html>
+EOX
+  end
+
+  # The input starts with an XML declaration; the expected output is the same
+  # document without that declaration.
+  def test_prolog
+    assert_xhtml_equal <<EOX1, <<EOX2.strip
+<?xml version="1.0" encoding="UTF-8" ?>
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head><title>PROLOG</title></head>
+<body>
+</body></html>
+EOX1
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head><title>PROLOG</title></head>
+<body>
+</body></html>
+EOX2
+  end
+
 end
--- a/vendor/plugins/HTML5lib/tests/test_parser.rb
+++ b/vendor/plugins/HTML5lib/tests/test_parser.rb
@ -1,7 +1,7 @@
 require File.join(File.dirname(__FILE__), 'preamble')

-require 'html5lib/treebuilders'
-require 'html5lib/html5parser'
+require 'html5/treebuilders'
+require 'html5/html5parser'


 $tree_types_to_test = ['simpletree', 'rexml']
@ -18,18 +18,17 @@ puts 'Testing tree builders: ' + $tree_types_to_test * ', '


 class Html5ParserTestCase < Test::Unit::TestCase
-  include HTML5lib
+  include HTML5
  include TestSupport

-  html5lib_test_files('tree-construction').each do |test_file|
+  html5_test_files('tree-construction').each do |test_file|

    test_name = File.basename(test_file).sub('.dat', '')

-    File.read(test_file).split("#data\n").each_with_index do |data, index|
-      next if data.empty?
-     
-      innerHTML, input, expected_output, expected_errors =
-        TestSupport.parseTestcase(data)
+    TestData.new(test_file, %w(data errors document-fragment document)).
+      each_with_index do |(input, errors, innerHTML, expected), index|
+
+      expected = expected.gsub("\n| ","\n")[2..-1]

      $tree_types_to_test.each do |tree_name|
        define_method 'test_%s_%d_%s' % [ test_name, index + 1, tree_name ] do
@ -44,9 +43,9 @@ class Html5ParserTestCase < Test::Unit::TestCase
        
          actual_output = convertTreeDump(parser.tree.testSerializer(parser.tree.document))

-          assert_equal sortattrs(expected_output), sortattrs(actual_output), [
+          assert_equal sortattrs(expected), sortattrs(actual_output), [
            '', 'Input:', input,
-            '', 'Expected:', expected_output,
+            '', 'Expected:', expected,
            '', 'Recieved:', actual_output
          ].join("\n")

@ -54,9 +53,9 @@ class Html5ParserTestCase < Test::Unit::TestCase
            actual_errors = parser.errors.map do |(line, col), message|
              'Line: %i Col: %i %s' % [line, col, message]
            end
-            assert_equal expected_errors.length, parser.errors.length, [
+            assert_equal errors.length, parser.errors.length, [
              'Input', input + "\n",
-              'Expected errors:', expected_errors.join("\n"),
+              'Expected errors:', errors.join("\n"),
              'Actual errors:', actual_errors.join("\n") 
            ].join("\n")
          end
--- a/vendor/plugins/HTML5lib/tests/test_sanitizer.rb
+++ b/vendor/plugins/HTML5lib/tests/test_sanitizer.rb
@ -2,14 +2,14 @@

 require File.join(File.dirname(__FILE__), 'preamble')

-require 'html5lib/html5parser'
-require 'html5lib/liberalxmlparser'
-require 'html5lib/treewalkers'
-require 'html5lib/serializer'
-require 'html5lib/sanitizer'
+require 'html5/html5parser'
+require 'html5/liberalxmlparser'
+require 'html5/treewalkers'
+require 'html5/serializer'
+require 'html5/sanitizer'

 class SanitizeTest < Test::Unit::TestCase
-  include HTML5lib
+  include HTML5

  def sanitize_xhtml stream
    XHTMLParser.parseFragment(stream, {:tokenizer => HTMLSanitizer, :encoding => 'utf-8'}).to_s
@ -131,7 +131,7 @@ class SanitizeTest < Test::Unit::TestCase
 #    check_sanitization(input, output, output, output)
 #  end

-  html5lib_test_files('sanitizer').each do |filename|
+  html5_test_files('sanitizer').each do |filename|
    JSON::parse(open(filename).read).each do |test|
      define_method "test_#{test['name']}" do
        check_sanitization(
--- a/vendor/plugins/HTML5lib/tests/test_serializer.rb
+++ b/vendor/plugins/HTML5lib/tests/test_serializer.rb
@ -1,13 +1,13 @@
 require File.join(File.dirname(__FILE__), 'preamble')

-require 'html5lib/html5parser'
-require 'html5lib/serializer'
-require 'html5lib/treewalkers'
+require 'html5/html5parser'
+require 'html5/serializer'
+require 'html5/treewalkers'

 #Run the serialize error checks
 checkSerializeErrors = false

-class JsonWalker < HTML5lib::TreeWalkers::Base
+class JsonWalker < HTML5::TreeWalkers::Base
  def each
    @tree.each do |token|
      case token[0]
@ -31,7 +31,7 @@ class JsonWalker < HTML5lib::TreeWalkers::Base
 end

 class Html5SerializeTestcase < Test::Unit::TestCase
-  html5lib_test_files('serializer').each do |filename|
+  html5_test_files('serializer').each do |filename|
    test_name = File.basename(filename).sub('.test', '')
    tests = JSON::parse(open(filename).read)
    tests['tests'].each_with_index do |test, index|
@ -41,7 +41,7 @@ class Html5SerializeTestcase < Test::Unit::TestCase
          test["options"][:encoding] = test["options"]["encoding"]
        end

-        result = HTML5lib::HTMLSerializer.
+        result = HTML5::HTMLSerializer.
          serialize(JsonWalker.new(test["input"]), (test["options"] || {}))
        expected = test["expected"]
        if expected.length == 1
@ -52,7 +52,7 @@ class Html5SerializeTestcase < Test::Unit::TestCase

        return if test_name == 'optionaltags'

-        result = HTML5lib::XHTMLSerializer.
+        result = HTML5::XHTMLSerializer.
          serialize(JsonWalker.new(test["input"]), (test["options"] || {}))
        expected = test["xhtml"] || test["expected"]
        if expected.length == 1
--- a/vendor/plugins/HTML5lib/tests/test_stream.rb
+++ b/vendor/plugins/HTML5lib/tests/test_stream.rb
@ -1,9 +1,9 @@
 require File.join(File.dirname(__FILE__), 'preamble')

-require 'html5lib/inputstream'
+require 'html5/inputstream'

 class HTMLInputStreamTest < Test::Unit::TestCase
-  include HTML5lib
+  include HTML5

  def test_char_ascii
    stream = HTMLInputStream.new("'", :encoding=>'ascii')
--- a/vendor/plugins/HTML5lib/tests/test_tokenizer.rb
+++ b/vendor/plugins/HTML5lib/tests/test_tokenizer.rb
@ -1,6 +1,6 @@
 require File.join(File.dirname(__FILE__), 'preamble')

-require 'html5lib/tokenizer'
+require 'html5/tokenizer'

 require 'tokenizer_test_parser'

@ -36,7 +36,7 @@ class Html5TokenizerTestCase < Test::Unit::TestCase
        '' ] * "\n"

      assert_nothing_raised message do
-        tokenizer = HTML5lib::HTMLTokenizer.new(data['input'])
+        tokenizer = HTML5::HTMLTokenizer.new(data['input'])

        tokenizer.contentModelFlag = content_model_flag.to_sym

@ -53,7 +53,7 @@ class Html5TokenizerTestCase < Test::Unit::TestCase
    end 
  end

-  html5lib_test_files('tokenizer').each do |test_file|
+  html5_test_files('tokenizer').each do |test_file|
    test_name = File.basename(test_file).sub('.test', '')

    tests = JSON.parse(File.read(test_file))['tests']
--- a/vendor/plugins/HTML5lib/tests/test_treewalkers.rb
+++ b/vendor/plugins/HTML5lib/tests/test_treewalkers.rb
@ -1,25 +1,25 @@
 require File.join(File.dirname(__FILE__), 'preamble')

-require 'html5lib/html5parser'
-require 'html5lib/treewalkers'
-require 'html5lib/treebuilders'
+require 'html5/html5parser'
+require 'html5/treewalkers'
+require 'html5/treebuilders'

 $tree_types_to_test = {
  'simpletree' =>
-    {:builder => HTML5lib::TreeBuilders['simpletree'],
-     :walker  => HTML5lib::TreeWalkers['simpletree']},
+    {:builder => HTML5::TreeBuilders['simpletree'],
+     :walker  => HTML5::TreeWalkers['simpletree']},
  'rexml' =>
-    {:builder => HTML5lib::TreeBuilders['rexml'],
-     :walker  => HTML5lib::TreeWalkers['rexml']},
+    {:builder => HTML5::TreeBuilders['rexml'],
+     :walker  => HTML5::TreeWalkers['rexml']},
  'hpricot' =>
-    {:builder => HTML5lib::TreeBuilders['hpricot'],
-     :walker  => HTML5lib::TreeWalkers['hpricot']},
+    {:builder => HTML5::TreeBuilders['hpricot'],
+     :walker  => HTML5::TreeWalkers['hpricot']},
 }

 puts 'Testing tree walkers: ' + $tree_types_to_test.keys * ', '

 class TestTreeWalkers < Test::Unit::TestCase
-  include HTML5lib::TestSupport
+  include HTML5::TestSupport

  def concatenateCharacterTokens(tokens)
    charactersToken = nil
@ -70,22 +70,21 @@ class TestTreeWalkers < Test::Unit::TestCase
    return output.join("\n")
  end

-  html5lib_test_files('tree-construction').each do |test_file|
+  html5_test_files('tree-construction').each do |test_file|

    test_name = File.basename(test_file).sub('.dat', '')
    next if test_name == 'tests5' # TODO

-    File.read(test_file).split("#data\n").each_with_index do |data, index|
-      next if data.empty?
+    TestData.new(test_file, %w(data errors document-fragment document)).
+      each_with_index do |(input, errors, innerHTML, expected), index|

-      innerHTML, input, expected_output, expected_errors =
-        HTML5lib::TestSupport::parseTestcase(data)
+      expected = expected.gsub("\n| ","\n")[2..-1]

      $tree_types_to_test.each do |tree_name, tree_class|

        define_method "test_#{test_name}_#{index}_#{tree_name}" do

-          parser = HTML5lib::HTMLParser.new(:tree => tree_class[:builder])
+          parser = HTML5::HTMLParser.new(:tree => tree_class[:builder])

          if innerHTML
            parser.parseFragment(input, innerHTML)
@ -97,7 +96,7 @@ class TestTreeWalkers < Test::Unit::TestCase

          begin
            output = sortattrs(convertTokens(tree_class[:walker].new(document)))
-            expected = sortattrs(expected_output)
+            expected = sortattrs(expected)
            assert_equal expected, output, [
              '', 'Input:', input,
              '', 'Expected:', expected,
--- a/vendor/plugins/HTML5lib/tests/tokenizer_test_parser.rb
+++ b/vendor/plugins/HTML5lib/tests/tokenizer_test_parser.rb
@ -1,63 +1,63 @@
-require 'html5lib/constants'
-
-class TokenizerTestParser
-  def initialize(tokenizer)
-    @tokenizer = tokenizer
-  end
-
-  def parse
-    @outputTokens = []
-
-    debug = nil
-    for token in @tokenizer
-      debug = token.inspect if token[:type] == :ParseError
-      send(('process' + token[:type].to_s), token)
-    end
-
-    return @outputTokens
-  end
-
-  def processDoctype(token)
-    @outputTokens.push(["DOCTYPE", token[:name], token[:publicId],
-      token[:systemId], token[:correct]])
-  end
-
-  def processStartTag(token)
-    @outputTokens.push(["StartTag", token[:name], token[:data]])
-  end
-
-  def processEmptyTag(token)
-    if not HTML5lib::VOID_ELEMENTS.include? token[:name]
-      @outputTokens.push("ParseError")
-    end
-    @outputTokens.push(["StartTag", token[:name], token[:data]])
-  end
-
-  def processEndTag(token)
-    if token[:data].length > 0
-      self.processParseError(token)
-    end
-    @outputTokens.push(["EndTag", token[:name]])
-  end
-
-  def processComment(token)
-    @outputTokens.push(["Comment", token[:data]])
-  end
-
-  def processCharacters(token)
-    @outputTokens.push(["Character", token[:data]])
-  end
-
-  alias processSpaceCharacters processCharacters
-
-  def processCharacters(token)
-    @outputTokens.push(["Character", token[:data]])
-  end
-
-  def processEOF(token)
-  end
-
-  def processParseError(token)
-    @outputTokens.push("ParseError")
-  end
-end
+require 'html5/constants'
+
+class TokenizerTestParser
+  def initialize(tokenizer)
+    @tokenizer = tokenizer
+  end
+
+  def parse
+    @outputTokens = []
+
+    debug = nil
+    for token in @tokenizer
+      debug = token.inspect if token[:type] == :ParseError
+      send(('process' + token[:type].to_s), token)
+    end
+
+    return @outputTokens
+  end
+
+  def processDoctype(token)
+    @outputTokens.push(["DOCTYPE", token[:name], token[:publicId],
+      token[:systemId], token[:correct]])
+  end
+
+  def processStartTag(token)
+    @outputTokens.push(["StartTag", token[:name], token[:data]])
+  end
+
+  def processEmptyTag(token)
+    if not HTML5::VOID_ELEMENTS.include? token[:name]
+      @outputTokens.push("ParseError")
+    end
+    @outputTokens.push(["StartTag", token[:name], token[:data]])
+  end
+
+  def processEndTag(token)
+    if token[:data].length > 0
+      self.processParseError(token)
+    end
+    @outputTokens.push(["EndTag", token[:name]])
+  end
+
+  def processComment(token)
+    @outputTokens.push(["Comment", token[:data]])
+  end
+
+  def processCharacters(token)
+    @outputTokens.push(["Character", token[:data]])
+  end
+
+  alias processSpaceCharacters processCharacters
+
+  def processCharacters(token)
+    @outputTokens.push(["Character", token[:data]])
+  end
+
+  def processEOF(token)
+  end
+
+  def processParseError(token)
+    @outputTokens.push("ParseError")
+  end
+end
--- a/vendor/plugins/maruku/lib/maruku/input/linesource.rb
+++ b/vendor/plugins/maruku/lib/maruku/input/linesource.rb
@ -28,6 +28,7 @@ module MaRuKu; module In; module Markdown; module BlockLevelParser
 	
 class LineSource
 	include MaRuKu::Strings
+	attr_reader :parent
 	
 	def initialize(lines, parent=nil, parent_offset=nil)
 		raise "NIL lines? " if not lines
--- a/vendor/plugins/maruku/lib/maruku/input/parse_block.rb
+++ b/vendor/plugins/maruku/lib/maruku/input/parse_block.rb
@ -65,22 +65,8 @@ module MaRuKu; module In; module Markdown; module BlockLevelParser
 				when :ald
 					output.push read_ald(src)
 				when :text
-					if src.cur_line =~ MightBeTableHeader and 
-						(src.next_line && src.next_line =~ TableSeparator)
-						output.push read_table(src)
-					elsif [:header1,:header2].include? src.next_line.md_type
-						output.push read_header12(src)
-					elsif eventually_comes_a_def_list(src)
-					 	definition = read_definition(src)
-						if output.last.kind_of?(MDElement) && 
-							output.last.node_type == :definition_list then
-							output.last.children << definition
-						else
-							output.push md_el(:definition_list, [definition])
-						end
-					else # Start of a paragraph
-						output.push read_paragraph(src)
-					end
+					# paragraph, or table, or definition list
+					read_text_material(src, output)
 				when :header2, :hrule
 					# hrule
 					src.shift_line
@ -102,7 +88,12 @@ module MaRuKu; module In; module Markdown; module BlockLevelParser
 				when :raw_html; e = read_raw_html(src); output << e if e

 				when :footnote_text;   output.push read_footnote_text(src)
-				when :ref_definition;  read_ref_definition(src, output)
+				when :ref_definition;  
+					if src.parent && (src.cur_index == 0)
+						read_text_material(src, output)
+					else
+						read_ref_definition(src, output)
+					end
 				when :abbreviation;    output.push read_abbreviation(src)
 				when :xml_instr;       read_xml_instruction(src, output)
 				when :metadata;        
@ -149,6 +140,24 @@ module MaRuKu; module In; module Markdown; module BlockLevelParser
 		output
 	end
 	
+	def read_text_material(src, output)
+		if src.cur_line =~ MightBeTableHeader and 
+			(src.next_line && src.next_line =~ TableSeparator)
+			output.push read_table(src)
+		elsif [:header1,:header2].include? src.next_line.md_type
+			output.push read_header12(src)
+		elsif eventually_comes_a_def_list(src)
+		 	definition = read_definition(src)
+			if output.last.kind_of?(MDElement) && 
+				output.last.node_type == :definition_list then
+				output.last.children << definition
+			else
+				output.push md_el(:definition_list, [definition])
+			end
+		else # Start of a paragraph
+			output.push read_paragraph(src)
+		end
+	end
 	
 	
 	def read_ald(src)
@ -274,9 +283,9 @@ module MaRuKu; module In; module Markdown; module BlockLevelParser
 		item_type = src.cur_line.md_type
 		first = src.shift_line

-		# Ugly things going on inside `read_indented_content`
 		indentation = spaces_before_first_char(first)
 		break_list = [:ulist, :olist, :ial]
+		# Ugly things going on inside `read_indented_content`
 		lines, want_my_paragraph = 
 			read_indented_content(src,indentation, break_list, item_type)

@ -285,7 +294,7 @@ module MaRuKu; module In; module Markdown; module BlockLevelParser
 			stripped = first[indentation, first.size-1]
 		lines.unshift stripped
 		
-		#dbg_describe_ary(lines, 'LIST ITEM ')
+		# dbg_describe_ary(lines, 'LIST ITEM ')

 		src2 = LineSource.new(lines, src, parent_offset)
 		children = parse_blocks(src2)