Merged with Jacques' latest changes.

2007-09-03 09:14:51 -04:00 · 2007-09-03 09:14:51 -04:00 · b96ff30026
commit b96ff30026
parent 3070d6eeae 5ff1b7f6da
111 changed files with 12210 additions and 3632 deletions
--- a/app/controllers/application.rb
+++ b/app/controllers/application.rb
@ -152,8 +152,7 @@ class ApplicationController < ActionController::Base
    elsif %w(tex).include?(action_name)
      response.headers['Content-Type'] = 'text/plain; charset=UTF-8'
    elsif request.env['HTTP_USER_AGENT'] =~ /Validator/ or request.env.include?('HTTP_ACCEPT') &&
-           Mime::Type.parse(request.env["HTTP_ACCEPT"]).include?(Mime::XHTML) && 
-           !(request.env['HTTP_USER_AGENT'] =~ /Safari/ and  %w(s5).include?(action_name))
+           Mime::Type.parse(request.env["HTTP_ACCEPT"]).include?(Mime::XHTML)  
      response.headers['Content-Type'] = 'application/xhtml+xml; charset=UTF-8'
    elsif request.env['HTTP_USER_AGENT'] =~ /MathPlayer/ 
      response.headers['Content-Type'] = 'application/xhtml+xml'
--- a/app/views/wiki/atom.rxml
+++ b/app/views/wiki/atom.rxml
@ -18,7 +18,7 @@ xml.feed('xmlns' => "http://www.w3.org/2005/Atom", "xml:lang" => 'en') do
        xml.name(page.author)
      end
      if @hide_description
-        xml.summary('Content suppressed.', 'type' => 'text')
+        xml.summary("Updated by #{page.author} on #{page.updated_at.getgm.strftime("%Y-%m-%d")} at #{page.updated_at.getgm.strftime("%H:%M:%SZ")}.", 'type' => 'text')
      else
        xml.content('type' => 'xhtml', 'xml:base' => url_for(:only_path => false, :web => @web_name, :action => @link_action, :id => page.name) ) do
          xml.div('xmlns' => 'http://www.w3.org/1999/xhtml' ) do
--- a/app/views/wiki/tex.rhtml
+++ b/app/views/wiki/tex.rhtml
@ -11,6 +11,16 @@
 %----Macros----------
 \newcommand{\gt}{>}
 \newcommand{\lt}{<}
+\newcommand{\darr}{\downarrow}
+\newcommand{\nearr}{\nearrow}
+\newcommand{\nwarr}{\nwarrow}
+\newcommand{\searr}{\searrow}
+\newcommand{\swarr}{\swarrow}
+\renewcommand{\iff}{\Longleftrightarrow} % \iff is predefined by LaTeX, so \newcommand would fail
+\newcommand{\impliedby}{\Leftarrow}
+\newcommand{\map}{\mapsto}
+\newcommand{\embedsin}{\hookrightarrow}
+\newcommand{\implies}{\Rightarrow}
 \newcommand{\qed}{\blacksquare}

 %-------------------------------------------------------------------
--- a/lib/chunks/category.rb
+++ b/lib/chunks/category.rb
@ -16,7 +16,7 @@ class Category < Chunk::Abstract
 def initialize(match_data, content)
    super(match_data, content)
    @hidden = match_data[1]
-    @list = match_data[2].split(',').map { |c| c.strip }
+    @list = match_data[2].split(',').map { |c| html_escape(c.strip) }
    @unmask_text = ''
    if @hidden
      @unmask_text = ''
--- a/lib/chunks/chunk.rb
+++ b/lib/chunks/chunk.rb
@ -74,6 +74,13 @@ module Chunk
      @content.delete_chunk(self)
    end

+    # Escape the characters &, <, > and " in +string+ as entity references.
+    # The ampersand is replaced first so the entities added later stay intact.
+    def html_escape(string)
+      escaped = string.gsub(/&/, "&amp;").gsub(/</, "&lt;")
+      escaped.gsub(/>/, "&gt;").gsub(/"/, "&quot;")
+    end
+
  end

 end
--- a/lib/sanitize.rb
+++ b/lib/sanitize.rb
@ -25,14 +25,14 @@

 module Sanitize

-  require 'html5lib/html5parser'
-  require 'html5lib/liberalxmlparser'
-  require 'html5lib/treewalkers'
-  require 'html5lib/treebuilders'
-  require 'html5lib/serializer'
-  require 'html5lib/sanitizer'
+  require 'html5/html5parser'
+  require 'html5/liberalxmlparser'
+  require 'html5/treewalkers'
+  require 'html5/treebuilders'
+  require 'html5/serializer'
+  require 'html5/sanitizer'

-  include HTML5lib
+  include HTML5

 # Sanitize a string, parsed using XHTML parsing rules.
 #
@ -52,12 +52,12 @@ module Sanitize
    options.each do |name, value|
      next unless %w(encoding treebuilder to_tree).include? name.to_s
      if name.to_s == 'treebuilder'
+        @treebuilder =  HTML5::TreeBuilders.get_tree_builder(value)
+        @treebuilder =  HTML5lib::TreeBuilders.get_tree_builder(value)
      else
        instance_variable_set("@#{name}", value)
      end
    end
-    parsed = XHTMLParser.parseFragment(html.to_ncr, {:tokenizer => HTMLSanitizer,
+    parsed = XHTMLParser.parse_fragment(html.to_ncr, {:tokenizer => HTMLSanitizer,
      :encoding => @encoding, :tree => @treebuilder })
    return parsed if @to_tree
    return parsed.to_s
@ -81,12 +81,12 @@ module Sanitize
    options.each do |name, value|
      next unless %w(encoding treebuilder to_tree).include? name.to_s
      if name.to_s == 'treebuilder'
+        @treebuilder =  HTML5::TreeBuilders.get_tree_builder(value)
+        @treebuilder =  HTML5lib::TreeBuilders.get_tree_builder(value)
      else
        instance_variable_set("@#{name}", value)
      end
    end
-    parsed = HTMLParser.parseFragment(html.to_ncr, {:tokenizer => HTMLSanitizer,
+    parsed = HTMLParser.parse_fragment(html.to_ncr, {:tokenizer => HTMLSanitizer,
      :encoding => @encoding, :tree => @treebuilder })
    return parsed if @to_tree
    return parsed.to_s
@ -98,13 +98,9 @@ module Sanitize
 #    sanitize_rexml(tree)                    -> string
 #
  def sanitize_rexml(tree)
-    tokens = TreeWalkers.getTreeWalker('rexml').new(tree.to_ncr)
-    HTMLSerializer.serialize(tokens, {:encoding=>'utf-8',
-      :quote_attr_values => true,
-      :minimize_boolean_attributes => false,
-      :use_trailing_solidus => true,
+    tokens = TreeWalkers.get_tree_walker('rexml').new(tree.to_ncr)
+    XHTMLSerializer.serialize(tokens, {:encoding=>'utf-8',
      :space_before_trailing_solidus => true,
-      :omit_optional_tags => false,
      :inject_meta_charset => false,
      :sanitize => true})
  end
--- a/public/s5/ui/default/math.css
+++ b/public/s5/ui/default/math.css
@ -16,4 +16,4 @@ table.plaintable {
    text-align:center;
    margin-left:30px;
 }
-
+.noborder td, .noborder th {border:0}
--- a/public/s5/ui/default/pretty.css
+++ b/public/s5/ui/default/pretty.css
@ -1,6 +1,6 @@
 /* Following are the presentation styles -- edit away! */

-body {background: #FFF; color: #000; font-size: 2em;}
+body {background: #FFF; color: #000; font-size: 1.6em;}
 :link, :visited {text-decoration: none; color: #00C;}
 #controls :active {color: #8A8 !important;}
 #controls :focus {outline: 1px dotted #272;}
--- a/public/s5/ui/default/slides.js
+++ b/public/s5/ui/default/slides.js
@ -1,4 +1,5 @@
-// S5 v1.2a1 slides.js -- released into the Public Domain
+// S5 v1.2a2 slides.js -- released into the Public Domain
+// Many modifications by Jacques Distler to allow operation as real XHTML.
 //
 // Please see http://www.meyerweb.com/eric/tools/s5/credits.html for information 
 // about all the wonderful and talented contributors to this code!
@ -30,6 +31,7 @@ var countdown = {

 var isIE = navigator.appName == 'Microsoft Internet Explorer' && navigator.userAgent.indexOf('Opera') < 1 ? 1 : 0;
 var isOp = navigator.userAgent.indexOf('Opera') > -1 ? 1 : 0;
+var isSa = navigator.userAgent.indexOf('Safari') > -1 ? 1 : 0;
 var isGe = navigator.userAgent.indexOf('Gecko') > -1 && navigator.userAgent.indexOf('Safari') < 1 ? 1 : 0;

 function hasClass(object, className) {
@ -111,7 +113,14 @@ function slideLabel() {
 		for (var o = 0; o < menunodes.length; o++) {
 			otext += nodeValue(menunodes[o]);
 		}
-		list.options[list.length] = new Option(n + ' : '  + otext, n);
+               if (isSa) {
+		  var option = createElement('option');
+		  option.setAttribute('value', n);
+		  option.appendChild(document.createTextNode(n + ' : '  + otext) );
+		  list.appendChild(option);
+                } else {
+		  list.options[list.length] = new Option(n + ' : '  + otext, n);
+                }
 	}
 }

@ -122,12 +131,12 @@ function currentSlide() {
 	} else {
 		cs = document.currentSlide;
 	}
-	var plink = document.createElement('a');
+	var plink = createElement('a');
 	plink.id = 'plink';
 	plink.setAttribute('href', '');
-	var csHere = document.createElement('span');
-	var csSep = document.createElement('span');
-	var csTotal = document.createElement('span');
+	var csHere = createElement('span');
+	var csSep = createElement('span');
+	var csTotal = createElement('span');
 	csHere.id = 'csHere';
 	csSep.id = 'csSep';
 	csTotal.id = 'csTotal';
@ -376,7 +385,7 @@ function slideJump() {
 function fixLinks() {
 	var thisUri = window.location.href;
 	thisUri = thisUri.slice(0, thisUri.length - window.location.hash.length);
-	var aelements = document.getElementsByTagName('A');
+	var aelements = document.getElementsByTagName('a');
 	for (var i = 0; i < aelements.length; i++) {
 		var a = aelements[i].href;
 		var slideID = a.match('\#slide[0-9]{1,2}');
@ -418,43 +427,43 @@ function permaLink() {
 function createControls() {
 	var controlsDiv = document.getElementById("controls");
 	if (!controlsDiv) return;
-	var controlForm = document.createElement('form');
+	var controlForm = createElement('form');
 	controlForm.id = 'controlForm';
 	controlForm.setAttribute('action', '#');
 	if (controlVis == 'hidden') {
 	   controlForm.setAttribute('onmouseover', 'showHide(\'s\');');
 	   controlForm.setAttribute('onmouseout',  'showHide(\'h\');');
 	}
-	var navLinks = document.createElement('div');
+	var navLinks = createElement('div');
 	navLinks.id = 'navLinks';
-	var showNotes = document.createElement('a');
+	var showNotes = createElement('a');
 	showNotes.id = 'show-notes';
 	showNotes.setAttribute('accesskey', 'n');
 	showNotes.setAttribute('href', 'javascript:createNotesWindow();');
 	showNotes.setAttribute('title', 'Show Notes');
 	showNotes.appendChild(document.createTextNode('\u2261'));
-	var toggle =  document.createElement('a');
+	var toggle =  createElement('a');
 	toggle.id = 'toggle';
 	toggle.setAttribute('accesskey', 't');
 	toggle.setAttribute('href', 'javascript:toggle();');
 	toggle.appendChild(document.createTextNode('\u00D8'));
-    var prev =  document.createElement('a');
+    var prev =  createElement('a');
 	prev.id = 'prev';
 	prev.setAttribute('accesskey', 'z');
 	prev.setAttribute('href', 'javascript:go(-1);');
 	prev.appendChild(document.createTextNode('\u00AB'));
-    var next =  document.createElement('a');
+    var next =  createElement('a');
 	next.id = 'next';
 	next.setAttribute('accesskey', 'x');
 	next.setAttribute('href', 'javascript:go(1);');
 	next.appendChild(document.createTextNode('\u00BB'));	
-	var navList =  document.createElement('div');
+	var navList =  createElement('div');
 	navList.id = 'navList';
 	if (controlVis != 'hidden') {
 	   navList.setAttribute('onmouseover', 'showHide(\'s\');');
 	   navList.setAttribute('onmouseout',  'showHide(\'h\');');
 	}
-	var jumplist = document.createElement('select');
+	var jumplist = createElement('select');
 	jumplist.id = 'jumplist';
 	jumplist.setAttribute('onchange', 'go(\'j\');');
 	navList.appendChild(jumplist);
@ -503,7 +512,7 @@ function fontScale() {  // causes layout problems in FireFox that get fixed if b
 function fontSize(value) {
 	if (!(s5ss = document.getElementById('s5ss'))) {
 		if (!document.createStyleSheet) {
-			document.getElementsByTagName('head')[0].appendChild(s5ss = document.createElement('style'));
+			document.getElementsByTagName('head')[0].appendChild(s5ss = createElement('style'));
 			s5ss.setAttribute('media','screen, projection');
 			s5ss.setAttribute('id','s5ss');
 		} else {
@ -784,6 +793,14 @@ function readTime(val) {
 	}
 }

+// Create an element in the XHTML namespace when the DOM offers createElementNS,
+// and fall back to the plain createElement call when it does not.
+function createElement(element) {
+  var xhtmlNS = 'http://www.w3.org/1999/xhtml';
+  if (typeof document.createElementNS == 'undefined') return document.createElement(element);
+  return document.createElementNS(xhtmlNS, element);
+}
+
 function windowChange() {
 	fontScale();
 }
--- a/public/s5/ui/s5-notes.xhtml
+++ b/public/s5/ui/s5-notes.xhtml
@ -0,0 +1,64 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN" "http://www.w3.org/Math/DTD/mathml2/xhtml-math11-f.dtd" >
+
+<!--  Do not edit this document! The system will likely break if you do. -->
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<title>Notes</title>
+<link rel="stylesheet" href="default/notes.css" type="text/css" />
+<script type="text/javascript">
+// <![CDATA[
+	document.onkeyup = opener.keys;
+	document.onkeypress = opener.trap;
+	document.onclick = opener.clicker;
+// ]]>
+</script>
+</head>
+
+<body onload="opener.s5NotesWindowLoaded=true;" onunload="opener.s5NotesWindowLoaded=false;">
+
+
+<div class="timers" id="elapsed">
+<h1>
+<a href="#" onclick="opener.minimizeTimer('elapsed'); return false;">Elapsed Time</a>
+</h1>
+<ul>
+<li>
+<h2>Presentation</h2>
+<span class="clock" id="elapsed-presentation">00:00:00</span>
+</li>
+<li>
+<h2>Current Slide</h2>
+<span class="clock" id="elapsed-slide">00:00:00</span>
+</li>
+</ul>
+<div class="controls">
+<a href="#reset-elapsed" onclick="opener.resetElapsedTime(); return false;" title="Reset Elapsed Time">|&larr;</a>
+</div>
+</div>
+
+<div class="timers" id="remaining">
+<h1>
+<a href="#" onclick="opener.minimizeTimer('remaining'); return false;">Remaining Time</a>
+</h1>
+<p>
+<a href="#subtract-remaining" class="control" id="minus" onclick="opener.alterRemainingTime('-5'); return false;" title="Subtract 5 Minutes">-</a>
+<span class="clock" id="timeLeft">00:00:00</span>
+<a href="#add-remaining" class="control" id="plus" onclick="opener.alterRemainingTime('5'); return false;" title="Add 5 Minutes">+</a>
+</p>
+<div class="controls">
+<form action="#" onsubmit="opener.resetRemainingTime(); return false;">
+<input type="text" class="text" id="startFrom" value="0" size="4" maxlength="4" />
+<a href="#toggle-remaining" onclick="opener.toggleRemainingTime(); return false;" title="Pause/Run Remaining Time">||</a>
+<a href="#reset-remaining" onclick="opener.resetRemainingTime(); return false;" title="Reset Remaining Time">|&larr;</a>
+</form>
+</div>
+</div>
+
+<h2 id="slide">...</h2>
+<div id="notes"></div>
+
+<h2 id="next">...</h2>
+<div id="nextnotes"></div>
+
+</body>
+</html>
--- a/vendor/plugins/HTML5lib/History.txt
+++ b/vendor/plugins/HTML5lib/History.txt
@ -0,0 +1,5 @@
+== 0.1.0 / 2007-08-07
+
+* 1 major enhancement
+  * Birthday!
+
--- a/vendor/plugins/HTML5lib/Manifest.txt
+++ b/vendor/plugins/HTML5lib/Manifest.txt
@ -0,0 +1,59 @@
+History.txt
+Manifest.txt
+README
+Rakefile.rb
+lib/html5.rb
+lib/html5/constants.rb
+lib/html5/filters/base.rb
+lib/html5/filters/inject_meta_charset.rb
+lib/html5/filters/optionaltags.rb
+lib/html5/filters/sanitizer.rb
+lib/html5/filters/whitespace.rb
+lib/html5/html5parser.rb
+lib/html5/html5parser/after_body_phase.rb
+lib/html5/html5parser/after_frameset_phase.rb
+lib/html5/html5parser/after_head_phase.rb
+lib/html5/html5parser/before_head_phase.rb
+lib/html5/html5parser/in_body_phase.rb
+lib/html5/html5parser/in_caption_phase.rb
+lib/html5/html5parser/in_cell_phase.rb
+lib/html5/html5parser/in_column_group_phase.rb
+lib/html5/html5parser/in_frameset_phase.rb
+lib/html5/html5parser/in_head_phase.rb
+lib/html5/html5parser/in_row_phase.rb
+lib/html5/html5parser/in_select_phase.rb
+lib/html5/html5parser/in_table_body_phase.rb
+lib/html5/html5parser/in_table_phase.rb
+lib/html5/html5parser/initial_phase.rb
+lib/html5/html5parser/phase.rb
+lib/html5/html5parser/root_element_phase.rb
+lib/html5/html5parser/trailing_end_phase.rb
+lib/html5/inputstream.rb
+lib/html5/liberalxmlparser.rb
+lib/html5/sanitizer.rb
+lib/html5/serializer.rb
+lib/html5/serializer/htmlserializer.rb
+lib/html5/serializer/xhtmlserializer.rb
+lib/html5/tokenizer.rb
+lib/html5/treebuilders.rb
+lib/html5/treebuilders/base.rb
+lib/html5/treebuilders/hpricot.rb
+lib/html5/treebuilders/rexml.rb
+lib/html5/treebuilders/simpletree.rb
+lib/html5/treewalkers.rb
+lib/html5/treewalkers/base.rb
+lib/html5/treewalkers/hpricot.rb
+lib/html5/treewalkers/rexml.rb
+lib/html5/treewalkers/simpletree.rb
+lib/html5/version.rb
+parse.rb
+tests/preamble.rb
+tests/test_encoding.rb
+tests/test_lxp.rb
+tests/test_parser.rb
+tests/test_sanitizer.rb
+tests/test_serializer.rb
+tests/test_stream.rb
+tests/test_tokenizer.rb
+tests/test_treewalkers.rb
+tests/tokenizer_test_parser.rb
--- a/vendor/plugins/HTML5lib/README
+++ b/vendor/plugins/HTML5lib/README
@ -1,9 +1,45 @@
-= HTML5lib
+html5
+    by Ryan King, et al
+    http://code.google.com/p/html5lib

-== Basic Usage
+== DESCRIPTION:

-    require 'html5lib'
+A ruby implementation of the parsing algorithm in HTML5.

-    doc = HTML5lib.parse('<html>...</html>')

-    doc.class # REXML::Document
+== FEATURES/PROBLEMS:
+
+
+
+== SYNOPSIS:
+
+  TODO
+
+== REQUIREMENTS:
+
+* chardet, only tested with 0.9.0
+
+== INSTALL:
+
+* sudo gem install html5
+
+== LICENSE:
+
+Copyright (c) 2006-2007 The Authors
+
+Contributors:
+James Graham - jg307@cam.ac.uk
+Anne van Kesteren - annevankesteren@gmail.com
+Lachlan Hunt - lachlan.hunt@lachy.id.au
+Matt McDonald - kanashii@kanashii.ca
+Sam Ruby - rubys@intertwingly.net
+Ian Hickson (Google) - ian@hixie.ch
+Thomas Broyer - t.broyer@ltgt.net
+Jacques Distler - distler@golem.ph.utexas.edu
+Ryan King - ryan@theryanking.com
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--- a/vendor/plugins/HTML5lib/Rakefile.rb
+++ b/vendor/plugins/HTML5lib/Rakefile.rb
@ -1,7 +1,33 @@
 require 'rake'
-require 'rake/testtask'
+require 'hoe'
+require 'lib/html5/version'

-Rake::TestTask.new do |task|
-  task.pattern = 'tests/test_*.rb'
-  task.verbose = true
+Hoe.new("html5", HTML5::VERSION) do |p|
+  p.name = "html5"
+  p.description = p.paragraphs_of('README', 2..5).join("\n\n")
+  p.summary = "HTML5 parser/tokenizer."
+
+  p.author   = ['Ryan King'] # TODO: add more names
+  p.email    = 'ryan@theryanking.com'
+  p.url      = 'http://code.google.com/p/html5lib'
+  p.need_zip = true
+
+  p.extra_deps << ['chardet', '>= 0.9.0']
+  p.changes = p.paragraphs_of('History.txt', 0..1).join("\n\n")
 end
+
+require 'rcov/rcovtask'
+
+namespace :test do 
+  namespace :coverage do
+    desc "Delete aggregate coverage data."
+    task(:clean) { rm_f "coverage.data" }
+  end
+  desc 'Aggregate code coverage for unit, functional and integration tests'
+  Rcov::RcovTask.new(:coverage => "test:coverage:clean") do |t|
+    t.libs << "tests"
+    t.test_files = FileList["tests/test_*.rb"]
+    t.output_dir = "tests/coverage/"
+    t.verbose = true
+  end
+end
--- a/vendor/plugins/HTML5lib/bin/html5
+++ b/vendor/plugins/HTML5lib/bin/html5
@ -0,0 +1,215 @@
+#!/usr/bin/env ruby
+
+$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')  # the library lives in ../lib relative to this script
+
+def parse(opts, args)  # Parse the input named by the last element of args as directed by opts, then report the result.
+  encoding = nil
+  # The last argument selects the input: an http:// URL, '-' for stdin, otherwise a path handed to open.
+  f = args[-1]
+  if f
+    begin
+      if f[0..6] == 'http://'
+        require 'open-uri'
+        f = URI.parse(f).open
+        encoding = f.charset
+      elsif f == '-'
+        f = $stdin
+      else
+        f = open(f)
+      end
+    rescue  # NOTE(review): errors opening the input are swallowed, leaving f as the original String - confirm this fallback is intended
+    end
+  else
+    $stderr.write("No filename provided. Use -h for help\n")
+    exit(1)
+  end
+  # Look up the tree builder by the name given in opts.treebuilder.
+  require 'html5/treebuilders'
+  treebuilder = HTML5::TreeBuilders[opts.treebuilder]
+  # XML output uses HTML5::XMLParser; every other output mode uses HTML5::HTMLParser.
+  if opts.output == :xml
+    require 'html5/liberalxmlparser'
+    p = HTML5::XMLParser.new(:tree=>treebuilder)
+  else
+    require 'html5/html5parser'
+    p = HTML5::HTMLParser.new(:tree=>treebuilder)
+  end
+  # Build the argument list for the chosen parse method; 'div' is presumably the fragment container - TODO confirm.
+  if opts.parsemethod == :parse
+    args = [f, encoding]
+  else
+    args = [f, 'div', encoding]
+  end
+  # Run the parse under the profiler, with timing, or plainly; the profile branch prints no document output.
+  if opts.profile
+    require 'profiler'
+    Profiler__::start_profile
+    p.send(opts.parsemethod, *args)
+    Profiler__::stop_profile
+    Profiler__::print_profile($stderr)
+  elsif opts.time
+    require 'time' # TODO: switch to benchmark
+    t0 = Time.new
+    document = p.send(opts.parsemethod, *args)
+    t1 = Time.new
+    print_output(p, document, opts)
+    t2 = Time.new
+    puts "\n\nRun took: %fs (plus %fs to print the output)"%[t1-t0, t2-t1]
+  else
+    document = p.send(opts.parsemethod, *args)
+    print_output(p, document, opts)
+  end
+end
+
+def print_output(parser, document, opts)  # Write the parsed document to stdout in the form selected by opts.output.
+  puts "Encoding: #{parser.tokenizer.stream.char_encoding}" if opts.encoding
+  # Dispatch on the output mode; a value matching no branch (e.g. nil from --no-html) prints no document.
+  case opts.output
+  when :xml
+    print document
+  when :html
+    require 'html5/treewalkers'
+    tokens = HTML5::TreeWalkers[opts.treebuilder].new(document)
+    require 'html5/serializer'
+    puts HTML5::HTMLSerializer.serialize(tokens, opts.serializer)
+  when :hilite
+    print document.hilite
+  when :tree
+    document = [document] unless document.respond_to?(:each)
+    document.each {|fragment| puts parser.tree.testSerializer(fragment)}
+  end
+  # When opts.error is set, list each parse error as "Line L Col C message".
+  if opts.error
+    errList=[]
+    for pos, message in parser.errors
+        errList << ("Line %i Col %i"%pos + " " + message)
+    end
+    $stdout.write("\nParse errors:\n" + errList.join("\n")+"\n")
+  end
+end
+
+require 'ostruct'
+options = OpenStruct.new
+options.profile = false
+options.time = false
+options.output = :html
+options.treebuilder = 'simpletree'
+options.error = false
+options.encoding = false
+options.parsemethod = :parse
+options.serializer = {
+  :encoding => 'utf-8',
+  :omit_optional_tags => false,
+  :inject_meta_charset => false
+}
+
+require 'optparse'
+opts = OptionParser.new do |opts|
+  opts.separator ""
+  opts.separator "Parse Options:"
+
+  opts.on("-b", "--treebuilder NAME") do |treebuilder|
+    options.treebuilder = treebuilder
+  end
+
+  opts.on("-f", "--fragment", "Parse as a fragment") do |parse|
+    options.parsemethod = :parse_fragment
+  end
+
+  opts.separator ""
+  opts.separator "Filter Options:"
+
+  opts.on("--[no-]inject-meta-charset", "inject <meta charset>") do |inject|
+    options.serializer[:inject_meta_charset] = inject
+  end
+
+  opts.on("--[no-]strip-whitespace", "strip unnecessary whitespace") do |strip|
+    options.serializer[:strip_whitespace] = strip
+  end
+
+  opts.on("--[no-]sanitize", "escape unsafe tags") do |sanitize|
+    options.serializer[:sanitize] = sanitize
+  end
+
+  opts.separator ""
+  opts.separator "Output Options:"
+
+  opts.on("--tree", "output as debug tree") do |tree|
+    options.output = :tree
+  end
+  
+  opts.on("-x", "--xml", "output as xml") do |xml|
+    options.output = :xml
+    options.treebuilder = "rexml"
+  end
+  
+  opts.on("--[no-]html", "Output as html") do |html|
+    options.output = (html ? :html : nil)
+  end
+  
+  opts.on("--hilite", "Output as formatted highlighted code.") do |hilite|
+    options.output = :hilite
+  end
+  
+  opts.on("-e", "--error", "Print a list of parse errors") do |error|
+    options.error = error
+  end
+
+  opts.separator ""
+  opts.separator "Serialization Options:"
+
+  opts.on("--[no-]omit-optional-tags", "Omit optional tags") do |omit|
+    options.serializer[:omit_optional_tags] = omit
+  end
+
+  opts.on("--[no-]quote-attr-values", "Quote attribute values") do |quote|
+    options.serializer[:quote_attr_values] = quote
+  end
+
+  opts.on("--[no-]use-best-quote-char", "Use best quote character") do |best|
+    options.serializer[:use_best_quote_char] = best
+  end
+
+  opts.on("--quote-char C", "Use specified quote character") do |c|
+    options.serializer[:quote_char] = c
+  end
+
+  opts.on("--[no-]minimize-boolean-attributes", "Minimize boolean attributes") do |min|
+    options.serializer[:minimize_boolean_attributes] = min
+  end
+
+  opts.on("--[no-]use-trailing-solidus", "Use trailing solidus") do |slash|
+    options.serializer[:use_trailing_solidus] = slash
+  end
+
+  opts.on("--[no-]escape-lt-in-attrs", "Escape less than signs in attribute values") do |lt|
+    options.serializer[:escape_lt_in_attrs] = lt
+  end
+
+  opts.on("--[no-]escape-rcdata", "Escape rcdata element values") do |rcdata|
+    options.serializer[:escape_rcdata] = rcdata
+  end
+
+  opts.separator ""
+  opts.separator "Other Options:"
+
+  opts.on("-p", "--[no-]profile", "Profile the run") do |profile|
+    options.profile = profile
+  end
+    
+  opts.on("-t", "--[no-]time", "Time the run") do |time|
+    options.time = time
+  end
+    
+  opts.on("-c", "--[no-]encoding", "Print character encoding used") do |encoding|
+    options.encoding = encoding
+  end
+
+  opts.on_tail("-h", "--help", "Show this message") do
+    puts opts
+    exit
+  end
+end
+
+opts.parse!(ARGV)
+parse options, ARGV
--- a/vendor/plugins/HTML5lib/lib/html5.rb
+++ b/vendor/plugins/HTML5lib/lib/html5.rb
@ -0,0 +1,13 @@
+require 'html5/html5parser'
+require 'html5/version'
+
+module HTML5
+  # Parse +stream+ as a complete HTML document; +options+ are handed to HTMLParser.parse unchanged.
+  def self.parse(stream, options={})
+    HTMLParser.parse(stream, options)
+  end
+  # Parse +stream+ as an HTML fragment; +options+ are handed to HTMLParser.parse_fragment unchanged.
+  def self.parse_fragment(stream, options={})
+    HTMLParser.parse_fragment(stream, options)
+  end
+end
--- a/vendor/plugins/HTML5lib/lib/html5/constants.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/constants.rb
@ -0,0 +1,818 @@
+module HTML5
+
+  class EOF < Exception; end
+
+  CONTENT_MODEL_FLAGS = [
+      :PCDATA,
+      :RCDATA,
+      :CDATA,
+      :PLAINTEXT
+  ]
+
+  SCOPING_ELEMENTS = %w[
+      button
+      caption
+      html
+      marquee
+      object
+      table
+      td
+      th
+  ]
+
+  FORMATTING_ELEMENTS = %w[
+      a
+      b
+      big
+      em
+      font
+      i
+      nobr
+      s
+      small
+      strike
+      strong
+      tt
+      u
+  ]
+
+  SPECIAL_ELEMENTS = %w[
+      address
+      area
+      base
+      basefont
+      bgsound
+      blockquote
+      body
+      br
+      center
+      col
+      colgroup
+      dd
+      dir
+      div
+      dl
+      dt
+      embed
+      fieldset
+      form
+      frame
+      frameset
+      h1
+      h2
+      h3
+      h4
+      h5
+      h6
+      head
+      hr
+      iframe
+      image
+      img
+      input
+      isindex
+      li
+      link
+      listing
+      menu
+      meta
+      noembed
+      noframes
+      noscript
+      ol
+      optgroup
+      option
+      p
+      param
+      plaintext
+      pre
+      script
+      select
+      spacer
+      style
+      tbody
+      textarea
+      tfoot
+      thead
+      title
+      tr
+      ul
+      wbr
+  ]
+
+  SPACE_CHARACTERS = %W[
+      \t
+      \n
+      \x0B
+      \x0C
+      \x20
+      \r
+  ]
+
+  TABLE_INSERT_MODE_ELEMENTS = %w[
+      table
+      tbody
+      tfoot
+      thead
+      tr
+  ]
+
+  ASCII_LOWERCASE = ('a'..'z').to_a.join('')
+  ASCII_UPPERCASE = ('A'..'Z').to_a.join('')
+  ASCII_LETTERS = ASCII_LOWERCASE + ASCII_UPPERCASE
+  DIGITS = '0'..'9'
+  HEX_DIGITS = DIGITS.to_a + ('a'..'f').to_a + ('A'..'F').to_a
+
+  # Heading elements need to be ordered 
+  HEADING_ELEMENTS = %w[
+      h1
+      h2
+      h3
+      h4
+      h5
+      h6
+  ]
+
+  # XXX What about event-source and command?
+  VOID_ELEMENTS = %w[
+      base
+      link
+      meta
+      hr
+      br
+      img
+      embed
+      param
+      area
+      col
+      input
+  ]
+
+  CDATA_ELEMENTS = %w[title textarea]
+
+  RCDATA_ELEMENTS = %w[
+    style
+    script
+    xmp
+    iframe
+    noembed
+    noframes
+    noscript
+  ]
+
+  BOOLEAN_ATTRIBUTES = {
+    :global    => %w[irrelevant],
+    'style'    => %w[scoped],
+    'img'      => %w[ismap],
+    'audio'    => %w[autoplay controls],
+    'video'    => %w[autoplay controls],
+    'script'   => %w[defer async],
+    'details'  => %w[open],
+    'datagrid' => %w[multiple disabled],
+    'command'  => %w[hidden disabled checked default],
+    'menu'     => %w[autosubmit],
+    'fieldset' => %w[disabled readonly],
+    'option'   => %w[disabled readonly selected],
+    'optgroup' => %w[disabled readonly],
+    'button'   => %w[disabled autofocus],
+    'input'    => %w[disabled readonly required autofocus checked ismap],
+    'select'   => %w[disabled readonly autofocus multiple],
+    'output'   => %w[disabled readonly]
+
+  }
+
+  # entitiesWindows1252 has to be _ordered_ and needs to have an index.
+  ENTITIES_WINDOWS1252 = [
+      8364,  # 0x80  0x20AC  EURO SIGN
+      65533, # 0x81          UNDEFINED
+      8218,  # 0x82  0x201A  SINGLE LOW-9 QUOTATION MARK
+      402,   # 0x83  0x0192  LATIN SMALL LETTER F WITH HOOK
+      8222,  # 0x84  0x201E  DOUBLE LOW-9 QUOTATION MARK
+      8230,  # 0x85  0x2026  HORIZONTAL ELLIPSIS
+      8224,  # 0x86  0x2020  DAGGER
+      8225,  # 0x87  0x2021  DOUBLE DAGGER
+      710,   # 0x88  0x02C6  MODIFIER LETTER CIRCUMFLEX ACCENT
+      8240,  # 0x89  0x2030  PER MILLE SIGN
+      352,   # 0x8A  0x0160  LATIN CAPITAL LETTER S WITH CARON
+      8249,  # 0x8B  0x2039  SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+      338,   # 0x8C  0x0152  LATIN CAPITAL LIGATURE OE
+      65533, # 0x8D          UNDEFINED
+      381,   # 0x8E  0x017D  LATIN CAPITAL LETTER Z WITH CARON
+      65533, # 0x8F          UNDEFINED
+      65533, # 0x90          UNDEFINED
+      8216,  # 0x91  0x2018  LEFT SINGLE QUOTATION MARK
+      8217,  # 0x92  0x2019  RIGHT SINGLE QUOTATION MARK
+      8220,  # 0x93  0x201C  LEFT DOUBLE QUOTATION MARK
+      8221,  # 0x94  0x201D  RIGHT DOUBLE QUOTATION MARK
+      8226,  # 0x95  0x2022  BULLET
+      8211,  # 0x96  0x2013  EN DASH
+      8212,  # 0x97  0x2014  EM DASH
+      732,   # 0x98  0x02DC  SMALL TILDE
+      8482,  # 0x99  0x2122  TRADE MARK SIGN
+      353,   # 0x9A  0x0161  LATIN SMALL LETTER S WITH CARON
+      8250,  # 0x9B  0x203A  SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+      339,   # 0x9C  0x0153  LATIN SMALL LIGATURE OE
+      65533, # 0x9D          UNDEFINED
+      382,   # 0x9E  0x017E  LATIN SMALL LETTER Z WITH CARON
+      376    # 0x9F  0x0178  LATIN CAPITAL LETTER Y WITH DIAERESIS
+  ]
+
+  # ENTITIES was generated from Python using the following code:
+  #
+  # import constants
+  # entities = constants.entities.items()
+  # entities.sort()
+  # list = [ ' '.join([repr(entity), '=>', ord(value)<128 and 
+  #   repr(str(value)) or repr(value.encode('utf-8')).replace("'",'"')])
+  #   for entity, value in entities]
+  #   print '  ENTITIES = {\n    ' + ',\n    '.join(list) + '\n  }'
+
+  ENTITIES = {
+    'AElig'     => "\xc3\x86",
+    'AElig;'    => "\xc3\x86",
+    'AMP'       => '&',
+    'AMP;'      => '&',
+    'Aacute'    => "\xc3\x81",
+    'Aacute;'   => "\xc3\x81",
+    'Acirc'     => "\xc3\x82",
+    'Acirc;'    => "\xc3\x82",
+    'Agrave'    => "\xc3\x80",
+    'Agrave;'   => "\xc3\x80",
+    'Alpha;'    => "\xce\x91",
+    'Aring'     => "\xc3\x85",
+    'Aring;'    => "\xc3\x85",
+    'Atilde'    => "\xc3\x83",
+    'Atilde;'   => "\xc3\x83",
+    'Auml'      => "\xc3\x84",
+    'Auml;'     => "\xc3\x84",
+    'Beta;'     => "\xce\x92",
+    'COPY'      => "\xc2\xa9",
+    'COPY;'     => "\xc2\xa9",
+    'Ccedil'    => "\xc3\x87",
+    'Ccedil;'   => "\xc3\x87",
+    'Chi;'      => "\xce\xa7",
+    'Dagger;'   => "\xe2\x80\xa1",
+    'Delta;'    => "\xce\x94",
+    'ETH'       => "\xc3\x90",
+    'ETH;'      => "\xc3\x90",
+    'Eacute'    => "\xc3\x89",
+    'Eacute;'   => "\xc3\x89",
+    'Ecirc'     => "\xc3\x8a",
+    'Ecirc;'    => "\xc3\x8a",
+    'Egrave'    => "\xc3\x88",
+    'Egrave;'   => "\xc3\x88",
+    'Epsilon;'  => "\xce\x95",
+    'Eta;'      => "\xce\x97",
+    'Euml'      => "\xc3\x8b",
+    'Euml;'     => "\xc3\x8b",
+    'GT'        => '>',
+    'GT;'       => '>',
+    'Gamma;'    => "\xce\x93",
+    'Iacute'    => "\xc3\x8d",
+    'Iacute;'   => "\xc3\x8d",
+    'Icirc'     => "\xc3\x8e",
+    'Icirc;'    => "\xc3\x8e",
+    'Igrave'    => "\xc3\x8c",
+    'Igrave;'   => "\xc3\x8c",
+    'Iota;'     => "\xce\x99",
+    'Iuml'      => "\xc3\x8f",
+    'Iuml;'     => "\xc3\x8f",
+    'Kappa;'    => "\xce\x9a",
+    'LT'        => '<',
+    'LT;'       => '<',
+    'Lambda;'   => "\xce\x9b",
+    'Mu;'       => "\xce\x9c",
+    'Ntilde'    => "\xc3\x91",
+    'Ntilde;'   => "\xc3\x91",
+    'Nu;'       => "\xce\x9d",
+    'OElig;'    => "\xc5\x92",
+    'Oacute'    => "\xc3\x93",
+    'Oacute;'   => "\xc3\x93",
+    'Ocirc'     => "\xc3\x94",
+    'Ocirc;'    => "\xc3\x94",
+    'Ograve'    => "\xc3\x92",
+    'Ograve;'   => "\xc3\x92",
+    'Omega;'    => "\xce\xa9",
+    'Omicron;'  => "\xce\x9f",
+    'Oslash'    => "\xc3\x98",
+    'Oslash;'   => "\xc3\x98",
+    'Otilde'    => "\xc3\x95",
+    'Otilde;'   => "\xc3\x95",
+    'Ouml'      => "\xc3\x96",
+    'Ouml;'     => "\xc3\x96",
+    'Phi;'      => "\xce\xa6",
+    'Pi;'       => "\xce\xa0",
+    'Prime;'    => "\xe2\x80\xb3",
+    'Psi;'      => "\xce\xa8",
+    'QUOT'      => '"',
+    'QUOT;'     => '"',
+    'REG'       => "\xc2\xae",
+    'REG;'      => "\xc2\xae",
+    'Rho;'      => "\xce\xa1",
+    'Scaron;'   => "\xc5\xa0",
+    'Sigma;'    => "\xce\xa3",
+    'THORN'     => "\xc3\x9e",
+    'THORN;'    => "\xc3\x9e",
+    'TRADE;'    => "\xe2\x84\xa2",
+    'Tau;'      => "\xce\xa4",
+    'Theta;'    => "\xce\x98",
+    'Uacute'    => "\xc3\x9a",
+    'Uacute;'   => "\xc3\x9a",
+    'Ucirc'     => "\xc3\x9b",
+    'Ucirc;'    => "\xc3\x9b",
+    'Ugrave'    => "\xc3\x99",
+    'Ugrave;'   => "\xc3\x99",
+    'Upsilon;'  => "\xce\xa5",
+    'Uuml'      => "\xc3\x9c",
+    'Uuml;'     => "\xc3\x9c",
+    'Xi;'       => "\xce\x9e",
+    'Yacute'    => "\xc3\x9d",
+    'Yacute;'   => "\xc3\x9d",
+    'Yuml;'     => "\xc5\xb8",
+    'Zeta;'     => "\xce\x96",
+    'aacute'    => "\xc3\xa1",
+    'aacute;'   => "\xc3\xa1",
+    'acirc'     => "\xc3\xa2",
+    'acirc;'    => "\xc3\xa2",
+    'acute'     => "\xc2\xb4",
+    'acute;'    => "\xc2\xb4",
+    'aelig'     => "\xc3\xa6",
+    'aelig;'    => "\xc3\xa6",
+    'agrave'    => "\xc3\xa0",
+    'agrave;'   => "\xc3\xa0",
+    'alefsym;'  => "\xe2\x84\xb5",
+    'alpha;'    => "\xce\xb1",
+    'amp'       => '&',
+    'amp;'      => '&',
+    'and;'      => "\xe2\x88\xa7",
+    'ang;'      => "\xe2\x88\xa0",
+    'apos;'     => "'",
+    'aring'     => "\xc3\xa5",
+    'aring;'    => "\xc3\xa5",
+    'asymp;'    => "\xe2\x89\x88",
+    'atilde'    => "\xc3\xa3",
+    'atilde;'   => "\xc3\xa3",
+    'auml'      => "\xc3\xa4",
+    'auml;'     => "\xc3\xa4",
+    'bdquo;'    => "\xe2\x80\x9e",
+    'beta;'     => "\xce\xb2",
+    'brvbar'    => "\xc2\xa6",
+    'brvbar;'   => "\xc2\xa6",
+    'bull;'     => "\xe2\x80\xa2",
+    'cap;'      => "\xe2\x88\xa9",
+    'ccedil'    => "\xc3\xa7",
+    'ccedil;'   => "\xc3\xa7",
+    'cedil'     => "\xc2\xb8",
+    'cedil;'    => "\xc2\xb8",
+    'cent'      => "\xc2\xa2",
+    'cent;'     => "\xc2\xa2",
+    'chi;'      => "\xcf\x87",
+    'circ;'     => "\xcb\x86",
+    'clubs;'    => "\xe2\x99\xa3",
+    'cong;'     => "\xe2\x89\x85",
+    'copy'      => "\xc2\xa9",
+    'copy;'     => "\xc2\xa9",
+    'crarr;'    => "\xe2\x86\xb5",
+    'cup;'      => "\xe2\x88\xaa",
+    'curren'    => "\xc2\xa4",
+    'curren;'   => "\xc2\xa4",
+    'dArr;'     => "\xe2\x87\x93",
+    'dagger;'   => "\xe2\x80\xa0",
+    'darr;'     => "\xe2\x86\x93",
+    'deg'       => "\xc2\xb0",
+    'deg;'      => "\xc2\xb0",
+    'delta;'    => "\xce\xb4",
+    'diams;'    => "\xe2\x99\xa6",
+    'divide'    => "\xc3\xb7",
+    'divide;'   => "\xc3\xb7",
+    'eacute'    => "\xc3\xa9",
+    'eacute;'   => "\xc3\xa9",
+    'ecirc'     => "\xc3\xaa",
+    'ecirc;'    => "\xc3\xaa",
+    'egrave'    => "\xc3\xa8",
+    'egrave;'   => "\xc3\xa8",
+    'empty;'    => "\xe2\x88\x85",
+    'emsp;'     => "\xe2\x80\x83",
+    'ensp;'     => "\xe2\x80\x82",
+    'epsilon;'  => "\xce\xb5",
+    'equiv;'    => "\xe2\x89\xa1",
+    'eta;'      => "\xce\xb7",
+    'eth'       => "\xc3\xb0",
+    'eth;'      => "\xc3\xb0",
+    'euml'      => "\xc3\xab",
+    'euml;'     => "\xc3\xab",
+    'euro;'     => "\xe2\x82\xac",
+    'exist;'    => "\xe2\x88\x83",
+    'fnof;'     => "\xc6\x92",
+    'forall;'   => "\xe2\x88\x80",
+    'frac12'    => "\xc2\xbd",
+    'frac12;'   => "\xc2\xbd",
+    'frac14'    => "\xc2\xbc",
+    'frac14;'   => "\xc2\xbc",
+    'frac34'    => "\xc2\xbe",
+    'frac34;'   => "\xc2\xbe",
+    'frasl;'    => "\xe2\x81\x84",
+    'gamma;'    => "\xce\xb3",
+    'ge;'       => "\xe2\x89\xa5",
+    'gt'        => '>',
+    'gt;'       => '>',
+    'hArr;'     => "\xe2\x87\x94",
+    'harr;'     => "\xe2\x86\x94",
+    'hearts;'   => "\xe2\x99\xa5",
+    'hellip;'   => "\xe2\x80\xa6",
+    'iacute'    => "\xc3\xad",
+    'iacute;'   => "\xc3\xad",
+    'icirc'     => "\xc3\xae",
+    'icirc;'    => "\xc3\xae",
+    'iexcl'     => "\xc2\xa1",
+    'iexcl;'    => "\xc2\xa1",
+    'igrave'    => "\xc3\xac",
+    'igrave;'   => "\xc3\xac",
+    'image;'    => "\xe2\x84\x91",
+    'infin;'    => "\xe2\x88\x9e",
+    'int;'      => "\xe2\x88\xab",
+    'iota;'     => "\xce\xb9",
+    'iquest'    => "\xc2\xbf",
+    'iquest;'   => "\xc2\xbf",
+    'isin;'     => "\xe2\x88\x88",
+    'iuml'      => "\xc3\xaf",
+    'iuml;'     => "\xc3\xaf",
+    'kappa;'    => "\xce\xba",
+    'lArr;'     => "\xe2\x87\x90",
+    'lambda;'   => "\xce\xbb",
+    'lang;'     => "\xe3\x80\x88",
+    'laquo'     => "\xc2\xab",
+    'laquo;'    => "\xc2\xab",
+    'larr;'     => "\xe2\x86\x90",
+    'lceil;'    => "\xe2\x8c\x88",
+    'ldquo;'    => "\xe2\x80\x9c",
+    'le;'       => "\xe2\x89\xa4",
+    'lfloor;'   => "\xe2\x8c\x8a",
+    'lowast;'   => "\xe2\x88\x97",
+    'loz;'      => "\xe2\x97\x8a",
+    'lrm;'      => "\xe2\x80\x8e",
+    'lsaquo;'   => "\xe2\x80\xb9",
+    'lsquo;'    => "\xe2\x80\x98",
+    'lt'        => '<',
+    'lt;'       => '<',
+    'macr'      => "\xc2\xaf",
+    'macr;'     => "\xc2\xaf",
+    'mdash;'    => "\xe2\x80\x94",
+    'micro'     => "\xc2\xb5",
+    'micro;'    => "\xc2\xb5",
+    'middot'    => "\xc2\xb7",
+    'middot;'   => "\xc2\xb7",
+    'minus;'    => "\xe2\x88\x92",
+    'mu;'       => "\xce\xbc",
+    'nabla;'    => "\xe2\x88\x87",
+    'nbsp'      => "\xc2\xa0",
+    'nbsp;'     => "\xc2\xa0",
+    'ndash;'    => "\xe2\x80\x93",
+    'ne;'       => "\xe2\x89\xa0",
+    'ni;'       => "\xe2\x88\x8b",
+    'not'       => "\xc2\xac",
+    'not;'      => "\xc2\xac",
+    'notin;'    => "\xe2\x88\x89",
+    'nsub;'     => "\xe2\x8a\x84",
+    'ntilde'    => "\xc3\xb1",
+    'ntilde;'   => "\xc3\xb1",
+    'nu;'       => "\xce\xbd",
+    'oacute'    => "\xc3\xb3",
+    'oacute;'   => "\xc3\xb3",
+    'ocirc'     => "\xc3\xb4",
+    'ocirc;'    => "\xc3\xb4",
+    'oelig;'    => "\xc5\x93",
+    'ograve'    => "\xc3\xb2",
+    'ograve;'   => "\xc3\xb2",
+    'oline;'    => "\xe2\x80\xbe",
+    'omega;'    => "\xcf\x89",
+    'omicron;'  => "\xce\xbf",
+    'oplus;'    => "\xe2\x8a\x95",
+    'or;'       => "\xe2\x88\xa8",
+    'ordf'      => "\xc2\xaa",
+    'ordf;'     => "\xc2\xaa",
+    'ordm'      => "\xc2\xba",
+    'ordm;'     => "\xc2\xba",
+    'oslash'    => "\xc3\xb8",
+    'oslash;'   => "\xc3\xb8",
+    'otilde'    => "\xc3\xb5",
+    'otilde;'   => "\xc3\xb5",
+    'otimes;'   => "\xe2\x8a\x97",
+    'ouml'      => "\xc3\xb6",
+    'ouml;'     => "\xc3\xb6",
+    'para'      => "\xc2\xb6",
+    'para;'     => "\xc2\xb6",
+    'part;'     => "\xe2\x88\x82",
+    'permil;'   => "\xe2\x80\xb0",
+    'perp;'     => "\xe2\x8a\xa5",
+    'phi;'      => "\xcf\x86",
+    'pi;'       => "\xcf\x80",
+    'piv;'      => "\xcf\x96",
+    'plusmn'    => "\xc2\xb1",
+    'plusmn;'   => "\xc2\xb1",
+    'pound'     => "\xc2\xa3",
+    'pound;'    => "\xc2\xa3",
+    'prime;'    => "\xe2\x80\xb2",
+    'prod;'     => "\xe2\x88\x8f",
+    'prop;'     => "\xe2\x88\x9d",
+    'psi;'      => "\xcf\x88",
+    'quot'      => '"',
+    'quot;'     => '"',
+    'rArr;'     => "\xe2\x87\x92",
+    'radic;'    => "\xe2\x88\x9a",
+    'rang;'     => "\xe3\x80\x89",
+    'raquo'     => "\xc2\xbb",
+    'raquo;'    => "\xc2\xbb",
+    'rarr;'     => "\xe2\x86\x92",
+    'rceil;'    => "\xe2\x8c\x89",
+    'rdquo;'    => "\xe2\x80\x9d",
+    'real;'     => "\xe2\x84\x9c",
+    'reg'       => "\xc2\xae",
+    'reg;'      => "\xc2\xae",
+    'rfloor;'   => "\xe2\x8c\x8b",
+    'rho;'      => "\xcf\x81",
+    'rlm;'      => "\xe2\x80\x8f",
+    'rsaquo;'   => "\xe2\x80\xba",
+    'rsquo;'    => "\xe2\x80\x99",
+    'sbquo;'    => "\xe2\x80\x9a",
+    'scaron;'   => "\xc5\xa1",
+    'sdot;'     => "\xe2\x8b\x85",
+    'sect'      => "\xc2\xa7",
+    'sect;'     => "\xc2\xa7",
+    'shy'       => "\xc2\xad",
+    'shy;'      => "\xc2\xad",
+    'sigma;'    => "\xcf\x83",
+    'sigmaf;'   => "\xcf\x82",
+    'sim;'      => "\xe2\x88\xbc",
+    'spades;'   => "\xe2\x99\xa0",
+    'sub;'      => "\xe2\x8a\x82",
+    'sube;'     => "\xe2\x8a\x86",
+    'sum;'      => "\xe2\x88\x91",
+    'sup1'      => "\xc2\xb9",
+    'sup1;'     => "\xc2\xb9",
+    'sup2'      => "\xc2\xb2",
+    'sup2;'     => "\xc2\xb2",
+    'sup3'      => "\xc2\xb3",
+    'sup3;'     => "\xc2\xb3",
+    'sup;'      => "\xe2\x8a\x83",
+    'supe;'     => "\xe2\x8a\x87",
+    'szlig'     => "\xc3\x9f",
+    'szlig;'    => "\xc3\x9f",
+    'tau;'      => "\xcf\x84",
+    'there4;'   => "\xe2\x88\xb4",
+    'theta;'    => "\xce\xb8",
+    'thetasym;' => "\xcf\x91",
+    'thinsp;'   => "\xe2\x80\x89",
+    'thorn'     => "\xc3\xbe",
+    'thorn;'    => "\xc3\xbe",
+    'tilde;'    => "\xcb\x9c",
+    'times'     => "\xc3\x97",
+    'times;'    => "\xc3\x97",
+    'trade;'    => "\xe2\x84\xa2",
+    'uArr;'     => "\xe2\x87\x91",
+    'uacute'    => "\xc3\xba",
+    'uacute;'   => "\xc3\xba",
+    'uarr;'     => "\xe2\x86\x91",
+    'ucirc'     => "\xc3\xbb",
+    'ucirc;'    => "\xc3\xbb",
+    'ugrave'    => "\xc3\xb9",
+    'ugrave;'   => "\xc3\xb9",
+    'uml'       => "\xc2\xa8",
+    'uml;'      => "\xc2\xa8",
+    'upsih;'    => "\xcf\x92",
+    'upsilon;'  => "\xcf\x85",
+    'uuml'      => "\xc3\xbc",
+    'uuml;'     => "\xc3\xbc",
+    'weierp;'   => "\xe2\x84\x98",
+    'xi;'       => "\xce\xbe",
+    'yacute'    => "\xc3\xbd",
+    'yacute;'   => "\xc3\xbd",
+    'yen'       => "\xc2\xa5",
+    'yen;'      => "\xc2\xa5",
+    'yuml'      => "\xc3\xbf",
+    'yuml;'     => "\xc3\xbf",
+    'zeta;'     => "\xce\xb6",
+    'zwj;'      => "\xe2\x80\x8d",
+    'zwnj;'     => "\xe2\x80\x8c"
+  }
+
+  ENCODINGS = %w[
+      ansi_x3.4-1968
+      iso-ir-6
+      ansi_x3.4-1986
+      iso_646.irv:1991
+      ascii
+      iso646-us
+      us-ascii
+      us
+      ibm367
+      cp367
+      csascii
+      ks_c_5601-1987
+      korean
+      iso-2022-kr
+      csiso2022kr
+      euc-kr
+      iso-2022-jp
+      csiso2022jp
+      iso-2022-jp-2
+      iso-ir-58
+      chinese
+      csiso58gb231280
+      iso_8859-1:1987
+      iso-ir-100
+      iso_8859-1
+      iso-8859-1
+      latin1
+      l1
+      ibm819
+      cp819
+      csisolatin1
+      iso_8859-2:1987
+      iso-ir-101
+      iso_8859-2
+      iso-8859-2
+      latin2
+      l2
+      csisolatin2
+      iso_8859-3:1988
+      iso-ir-109
+      iso_8859-3
+      iso-8859-3
+      latin3
+      l3
+      csisolatin3
+      iso_8859-4:1988
+      iso-ir-110
+      iso_8859-4
+      iso-8859-4
+      latin4
+      l4
+      csisolatin4
+      iso_8859-6:1987
+      iso-ir-127
+      iso_8859-6
+      iso-8859-6
+      ecma-114
+      asmo-708
+      arabic
+      csisolatinarabic
+      iso_8859-7:1987
+      iso-ir-126
+      iso_8859-7
+      iso-8859-7
+      elot_928
+      ecma-118
+      greek
+      greek8
+      csisolatingreek
+      iso_8859-8:1988
+      iso-ir-138
+      iso_8859-8
+      iso-8859-8
+      hebrew
+      csisolatinhebrew
+      iso_8859-5:1988
+      iso-ir-144
+      iso_8859-5
+      iso-8859-5
+      cyrillic
+      csisolatincyrillic
+      iso_8859-9:1989
+      iso-ir-148
+      iso_8859-9
+      iso-8859-9
+      latin5
+      l5
+      csisolatin5
+      iso-8859-10
+      iso-ir-157
+      l6
+      iso_8859-10:1992
+      csisolatin6
+      latin6
+      hp-roman8
+      roman8
+      r8
+      ibm037
+      cp037
+      csibm037
+      ibm424
+      cp424
+      csibm424
+      ibm437
+      cp437
+      437
+      cspc8codepage437
+      ibm500
+      cp500
+      csibm500
+      ibm775
+      cp775
+      cspc775baltic
+      ibm850
+      cp850
+      850
+      cspc850multilingual
+      ibm852
+      cp852
+      852
+      cspcp852
+      ibm855
+      cp855
+      855
+      csibm855
+      ibm857
+      cp857
+      857
+      csibm857
+      ibm860
+      cp860
+      860
+      csibm860
+      ibm861
+      cp861
+      861
+      cp-is
+      csibm861
+      ibm862
+      cp862
+      862
+      cspc862latinhebrew
+      ibm863
+      cp863
+      863
+      csibm863
+      ibm864
+      cp864
+      csibm864
+      ibm865
+      cp865
+      865
+      csibm865
+      ibm866
+      cp866
+      866
+      csibm866
+      ibm869
+      cp869
+      869
+      cp-gr
+      csibm869
+      ibm1026
+      cp1026
+      csibm1026
+      koi8-r
+      cskoi8r
+      koi8-u
+      big5-hkscs
+      ptcp154
+      csptcp154
+      pt154
+      cp154
+      utf-7
+      utf-16be
+      utf-16le
+      utf-16
+      utf-8
+      iso-8859-13
+      iso-8859-14
+      iso-ir-199
+      iso_8859-14:1998
+      iso_8859-14
+      latin8
+      iso-celtic
+      l8
+      iso-8859-15
+      iso_8859-15
+      iso-8859-16
+      iso-ir-226
+      iso_8859-16:2001
+      iso_8859-16
+      latin10
+      l10
+      gbk
+      cp936
+      ms936
+      gb18030
+      shift_jis
+      ms_kanji
+      csshiftjis
+      euc-jp
+      gb2312
+      big5
+      csbig5
+      windows-1250
+      windows-1251
+      windows-1252
+      windows-1253
+      windows-1254
+      windows-1255
+      windows-1256
+      windows-1257
+      windows-1258
+      tis-620
+      hz-gb-2312
+  ]
+
+end
--- a/vendor/plugins/HTML5lib/lib/html5/filters.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/filters.rb
@ -0,0 +1 @@
+require 'html5/filters/optionaltags'
--- a/vendor/plugins/HTML5lib/lib/html5lib/filters/base.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/filters/base.rb
@ -1,7 +1,7 @@
 require 'delegate'
 require 'enumerator'

-module HTML5lib
+module HTML5
  module Filters
    class Base < SimpleDelegator
      include Enumerable
--- a/vendor/plugins/HTML5lib/lib/html5lib/filters/inject_meta_charset.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/filters/inject_meta_charset.rb
@ -1,6 +1,6 @@
-require 'html5lib/filters/base'
+require 'html5/filters/base'

-module HTML5lib
+module HTML5
  module Filters
    class InjectMetaCharset < Base
      def initialize(source, encoding)
@ -21,9 +21,9 @@ module HTML5lib
          when :EmptyTag
            if token[:name].downcase == "meta"
              # replace charset with actual encoding
-              token[:data].each_with_index do |(name,value),index|
+              token[:data].each_with_index do |(name, value), index|
                if name == 'charset'
-                  token[:data][index][1]=@encoding
+                  token[:data][index][1] = @encoding
                  meta_found = true
                end
              end
@ -31,7 +31,7 @@ module HTML5lib
              # replace charset with actual encoding
              has_http_equiv_content_type = false
              content_index = -1
-              token[:data].each_with_index do |(name,value),i|
+              token[:data].each_with_index do |(name, value), i|
                if name.downcase == 'charset'
                  token[:data][i] = ['charset', @encoding]
                  meta_found = true
@ -43,30 +43,27 @@ module HTML5lib
                end
              end

-              if not meta_found
-                if has_http_equiv_content_type and content_index >= 0
-                  token[:data][content_index][1] =
-                    'text/html; charset=%s' % @encoding
+              if !meta_found
+                if has_http_equiv_content_type && content_index >= 0
+                  token[:data][content_index][1] = 'text/html; charset=%s' % @encoding
                  meta_found = true
                end
              end

-            elsif token[:name].downcase == "head" and not meta_found
+            elsif token[:name].downcase == "head" && !meta_found
              # insert meta into empty head
-              yield(:type => :StartTag, :name => "head", :data => token[:data])
-              yield(:type => :EmptyTag, :name => "meta",
-                    :data => [["charset", @encoding]])
-              yield(:type => :EndTag, :name => "head")
+              yield :type => :StartTag, :name => "head", :data => token[:data]
+              yield :type => :EmptyTag, :name => "meta", :data => [["charset", @encoding]]
+              yield :type => :EndTag,   :name => "head"
              meta_found = true
              next
            end

          when :EndTag
-            if token[:name].downcase == "head" and pending.any?
+            if token[:name].downcase == "head" && pending.any?
              # insert meta into head (if necessary) and flush pending queue
              yield pending.shift
-              yield(:type => :EmptyTag, :name => "meta",
-                    :data => [["charset", @encoding]]) if not meta_found
+              yield :type => :EmptyTag, :name => "meta", :data => [["charset", @encoding]] if !meta_found
              yield pending.shift while pending.any?
              meta_found = true
              state = :post_head
--- a/vendor/plugins/HTML5lib/lib/html5lib/filters/optionaltags.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/filters/optionaltags.rb
@ -1,7 +1,7 @@
-require 'html5lib/constants'
-require 'html5lib/filters/base'
+require 'html5/constants'
+require 'html5/filters/base'

-module HTML5lib
+module HTML5
  module Filters

    class OptionalTagFilter < Base
@ -75,8 +75,7 @@ module HTML5lib
          if type == :StartTag
            # omit the thead and tfoot elements' end tag when they are
            # immediately followed by a tbody element. See is_optional_end.
-            if previous and previous[:type] == :EndTag and \
-              %w(tbody thead tfoot).include?(previous[:name])
+            if previous and previous[:type] == :EndTag && %w(tbody thead tfoot).include?(previous[:name])
              return false
            end

@ -85,7 +84,7 @@ module HTML5lib
            return false
          end
        end
-      return false
+        return false
      end

      def is_optional_end(tagname, nexttok)
--- a/vendor/plugins/HTML5lib/lib/html5lib/filters/sanitizer.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/filters/sanitizer.rb
@ -1,7 +1,7 @@
-require 'html5lib/filters/base'
-require 'html5lib/sanitizer'
+require 'html5/filters/base'
+require 'html5/sanitizer'

-module HTML5lib
+module HTML5
  module Filters
    class HTMLSanitizeFilter < Base
      include HTMLSanitizeModule
--- a/vendor/plugins/HTML5lib/lib/html5lib/filters/whitespace.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/filters/whitespace.rb
@ -1,7 +1,7 @@
-require 'html5lib/constants'
-require 'html5lib/filters/base'
+require 'html5/constants'
+require 'html5/filters/base'

-module HTML5lib
+module HTML5
  module Filters
    class WhitespaceFilter < Base

@ -21,7 +21,7 @@ module HTML5lib
            preserve -= 1 if preserve > 0

          when :SpaceCharacters
-            next if preserve == 0
+            token[:data] = " " if preserve == 0 && token[:data]

          when :Characters
            token[:data] = token[:data].sub(SPACES,' ') if preserve == 0
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser.rb
@ -1,246 +1,248 @@
-require 'html5lib/constants'
-require 'html5lib/tokenizer'
-require 'html5lib/treebuilders/rexml'
-
-Dir.glob(File.join(File.dirname(__FILE__), 'html5parser', '*_phase.rb')).each do |path|
-  require 'html5lib/html5parser/' + File.basename(path)
-end
-
-module HTML5lib
-
-  # Error in parsed document
-  class ParseError < Exception; end
-  class AssertionError < Exception; end
-
-  # HTML parser. Generates a tree structure from a stream of (possibly malformed) HTML
-  #
-  class HTMLParser
-
-    attr_accessor :phase, :firstStartTag, :innerHTML, :lastPhase, :insertFromTable
-
-    attr_reader :phases, :tokenizer, :tree, :errors
-
-    def self.parse(stream, options = {})
-      encoding = options.delete(:encoding)
-      new(options).parse(stream,encoding)
-    end
-
-    def self.parseFragment(stream, options = {})
-      container = options.delete(:container) || 'div'
-      encoding = options.delete(:encoding)
-      new(options).parseFragment(stream,container,encoding)
-    end
-
-    @@phases = %w( initial rootElement beforeHead inHead afterHead inBody inTable inCaption
-      inColumnGroup inTableBody inRow inCell inSelect afterBody inFrameset afterFrameset trailingEnd )
-
-    # :strict - raise an exception when a parse error is encountered
-    # :tree - a treebuilder class controlling the type of tree that will be
-    # returned. Built in treebuilders can be accessed through
-    # HTML5lib::TreeBuilders[treeType]
-    def initialize(options = {})
-      @strict = false
-      @errors = []
-     
-      @tokenizer =  HTMLTokenizer
-      @tree = TreeBuilders::REXML::TreeBuilder
- 
-      options.each { |name, value| instance_variable_set("@#{name}", value) }
-
-      @tree = @tree.new
-
-      @phases = @@phases.inject({}) do |phases, phase_name|
-        phase_class_name = phase_name.sub(/(.)/) { $1.upcase } + 'Phase'
-        phases[phase_name.to_sym] = HTML5lib.const_get(phase_class_name).new(self, @tree)
-        phases 
-      end
-    end
-
-    def _parse(stream, innerHTML, encoding, container = 'div')
-      @tree.reset
-      @firstStartTag = false
-      @errors = []
-
-      @tokenizer = @tokenizer.class unless Class === @tokenizer
-      @tokenizer = @tokenizer.new(stream, :encoding => encoding,
-        :parseMeta => !innerHTML)
-
-      if innerHTML
-        case @innerHTML = container.downcase
-          when 'title', 'textarea'
-            @tokenizer.contentModelFlag = :RCDATA
-          when 'style', 'script', 'xmp', 'iframe', 'noembed', 'noframes', 'noscript'
-            @tokenizer.contentModelFlag = :CDATA
-          when 'plaintext'
-            @tokenizer.contentModelFlag = :PLAINTEXT
-          else
-          # contentModelFlag already is PCDATA
-          #@tokenizer.contentModelFlag = :PCDATA
-        end
-      
-        @phase = @phases[:rootElement]
-        @phase.insertHtmlElement
-        resetInsertionMode
-      else
-        @innerHTML = false
-        @phase = @phases[:initial]
-      end
-
-      # We only seem to have InBodyPhase testcases where the following is
-      # relevant ... need others too
-      @lastPhase = nil
-
-      # XXX This is temporary for the moment so there isn't any other
-      # changes needed for the parser to work with the iterable tokenizer
-      @tokenizer.each do |token|
-        token = normalizeToken(token)
-
-        method = 'process%s' % token[:type]
-
-        case token[:type]
-          when :Characters, :SpaceCharacters, :Comment
-            @phase.send method, token[:data]
-          when :StartTag
-            @phase.send method, token[:name], token[:data]
-          when :EndTag
-            @phase.send method, token[:name]
-          when :Doctype
-            @phase.send method, token[:name], token[:publicId],
-              token[:systemId], token[:correct]
-          else
-            parseError(token[:data])
-        end
-      end
-
-      # When the loop finishes it's EOF
-      @phase.processEOF
-    end
-
-    # Parse a HTML document into a well-formed tree
-    #
-    # stream - a filelike object or string containing the HTML to be parsed
-    #
-    # The optional encoding parameter must be a string that indicates
-    # the encoding.  If specified, that encoding will be used,
-    # regardless of any BOM or later declaration (such as in a meta
-    # element)
-    def parse(stream, encoding=nil)
-      _parse(stream, false, encoding)
-      return @tree.getDocument
-    end
-  
-    # Parse a HTML fragment into a well-formed tree fragment
-    
-    # container - name of the element we're setting the innerHTML property
-    # if set to nil, default to 'div'
-    #
-    # stream - a filelike object or string containing the HTML to be parsed
-    #
-    # The optional encoding parameter must be a string that indicates
-    # the encoding.  If specified, that encoding will be used,
-    # regardless of any BOM or later declaration (such as in a meta
-    # element)
-    def parseFragment(stream, container='div', encoding=nil)
-      _parse(stream, true, encoding, container)
-      return @tree.getFragment
-    end
-
-    def parseError(data = 'XXX ERROR MESSAGE NEEDED')
-      # XXX The idea is to make data mandatory.
-      @errors.push([@tokenizer.stream.position, data])
-      raise ParseError if @strict
-    end
-
-    # HTML5 specific normalizations to the token stream
-    def normalizeToken(token)
-
-      if token[:type] == :EmptyTag
-        # When a solidus (/) is encountered within a tag name what happens
-        # depends on whether the current tag name matches that of a void
-        # element.  If it matches a void element atheists did the wrong
-        # thing and if it doesn't it's wrong for everyone.
-
-        unless VOID_ELEMENTS.include?(token[:name])
-          parseError(_('Solidus (/) incorrectly placed in tag.'))
-        end
-
-        token[:type] = :StartTag
-      end
-
-      if token[:type] == :StartTag
-        token[:name] = token[:name].tr(ASCII_UPPERCASE,ASCII_LOWERCASE)
-
-        # We need to remove the duplicate attributes and convert attributes
-        # to a dict so that [["x", "y"], ["x", "z"]] becomes {"x": "y"}
-
-        unless token[:data].empty?
-          data = token[:data].reverse.map { |attr, value| [attr.tr(ASCII_UPPERCASE, ASCII_LOWERCASE), value] }
-          token[:data] = Hash[*data.flatten]
-        end
-
-      elsif token[:type] == :EndTag
-        parseError(_('End tag contains unexpected attributes.')) unless token[:data].empty?
-        token[:name] = token[:name].downcase
-      end
-
-      return token
-    end
-
-    @@new_modes = {
-      'select' => :inSelect,
-      'td' => :inCell,
-      'th' => :inCell,
-      'tr' => :inRow,
-      'tbody' => :inTableBody,
-      'thead' => :inTableBody,
-      'tfoot' => :inTableBody,
-      'caption' => :inCaption,
-      'colgroup' => :inColumnGroup,
-      'table' => :inTable,
-      'head' => :inBody,
-      'body' => :inBody,
-      'frameset' => :inFrameset
-    }
-
-    def resetInsertionMode
-      # The name of this method is mostly historical. (It's also used in the
-      # specification.)
-      last = false
-
-      @tree.openElements.reverse.each do |node|
-        nodeName = node.name
-
-        if node == @tree.openElements[0]
-          last = true
-          unless ['td', 'th'].include?(nodeName)
-            # XXX
-            # assert @innerHTML
-            nodeName = @innerHTML
-          end
-        end
-
-        # Check for conditions that should only happen in the innerHTML
-        # case
-        if ['select', 'colgroup', 'head', 'frameset'].include?(nodeName)
-          # XXX
-          # assert @innerHTML
-        end
-
-        if @@new_modes.has_key?(nodeName)
-          @phase = @phases[@@new_modes[nodeName]]
-        elsif nodeName == 'html'
-          @phase = @phases[@tree.headPointer.nil?? :beforeHead : :afterHead]
-        elsif last
-          @phase = @phases[:inBody]
-        else
-          next
-        end
-
-        break
-      end
-    end
-
-    def _(string); string; end
-  end
-
-end
+require 'html5/constants'
+require 'html5/tokenizer'
+require 'html5/treebuilders/rexml'
+
+Dir.glob(File.join(File.dirname(__FILE__), 'html5parser', '*_phase.rb')).each do |path|
+  require 'html5/html5parser/' + File.basename(path)
+end
+
+module HTML5
+
+  # Error in parsed document
+  class ParseError < Exception; end
+  class AssertionError < Exception; end
+
+  # HTML parser. Generates a tree structure from a stream of (possibly malformed) HTML
+  #
+  class HTMLParser
+
+    attr_accessor :phase, :first_start_tag, :inner_html, :last_phase, :insert_from_table
+
+    attr_reader :phases, :tokenizer, :tree, :errors
+
+    def self.parse(stream, options = {})
+      encoding = options.delete(:encoding)
+      new(options).parse(stream,encoding)
+    end
+
+    def self.parse_fragment(stream, options = {})
+      container = options.delete(:container) || 'div'
+      encoding = options.delete(:encoding)
+      new(options).parse_fragment(stream, container, encoding)
+    end
+
+    @@phases = %w( initial rootElement beforeHead inHead afterHead inBody inTable inCaption
+      inColumnGroup inTableBody inRow inCell inSelect afterBody inFrameset afterFrameset trailingEnd )
+
+    # :strict - raise an exception when a parse error is encountered
+    # :tree - a treebuilder class controlling the type of tree that will be
+    # returned. Built in treebuilders can be accessed through
+    # HTML5::TreeBuilders[treeType]
+    def initialize(options = {})
+      @strict = false
+      @errors = []
+     
+      @tokenizer =  HTMLTokenizer
+      @tree = TreeBuilders::REXML::TreeBuilder
+
+      options.each {|name, value| instance_variable_set("@#{name}", value) }
+      @lowercase_attr_name    = nil unless instance_variables.include?("@lowercase_attr_name")
+      @lowercase_element_name = nil unless instance_variables.include?("@lowercase_element_name")
+
+      @tree = @tree.new
+
+      @phases = @@phases.inject({}) do |phases, phase_name|
+        phase_class_name = phase_name.sub(/(.)/) { $1.upcase } + 'Phase'
+        phases[phase_name.to_sym] = HTML5.const_get(phase_class_name).new(self, @tree)
+        phases
+      end
+    end
+
+    # Shared driver behind parse and parse_fragment: tokenizes stream and hands
+    # every token to the current phase object.
+    #
+    # stream     - passed straight to the tokenizer constructor
+    # inner_html - truthy to parse a fragment, falsy to parse a whole document
+    # encoding   - forwarded to the tokenizer as its :encoding option
+    # container  - name of the context element for fragment parsing; only read
+    #              when inner_html is truthy
+    #
+    # Resets @tree and @errors and replaces @tokenizer with a fresh instance.
+    # The parsed result is left in @tree for the caller to fetch.
+    def _parse(stream, inner_html, encoding, container = 'div')
+      @tree.reset
+      @first_start_tag = false
+      @errors = []
+
+      # @tokenizer holds either a tokenizer class or an instance (this method
+      # stores an instance below, so a second call sees one).  Either way a
+      # brand-new tokenizer is built for this stream.
+      @tokenizer = @tokenizer.class unless Class === @tokenizer
+      # Note that :parseMeta is switched off when parsing a fragment.
+      @tokenizer = @tokenizer.new(stream, :encoding => encoding,
+        :parseMeta => !inner_html, :lowercase_attr_name => @lowercase_attr_name, :lowercase_element_name => @lowercase_element_name)
+
+      if inner_html
+        # Fragment case: remember the (lowercased) context element and let it
+        # pick the content model the tokenizer starts in.
+        case @inner_html = container.downcase
+          when 'title', 'textarea'
+            @tokenizer.content_model_flag = :RCDATA
+          when 'style', 'script', 'xmp', 'iframe', 'noembed', 'noframes', 'noscript'
+            @tokenizer.content_model_flag = :CDATA
+          when 'plaintext'
+            @tokenizer.content_model_flag = :PLAINTEXT
+          else
+          # content_model_flag already is PCDATA
+          #@tokenizer.content_model_flag = :PCDATA
+        end
+      
+        # Start from the root element phase, insert the html element and then
+        # let reset_insertion_mode choose the phase to continue in.
+        @phase = @phases[:rootElement]
+        @phase.insert_html_element
+        reset_insertion_mode
+      else
+        # Whole-document case: no context element, start in the initial phase.
+        @inner_html = false
+        @phase = @phases[:initial]
+      end
+
+      # We only seem to have InBodyPhase testcases where the following is
+      # relevant ... need others too
+      @last_phase = nil
+
+      # XXX This is temporary for the moment so there isn't any other
+      # changes needed for the parser to work with the iterable tokenizer
+      @tokenizer.each do |token|
+        token = normalize_token(token)
+
+        # Handler name on the phase object, e.g. "processStartTag".
+        method = 'process%s' % token[:type]
+
+        # Each token type passes a different set of fields to its handler.
+        # Any type not listed is recorded as a parse error, with the token's
+        # :data used as the message.
+        case token[:type]
+          when :Characters, :SpaceCharacters, :Comment
+            @phase.send method, token[:data]
+          when :StartTag
+            @phase.send method, token[:name], token[:data]
+          when :EndTag
+            @phase.send method, token[:name]
+          when :Doctype
+            @phase.send method, token[:name], token[:publicId],
+              token[:systemId], token[:correct]
+          else
+            parse_error(token[:data])
+        end
+      end
+
+      # When the loop finishes it's EOF
+      @phase.process_eof
+    end
+
+    # Parse an HTML document into a well-formed tree.
+    #
+    # stream   - a file-like object or a string containing the HTML to parse
+    # encoding - optional string naming the encoding.  If specified, that
+    #            encoding will be used, regardless of any BOM or later
+    #            declaration (such as in a meta element)
+    #
+    # Returns the document obtained from the tree builder (@tree.get_document).
+    def parse(stream, encoding=nil)
+      fragment_mode = false
+      _parse(stream, fragment_mode, encoding)
+      @tree.get_document
+    end
+
+    # Parse an HTML fragment into a well-formed tree fragment.
+    #
+    # stream    - a file-like object or a string containing the HTML to parse
+    # container - name of the element whose inner_html property is being set;
+    #             nil is treated the same as the default, 'div'
+    # encoding  - optional string naming the encoding.  If specified, that
+    #             encoding will be used, regardless of any BOM or later
+    #             declaration (such as in a meta element)
+    #
+    # Returns the fragment obtained from the tree builder (@tree.get_fragment).
+    def parse_fragment(stream, container='div', encoding=nil)
+      # _parse calls container.downcase, so an explicit nil has to be replaced
+      # by the documented default before it gets there.
+      _parse(stream, true, encoding, container || 'div')
+      @tree.get_fragment
+    end
+
+    # Records a parse error at the tokenizer's current stream position.
+    #
+    # data - the error message (XXX the idea is to make it mandatory)
+    #
+    # Appends a [position, data] pair to @errors.  Raises ParseError when the
+    # parser is in strict mode (@strict is truthy).
+    def parse_error(data = 'XXX ERROR MESSAGE NEEDED')
+      location = @tokenizer.stream.position
+      @errors << [location, data]
+      raise ParseError if @strict
+    end
+
+    # HTML5 specific normalizations to the token stream.
+    #
+    # token - a token Hash from the tokenizer; :type, :name and :data are read
+    #
+    # * :EmptyTag becomes :StartTag; a parse error is recorded when the tag
+    #   name is not listed in VOID_ELEMENTS.
+    # * :StartTag gets a lowercased :name, and its :data is always turned into
+    #   a Hash keyed by lowercased attribute name (first occurrence wins).
+    # * :EndTag gets a lowercased :name; a parse error is recorded when it
+    #   carries attributes.
+    #
+    # Returns the same token object, modified in place.
+    def normalize_token(token)
+
+      if token[:type] == :EmptyTag
+        # When a solidus (/) is encountered within a tag what happens depends
+        # on whether the tag name matches that of a void element: on a void
+        # element it is tolerated, anywhere else it is a parse error.
+        #
+        # The name is only lowercased further down, so compare a lowercased
+        # copy here; otherwise "<BR/>" would be reported when the tokenizer
+        # is configured not to lowercase element names.
+        # (assumes VOID_ELEMENTS holds lowercase names -- TODO confirm)
+        unless VOID_ELEMENTS.include?(token[:name].downcase)
+          parse_error(_('Solidus (/) incorrectly placed in tag.'))
+        end
+
+        token[:type] = :StartTag
+      end
+
+      if token[:type] == :StartTag
+        token[:name] = token[:name].downcase
+
+        # We need to remove the duplicate attributes and convert attributes
+        # to a dict so that [["x", "y"], ["x", "z"]] becomes {"x" => "y"}.
+        # Reversing first makes the earliest occurrence the one that survives.
+        #
+        # An empty attribute list becomes an empty Hash as well, so that the
+        # phase handlers always receive the same type.
+        if token[:data].empty?
+          token[:data] = {}
+        else
+          data = token[:data].reverse.map {|attr, value| [attr.downcase, value] }
+          token[:data] = Hash[*data.flatten]
+        end
+
+      elsif token[:type] == :EndTag
+        parse_error(_('End tag contains unexpected attributes.')) unless token[:data].empty?
+        token[:name] = token[:name].downcase
+      end
+
+      token
+    end
+
+    # Lookup table used by reset_insertion_mode: maps an element name found on
+    # the stack of open elements to the key (in @phases) of the phase the
+    # parser should switch to.
+    # NOTE(review): 'head' maps to :inBody rather than an in-head phase;
+    # presumably deliberate for the inner_html case -- confirm against the spec.
+    @@new_modes = {
+      'select'   => :inSelect,
+      'td'       => :inCell,
+      'th'       => :inCell,
+      'tr'       => :inRow,
+      'tbody'    => :inTableBody,
+      'thead'    => :inTableBody,
+      'tfoot'    => :inTableBody,
+      'caption'  => :inCaption,
+      'colgroup' => :inColumnGroup,
+      'table'    => :inTable,
+      'head'     => :inBody,
+      'body'     => :inBody,
+      'frameset' => :inFrameset
+    }
+
+    # Picks the phase to continue parsing in by walking the stack of open
+    # elements from the most recently opened element back to the root.
+    #
+    # The first element that has an entry in @@new_modes decides the phase.
+    # An 'html' element selects :beforeHead or :afterHead depending on whether
+    # the tree has a head pointer.  If the root is reached without a match,
+    # :inBody is used.
+    #
+    # The name of this method is mostly historical. (It's also used in the
+    # specification.)
+    def reset_insertion_mode
+      root = @tree.open_elements.first
+
+      @tree.open_elements.reverse.each do |element|
+        tag = element.name
+        at_root = (element == root)
+
+        # At the root, anything other than a table cell is judged by the
+        # name of the inner_html context element instead of its own name.
+        # XXX
+        # assert @inner_html
+        tag = @inner_html if at_root && !%w[td th].include?(tag)
+
+        # 'select', 'colgroup', 'head' and 'frameset' should only show up
+        # here in the inner_html case.
+        # XXX
+        # assert @inner_html
+
+        mode =
+          if @@new_modes.has_key?(tag)
+            @@new_modes[tag]
+          elsif tag == 'html'
+            @tree.head_pointer.nil? ? :beforeHead : :afterHead
+          elsif at_root
+            :inBody
+          end
+
+        # Nothing decided yet: look at the next element down the stack.
+        next if mode.nil?
+
+        @phase = @phases[mode]
+        break
+      end
+    end
+
+    # Wrapper applied to every parser message; returns the string unchanged.
+    # (presumably a placeholder for a gettext-style translation hook)
+    def _(string)
+      string
+    end
+  end
+
+end
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_body_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_body_phase.rb
@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class AfterBodyPhase < Phase

    handle_end 'html'
@ -8,36 +8,36 @@ module HTML5lib
    def processComment(data)
      # This is needed because data is to be appended to the <html> element
      # here and not to whatever is currently open.
-      @tree.insertComment(data, @tree.openElements[0])
+      @tree.insert_comment(data, @tree.open_elements.first)
    end

    def processCharacters(data)
-      @parser.parseError(_('Unexpected non-space characters in the after body phase.'))
+      parse_error(_('Unexpected non-space characters in the after body phase.'))
      @parser.phase = @parser.phases[:inBody]
      @parser.phase.processCharacters(data)
    end

    def processStartTag(name, attributes)
-      @parser.parseError(_("Unexpected start tag token (#{name}) in the after body phase."))
+      parse_error(_("Unexpected start tag token (#{name}) in the after body phase."))
      @parser.phase = @parser.phases[:inBody]
      @parser.phase.processStartTag(name, attributes)
    end

    def endTagHtml(name)
-      if @parser.innerHTML
-        @parser.parseError
+      if @parser.inner_html
+        parse_error
      else
        # XXX: This may need to be done, not sure
-        # Don't set lastPhase to the current phase but to the inBody phase
+        # Don't set last_phase to the current phase but to the inBody phase
        # instead. No need for extra parse errors if there's something after </html>.
        # Try "<!doctype html>X</html>X" for instance.
-        @parser.lastPhase = @parser.phase
-        @parser.phase = @parser.phases[:trailingEnd]
+        @parser.last_phase = @parser.phase
+        @parser.phase      = @parser.phases[:trailingEnd]
      end
    end

    def endTagOther(name)
-      @parser.parseError(_("Unexpected end tag token (#{name}) in the after body phase."))
+      parse_error(_("Unexpected end tag token (#{name}) in the after body phase."))
      @parser.phase = @parser.phases[:inBody]
      @parser.phase.processEndTag(name)
    end
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_frameset_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_frameset_phase.rb
@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class AfterFramesetPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#after3
@ -10,7 +10,7 @@ module HTML5lib
    handle_end 'html'

    def processCharacters(data)
-      @parser.parseError(_('Unexpected non-space characters in the after frameset phase. Ignored.'))
+      parse_error(_('Unexpected non-space characters in the after frameset phase. Ignored.'))
    end

    def startTagNoframes(name, attributes)
@ -18,16 +18,16 @@ module HTML5lib
    end

    def startTagOther(name, attributes)
-      @parser.parseError(_("Unexpected start tag (#{name}) in the after frameset phase. Ignored."))
+      parse_error(_("Unexpected start tag (#{name}) in the after frameset phase. Ignored."))
    end

    def endTagHtml(name)
-      @parser.lastPhase = @parser.phase
-      @parser.phase = @parser.phases[:trailingEnd]
+      @parser.last_phase = @parser.phase
+      @parser.phase      = @parser.phases[:trailingEnd]
    end

    def endTagOther(name)
-      @parser.parseError(_("Unexpected end tag (#{name}) in the after frameset phase. Ignored."))
+      parse_error(_("Unexpected end tag (#{name}) in the after frameset phase. Ignored."))
    end

  end
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_head_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_head_phase.rb
@ -1,48 +1,48 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class AfterHeadPhase < Phase
-  
+
    handle_start 'html', 'body', 'frameset', %w( base link meta script style title ) => 'FromHead'

-    def processEOF
-      anythingElse
-      @parser.phase.processEOF
+    def process_eof
+      anything_else
+      @parser.phase.process_eof
    end

    def processCharacters(data)
-      anythingElse
+      anything_else
      @parser.phase.processCharacters(data)
    end

    def startTagBody(name, attributes)
-      @tree.insertElement(name, attributes)
+      @tree.insert_element(name, attributes)
      @parser.phase = @parser.phases[:inBody]
    end

    def startTagFrameset(name, attributes)
-      @tree.insertElement(name, attributes)
+      @tree.insert_element(name, attributes)
      @parser.phase = @parser.phases[:inFrameset]
    end

    def startTagFromHead(name, attributes)
-      @parser.parseError(_("Unexpected start tag (#{name}) that can be in head. Moved."))
+      parse_error(_("Unexpected start tag (#{name}) that can be in head. Moved."))
      @parser.phase = @parser.phases[:inHead]
      @parser.phase.processStartTag(name, attributes)
    end

    def startTagOther(name, attributes)
-      anythingElse
+      anything_else
      @parser.phase.processStartTag(name, attributes)
    end

    def processEndTag(name)
-      anythingElse
+      anything_else
      @parser.phase.processEndTag(name)
    end

-    def anythingElse
-      @tree.insertElement('body', {})
+    def anything_else
+      @tree.insert_element('body', {})
      @parser.phase = @parser.phases[:inBody]
    end

--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/before_head_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/before_head_phase.rb
@ -1,15 +1,15 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class BeforeHeadPhase < Phase

    handle_start 'html', 'head'

-    handle_end %w( html head body br ) => 'ImplyHead'
+    handle_end %w( html head body br p ) => 'ImplyHead'

-    def processEOF
+    def process_eof
      startTagHead('head', {})
-      @parser.phase.processEOF
+      @parser.phase.process_eof
    end

    def processCharacters(data)
@ -18,8 +18,8 @@ module HTML5lib
    end

    def startTagHead(name, attributes)
-      @tree.insertElement(name, attributes)
-      @tree.headPointer = @tree.openElements[-1]
+      @tree.insert_element(name, attributes)
+      @tree.head_pointer = @tree.open_elements[-1]
      @parser.phase = @parser.phases[:inHead]
    end

@ -34,7 +34,7 @@ module HTML5lib
    end

    def endTagOther(name)
-      @parser.parseError(_("Unexpected end tag (#{name}) after the (implied) root element."))
+      parse_error(_("Unexpected end tag (#{name}) after the (implied) root element."))
    end

  end
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_body_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_body_phase.rb
@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class InBodyPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-body
@ -51,25 +51,40 @@ module HTML5lib

      # for special handling of whitespace in <pre>
      @processSpaceCharactersDropNewline = false
+      if $-w
+        $-w = false
+        alias processSpaceCharactersNonPre processSpaceCharacters
+        $-w = true
+      else
+        alias processSpaceCharactersNonPre processSpaceCharacters
+      end
    end

    def processSpaceCharactersDropNewline(data)
-      #Sometimes (start of <pre> blocks) we want to drop leading newlines
-      @processSpaceCharactersDropNewline = false
-      if (data.length > 0 and data[0] == ?\n and 
-        %w[pre textarea].include?(@tree.openElements[-1].name) and
-        not @tree.openElements[-1].hasContent)
+      # Sometimes (at the start of <pre> blocks) we want to drop leading newlines
+
+      if $-w
+        $-w = false
+        alias processSpaceCharacters processSpaceCharactersNonPre
+        $-w = true
+      else
+        alias processSpaceCharacters processSpaceCharactersNonPre
+      end
+
+      if (data.length > 0 and data[0] == ?\n && 
+        %w[pre textarea].include?(@tree.open_elements.last.name) && !@tree.open_elements.last.hasContent)
        data = data[1..-1]
      end
-      @tree.insertText(data) if data.length > 0
+
+      if data.length > 0
+        @tree.reconstructActiveFormattingElements
+        @tree.insertText(data)
+      end
    end

    def processSpaceCharacters(data)
-      if @processSpaceCharactersDropNewline
-        processSpaceCharactersDropNewline(data)
-      else
-        super(data)
-      end
+      @tree.reconstructActiveFormattingElements()
+      @tree.insertText(data)
    end

    def processCharacters(data)
@ -85,20 +100,19 @@ module HTML5lib
    end

    def startTagTitle(name, attributes)
-      @parser.parseError(_("Unexpected start tag (#{name}) that belongs in the head. Moved."))
+      parse_error(_("Unexpected start tag (#{name}) that belongs in the head. Moved."))
      @parser.phases[:inHead].processStartTag(name, attributes)
    end

    def startTagBody(name, attributes)
-      @parser.parseError(_('Unexpected start tag (body).'))
+      parse_error(_('Unexpected start tag (body).'))

-      if (@tree.openElements.length == 1 or
-        @tree.openElements[1].name != 'body')
-        assert @parser.innerHTML
+      if (@tree.open_elements.length == 1 || @tree.open_elements[1].name != 'body')
+        assert @parser.inner_html
      else
        attributes.each do |attr, value|
-          unless @tree.openElements[1].attributes.has_key?(attr)
-            @tree.openElements[1].attributes[attr] = value
+          unless @tree.open_elements[1].attributes.has_key?(attr)
+            @tree.open_elements[1].attributes[attr] = value
          end
        end
      end
@ -106,17 +120,17 @@ module HTML5lib

    def startTagCloseP(name, attributes)
      endTagP('p') if in_scope?('p')
-      @tree.insertElement(name, attributes)
+      @tree.insert_element(name, attributes)
      @processSpaceCharactersDropNewline = true if name == 'pre'
    end

    def startTagForm(name, attributes)
      if @tree.formPointer
-        @parser.parseError('Unexpected start tag (form). Ignored.')
+        parse_error(_('Unexpected start tag (form). Ignored.'))
      else
        endTagP('p') if in_scope?('p')
-        @tree.insertElement(name, attributes)
-        @tree.formPointer = @tree.openElements[-1]
+        @tree.insert_element(name, attributes)
+        @tree.formPointer = @tree.open_elements[-1]
      end
    end

@ -125,31 +139,28 @@ module HTML5lib
      stopNames = {'li' => ['li'], 'dd' => ['dd', 'dt'], 'dt' => ['dd', 'dt']}
      stopName = stopNames[name]

-      @tree.openElements.reverse.each_with_index do |node, i|
+      @tree.open_elements.reverse.each_with_index do |node, i|
        if stopName.include?(node.name)
-          poppedNodes = (0..i).collect { @tree.openElements.pop }
+          poppedNodes = (0..i).collect { @tree.open_elements.pop }
          if i >= 1
-            @parser.parseError("Missing end tag%s (%s)" % [
-              (i>1 ? 's' : ''),
-              poppedNodes.reverse.map {|item| item.name}.join(', ')])
+            parse_error(_("Missing end tag%s (%s)" % [(i>1 ? 's' : ''), poppedNodes.reverse.map{|item| item.name}.join(', ')]))
          end
          break
        end

        # Phrasing elements are all non special, non scoping, non
        # formatting elements
-        break if ((SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name) and
-          not ['address', 'div'].include?(node.name))
+        break if ((SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name) && !%w[address div].include?(node.name))
      end

      # Always insert an <li> element.
-      @tree.insertElement(name, attributes)
+      @tree.insert_element(name, attributes)
    end

    def startTagPlaintext(name, attributes)
      endTagP('p') if in_scope?('p')
-      @tree.insertElement(name, attributes)
-      @parser.tokenizer.contentModelFlag = :PLAINTEXT
+      @tree.insert_element(name, attributes)
+      @parser.tokenizer.content_model_flag = :PLAINTEXT
    end

    def startTagHeading(name, attributes)
@ -158,7 +169,7 @@ module HTML5lib
      # Uncomment the following for IE7 behavior:
      # HEADING_ELEMENTS.each do |element|
      #   if in_scope?(element)
-      #     @parser.parseError(_("Unexpected start tag (#{name})."))
+      #     parse_error(_("Unexpected start tag (#{name})."))
      # 
      #     remove_open_elements_until do |element|
      #       HEADING_ELEMENTS.include?(element.name)
@ -167,14 +178,14 @@ module HTML5lib
      #     break
      #   end
      # end
-      @tree.insertElement(name, attributes)
+      @tree.insert_element(name, attributes)
    end

    def startTagA(name, attributes)
      if afeAElement = @tree.elementInActiveFormattingElements('a')
-        @parser.parseError(_('Unexpected start tag (a) implies end tag (a).'))
+        parse_error(_('Unexpected start tag (a) implies end tag (a).'))
        endTagFormatting('a')
-        @tree.openElements.delete(afeAElement) if @tree.openElements.include?(afeAElement)
+        @tree.open_elements.delete(afeAElement) if @tree.open_elements.include?(afeAElement)
        @tree.activeFormattingElements.delete(afeAElement) if @tree.activeFormattingElements.include?(afeAElement)
      end
      @tree.reconstructActiveFormattingElements
@ -188,77 +199,82 @@ module HTML5lib

    def startTagNobr(name, attributes)
      @tree.reconstructActiveFormattingElements
-      processEndTag('nobr') if in_scope?('nobr')
+      if in_scope?('nobr')
+        parse_error(_('Unexpected start tag (nobr) implies end tag (nobr).'))
+        processEndTag('nobr')
+        # XXX Need tests that trigger the following
+        @tree.reconstructActiveFormattingElements
+      end
      addFormattingElement(name, attributes)
    end

    def startTagButton(name, attributes)
      if in_scope?('button')
-        @parser.parseError(_('Unexpected start tag (button) implied end tag (button).'))
+        parse_error(_('Unexpected start tag (button) implied end tag (button).'))
        processEndTag('button')
        @parser.phase.processStartTag(name, attributes)
      else
        @tree.reconstructActiveFormattingElements
-        @tree.insertElement(name, attributes)
+        @tree.insert_element(name, attributes)
        @tree.activeFormattingElements.push(Marker)
      end
    end

    def startTagMarqueeObject(name, attributes)
      @tree.reconstructActiveFormattingElements
-      @tree.insertElement(name, attributes)
+      @tree.insert_element(name, attributes)
      @tree.activeFormattingElements.push(Marker)
    end

    def startTagXmp(name, attributes)
      @tree.reconstructActiveFormattingElements
-      @tree.insertElement(name, attributes)
-      @parser.tokenizer.contentModelFlag = :CDATA
+      @tree.insert_element(name, attributes)
+      @parser.tokenizer.content_model_flag = :CDATA
    end

    def startTagTable(name, attributes)
      processEndTag('p') if in_scope?('p')
-      @tree.insertElement(name, attributes)
+      @tree.insert_element(name, attributes)
      @parser.phase = @parser.phases[:inTable]
    end

    def startTagVoidFormatting(name, attributes)
      @tree.reconstructActiveFormattingElements
-      @tree.insertElement(name, attributes)
-      @tree.openElements.pop
+      @tree.insert_element(name, attributes)
+      @tree.open_elements.pop
    end

    def startTagHr(name, attributes)
      endTagP('p') if in_scope?('p')
-      @tree.insertElement(name, attributes)
-      @tree.openElements.pop
+      @tree.insert_element(name, attributes)
+      @tree.open_elements.pop
    end

    def startTagImage(name, attributes)
      # No really...
-      @parser.parseError(_('Unexpected start tag (image). Treated as img.'))
+      parse_error(_('Unexpected start tag (image). Treated as img.'))
      processStartTag('img', attributes)
    end

    def startTagInput(name, attributes)
      @tree.reconstructActiveFormattingElements
-      @tree.insertElement(name, attributes)
+      @tree.insert_element(name, attributes)
      if @tree.formPointer
        # XXX Not exactly sure what to do here
-        # @tree.openElements[-1].form = @tree.formPointer
+        # @tree.open_elements[-1].form = @tree.formPointer
      end
-      @tree.openElements.pop
+      @tree.open_elements.pop
    end

    def startTagIsindex(name, attributes)
-      @parser.parseError("Unexpected start tag isindex. Don't use it!")
+      parse_error(_("Unexpected start tag isindex. Don't use it!"))
      return if @tree.formPointer
      processStartTag('form', {})
      processStartTag('hr', {})
      processStartTag('p', {})
      processStartTag('label', {})
      # XXX Localization ...
-      processCharacters('This is a searchable index. Insert your search keywords here:')
+      processCharacters('This is a searchable index. Insert your search keywords here: ')
      attributes['name'] = 'isindex'
      attrs = attributes.to_a
      processStartTag('input', attributes)
@ -270,20 +286,21 @@ module HTML5lib

    def startTagTextarea(name, attributes)
      # XXX Form element pointer checking here as well...
-      @tree.insertElement(name, attributes)
-      @parser.tokenizer.contentModelFlag = :RCDATA
+      @tree.insert_element(name, attributes)
+      @parser.tokenizer.content_model_flag = :RCDATA
      @processSpaceCharactersDropNewline = true
+      alias processSpaceCharacters processSpaceCharactersDropNewline
    end

    # iframe, noembed noframes, noscript(if scripting enabled)
    def startTagCdata(name, attributes)
-      @tree.insertElement(name, attributes)
-      @parser.tokenizer.contentModelFlag = :CDATA
+      @tree.insert_element(name, attributes)
+      @parser.tokenizer.content_model_flag = :CDATA
    end

    def startTagSelect(name, attributes)
      @tree.reconstructActiveFormattingElements
-      @tree.insertElement(name, attributes)
+      @tree.insert_element(name, attributes)
      @parser.phase = @parser.phases[:inSelect]
    end

@ -293,7 +310,7 @@ module HTML5lib
      # "caption", "col", "colgroup", "frame", "frameset", "head",
      # "option", "optgroup", "tbody", "td", "tfoot", "th", "thead",
      # "tr", "noscript"
-      @parser.parseError(_("Unexpected start tag (#{name}). Ignored."))
+      parse_error(_("Unexpected start tag (#{name}). Ignored."))
    end

    def startTagNew(name, attributes)
@ -306,33 +323,38 @@ module HTML5lib

    def startTagOther(name, attributes)
      @tree.reconstructActiveFormattingElements
-      @tree.insertElement(name, attributes)
+      @tree.insert_element(name, attributes)
    end

    def endTagP(name)
      @tree.generateImpliedEndTags('p') if in_scope?('p')
-      @parser.parseError('Unexpected end tag (p).') unless @tree.openElements[-1].name == 'p'
-      @tree.openElements.pop while in_scope?('p')
+      parse_error(_('Unexpected end tag (p).')) unless @tree.open_elements.last.name == 'p'
+      if in_scope?('p')
+        @tree.open_elements.pop while in_scope?('p')
+      else
+        startTagCloseP('p', {})
+        endTagP('p')
+      end
    end

    def endTagBody(name)
      # XXX Need to take open <p> tags into account here. We shouldn't imply
      # </p> but we should not throw a parse error either. Specification is
      # likely to be updated.
-      unless @tree.openElements[1].name == 'body'
-        # innerHTML case
-        @parser.parseError
+      unless @tree.open_elements[1].name == 'body'
+        # inner_html case
+        parse_error
        return
      end
-      unless @tree.openElements[-1].name == 'body'
-        @parser.parseError(_("Unexpected end tag (body). Missing end tag (#{@tree.openElements[-1].name})."))
+      unless @tree.open_elements.last.name == 'body'
+        parse_error(_("Unexpected end tag (body). Missing end tag (#{@tree.open_elements[-1].name})."))
      end
      @parser.phase = @parser.phases[:afterBody]
    end

    def endTagHtml(name)
      endTagBody(name)
-      @parser.phase.processEndTag(name) unless @parser.innerHTML
+      @parser.phase.processEndTag(name) unless @parser.inner_html
    end

    def endTagBlock(name)
@ -341,8 +363,8 @@ module HTML5lib

      @tree.generateImpliedEndTags if in_scope?(name)

-      unless @tree.openElements[-1].name == name
-        @parser.parseError(("End tag (#{name}) seen too early. Expected other end tag."))
+      unless @tree.open_elements.last.name == name
+        parse_error(_("End tag (#{name}) seen too early. Expected other end tag."))
      end

      if in_scope?(name)
@ -351,18 +373,23 @@ module HTML5lib
    end

    def endTagForm(name)
-      endTagBlock(name)
+      if in_scope?(name)
+        @tree.generateImpliedEndTags
+      end
+      if @tree.open_elements.last.name != name
+        parse_error(_("End tag (form) seen too early. Ignored."))
+      else
+        @tree.open_elements.pop
+      end
      @tree.formPointer = nil
    end

    def endTagListItem(name)
      # AT Could merge this with the Block case
-      if in_scope?(name)
-        @tree.generateImpliedEndTags(name)
+      @tree.generateImpliedEndTags(name) if in_scope?(name)

-        unless @tree.openElements[-1].name == name
-          @parser.parseError(("End tag (#{name}) seen too early. Expected other end tag."))
-        end
+      unless @tree.open_elements.last.name == name
+        parse_error(_("End tag (#{name}) seen too early. " + 'Expected other end tag.'))
      end

      remove_open_elements_until(name) if in_scope?(name)
@ -376,13 +403,13 @@ module HTML5lib
        end
      end

-      unless @tree.openElements[-1].name == name
-        @parser.parseError(("Unexpected end tag (#{name}). Expected other end tag."))
+      unless @tree.open_elements.last.name == name
+        parse_error(_("Unexpected end tag (#{name}). Expected other end tag."))
      end

      HEADING_ELEMENTS.each do |element|
        if in_scope?(element)
-          remove_open_elements_until { |element| HEADING_ELEMENTS.include?(element.name) }
+          remove_open_elements_until {|element| HEADING_ELEMENTS.include?(element.name)}
          break
        end
      end
@ -391,30 +418,30 @@ module HTML5lib
    # The much-feared adoption agency algorithm
    def endTagFormatting(name)
      # http://www.whatwg.org/specs/web-apps/current-work/#adoptionAgency
-      # XXX Better parseError messages appreciated.
+      # XXX Better parse_error messages appreciated.
      while true
        # Step 1 paragraph 1
        afeElement = @tree.elementInActiveFormattingElements(name)
-        if not afeElement or (@tree.openElements.include?(afeElement) and not in_scope?(afeElement.name))
-          @parser.parseError(_("End tag (#{name}) violates step 1, paragraph 1 of the adoption agency algorithm."))
+        if !afeElement or (@tree.open_elements.include?(afeElement) && !in_scope?(afeElement.name))
+          parse_error(_("End tag (#{name}) violates step 1, paragraph 1 of the adoption agency algorithm."))
          return
        # Step 1 paragraph 2
-        elsif not @tree.openElements.include?(afeElement)
-          @parser.parseError(_("End tag (#{name}) violates step 1, paragraph 2 of the adoption agency algorithm."))
+        elsif not @tree.open_elements.include?(afeElement)
+          parse_error(_("End tag (#{name}) violates step 1, paragraph 2 of the adoption agency algorithm."))
          @tree.activeFormattingElements.delete(afeElement)
          return
        end

        # Step 1 paragraph 3
-        if afeElement != @tree.openElements[-1]
-          @parser.parseError(_("End tag (#{name}) violates step 1, paragraph 3 of the adoption agency algorithm."))
+        if afeElement != @tree.open_elements.last
+          parse_error(_("End tag (#{name}) violates step 1, paragraph 3 of the adoption agency algorithm."))
        end

        # Step 2
        # Start of the adoption agency algorithm proper
-        afeIndex = @tree.openElements.index(afeElement)
+        afeIndex = @tree.open_elements.index(afeElement)
        furthestBlock = nil
-        @tree.openElements[afeIndex..-1].each do |element|
+        @tree.open_elements[afeIndex..-1].each do |element|
          if (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(element.name)
            furthestBlock = element
            break
@ -423,11 +450,11 @@ module HTML5lib

        # Step 3
        if furthestBlock.nil?
-          element = remove_open_elements_until { |element| element == afeElement }
+          element = remove_open_elements_until {|element| element == afeElement }
          @tree.activeFormattingElements.delete(element)
          return
        end
-        commonAncestor = @tree.openElements[afeIndex - 1]
+        commonAncestor = @tree.open_elements[afeIndex - 1]

        # Step 5
        furthestBlock.parent.removeChild(furthestBlock) if furthestBlock.parent
@ -444,11 +471,11 @@ module HTML5lib
        while true
          # AT replace this with a function and recursion?
          # Node is element before node in open elements
-          node = @tree.openElements[@tree.openElements.index(node) - 1]
+          node = @tree.open_elements[@tree.open_elements.index(node) - 1]
          until @tree.activeFormattingElements.include?(node)
            tmpNode = node
-            node = @tree.openElements[@tree.openElements.index(node) - 1]
-            @tree.openElements.delete(tmpNode)
+            node = @tree.open_elements[@tree.open_elements.index(node) - 1]
+            @tree.open_elements.delete(tmpNode)
          end
          # Step 7.3
          break if node == afeElement
@ -465,7 +492,7 @@ module HTML5lib
            clone = node.cloneNode
            # Replace node with clone
            @tree.activeFormattingElements[@tree.activeFormattingElements.index(node)] = clone
-            @tree.openElements[@tree.openElements.index(node)] = clone
+            @tree.open_elements[@tree.open_elements.index(node)] = clone
            node = clone
          end
          # Step 7.6
@ -495,47 +522,47 @@ module HTML5lib
        @tree.activeFormattingElements.insert([bookmark,@tree.activeFormattingElements.length].min, clone)

        # Step 13
-        @tree.openElements.delete(afeElement)
-        @tree.openElements.insert(@tree.openElements.index(furthestBlock) + 1, clone)
+        @tree.open_elements.delete(afeElement)
+        @tree.open_elements.insert(@tree.open_elements.index(furthestBlock) + 1, clone)
      end
    end

    def endTagButtonMarqueeObject(name)
      @tree.generateImpliedEndTags if in_scope?(name)

-      unless @tree.openElements[-1].name == name
-        @parser.parseError(_("Unexpected end tag (#{name}). Expected other end tag first."))
+      unless @tree.open_elements.last.name == name
+        parse_error(_("Unexpected end tag (#{name}). Expected other end tag first."))
      end

      if in_scope?(name)
        remove_open_elements_until(name)
-      
+
        @tree.clearActiveFormattingElements
      end
    end

    def endTagMisplaced(name)
      # This handles elements with end tags in other insertion modes.
-      @parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
+      parse_error(_("Unexpected end tag (#{name}). Ignored."))
    end

    def endTagBr(name)
-      @parser.parseError(_("Unexpected end tag (br). Treated as br element."))
+      parse_error(_("Unexpected end tag (br). Treated as br element."))
      @tree.reconstructActiveFormattingElements
-      @tree.insertElement(name, {})
-      @tree.openElements.pop()
+      @tree.insert_element(name, {})
+      @tree.open_elements.pop()
    end

    def endTagNone(name)
      # This handles elements with no end tag.
-      @parser.parseError(_("This tag (#{name}) has no end tag"))
+      parse_error(_("This tag (#{name}) has no end tag"))
    end

    def endTagCdataTextAreaXmp(name)
-      if @tree.openElements[-1].name == name
-        @tree.openElements.pop
+      if @tree.open_elements.last.name == name
+        @tree.open_elements.pop
      else
-        @parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
+        parse_error(_("Unexpected end tag (#{name}). Ignored."))
      end
    end

@ -549,20 +576,20 @@ module HTML5lib

    def endTagOther(name)
      # XXX This logic should be moved into the treebuilder
-      @tree.openElements.reverse.each do |node|
+      @tree.open_elements.reverse.each do |node|
        if node.name == name
          @tree.generateImpliedEndTags

-          unless @tree.openElements[-1].name == name
-            @parser.parseError(_("Unexpected end tag (#{name})."))
+          unless @tree.open_elements.last.name == name
+            parse_error(_("Unexpected end tag (#{name})."))
          end

-          remove_open_elements_until { |element| element == node }
+          remove_open_elements_until {|element| element == node }

          break
        else
          if (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name)
-            @parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
+            parse_error(_("Unexpected end tag (#{name}). Ignored."))
            break
          end
        end
@ -572,8 +599,8 @@ module HTML5lib
    protected

    def addFormattingElement(name, attributes)
-      @tree.insertElement(name, attributes)
-      @tree.activeFormattingElements.push(@tree.openElements[-1])
+      @tree.insert_element(name, attributes)
+      @tree.activeFormattingElements.push(@tree.open_elements.last)
    end

  end
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_caption_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_caption_phase.rb
@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class InCaptionPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-caption
@ -10,7 +10,7 @@ module HTML5lib
    handle_end 'caption', 'table', %w( body col colgroup html tbody td tfoot th thead tr ) => 'Ignore'

    def ignoreEndTagCaption
-      not in_scope?('caption', true)
+      !in_scope?('caption', true)
    end

    def processCharacters(data)
@ -18,7 +18,7 @@ module HTML5lib
    end

    def startTagTableElement(name, attributes)
-      @parser.parseError
+      parse_error
      #XXX Have to duplicate logic here to find out if the tag is ignored
      ignoreEndTag = ignoreEndTagCaption
      @parser.phase.processEndTag('caption')
@ -31,15 +31,15 @@ module HTML5lib

    def endTagCaption(name)
      if ignoreEndTagCaption
-        # innerHTML case
-        assert @parser.innerHTML
-        @parser.parseError
+        # inner_html case
+        assert @parser.inner_html
+        parse_error
      else
        # AT this code is quite similar to endTagTable in "InTable"
        @tree.generateImpliedEndTags

-        unless @tree.openElements[-1].name == 'caption'
-          @parser.parseError(_("Unexpected end tag (caption). Missing end tags."))
+        unless @tree.open_elements[-1].name == 'caption'
+          parse_error(_("Unexpected end tag (caption). Missing end tags."))
        end

        remove_open_elements_until('caption')
@ -50,14 +50,14 @@ module HTML5lib
    end

    def endTagTable(name)
-      @parser.parseError
+      parse_error
      ignoreEndTag = ignoreEndTagCaption
      @parser.phase.processEndTag('caption')
      @parser.phase.processEndTag(name) unless ignoreEndTag
    end

    def endTagIgnore(name)
-      @parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
+      parse_error(_("Unexpected end tag (#{name}). Ignored."))
    end

    def endTagOther(name)
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_cell_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_cell_phase.rb
@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class InCellPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-cell
@ -20,8 +20,8 @@ module HTML5lib
        closeCell
        @parser.phase.processStartTag(name, attributes)
      else
-        # innerHTML case
-        @parser.parseError
+        # inner_html case
+        parse_error
      end
    end

@ -32,22 +32,22 @@ module HTML5lib
    def endTagTableCell(name)
      if in_scope?(name, true)
        @tree.generateImpliedEndTags(name)
-        if @tree.openElements[-1].name != name
-          @parser.parseError("Got table cell end tag (#{name}) while required end tags are missing.")
+        if @tree.open_elements.last.name != name
+          parse_error("Got table cell end tag (#{name}) while required end tags are missing.")

          remove_open_elements_until(name)
        else
-          @tree.openElements.pop
+          @tree.open_elements.pop
        end
        @tree.clearActiveFormattingElements
        @parser.phase = @parser.phases[:inRow]
      else
-        @parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
+        parse_error(_("Unexpected end tag (#{name}). Ignored."))
      end
    end

    def endTagIgnore(name)
-      @parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
+      parse_error(_("Unexpected end tag (#{name}). Ignored."))
    end

    def endTagImply(name)
@ -55,8 +55,8 @@ module HTML5lib
        closeCell
        @parser.phase.processEndTag(name)
      else
-        # sometimes innerHTML case
-        @parser.parseError
+        # sometimes inner_html case
+        parse_error
      end
    end

--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_column_group_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_column_group_phase.rb
@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class InColumnGroupPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-column
@ -10,7 +10,7 @@ module HTML5lib
    handle_end 'colgroup', 'col'

    def ignoreEndTagColgroup
-      @tree.openElements[-1].name == 'html'
+      @tree.open_elements[-1].name == 'html'
    end

    def processCharacters(data)
@ -20,8 +20,8 @@ module HTML5lib
    end

    def startTagCol(name, attributes)
-      @tree.insertElement(name, attributes)
-      @tree.openElements.pop
+      @tree.insert_element(name, attributes)
+      @tree.open_elements.pop
    end

    def startTagOther(name, attributes)
@ -32,17 +32,17 @@ module HTML5lib

    def endTagColgroup(name)
      if ignoreEndTagColgroup
-        # innerHTML case
-        assert @parser.innerHTML
-        @parser.parseError
+        # inner_html case
+        assert @parser.inner_html
+        parse_error
      else
-        @tree.openElements.pop
+        @tree.open_elements.pop
        @parser.phase = @parser.phases[:inTable]
      end
    end

    def endTagCol(name)
-      @parser.parseError(_('Unexpected end tag (col). col has no end tag.'))
+      parse_error(_('Unexpected end tag (col). col has no end tag.'))
    end

    def endTagOther(name)
--- a/vendor/plugins/HTML5lib/lib/html5/html5parser/in_frameset_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_frameset_phase.rb
@ -0,0 +1,57 @@
+require 'html5/html5parser/phase'
+
+module HTML5
+  class InFramesetPhase < Phase
+
+    # http://www.whatwg.org/specs/web-apps/current-work/#in-frameset
+
+    handle_start 'html', 'frameset', 'frame', 'noframes'
+
+    handle_end 'frameset', 'noframes'
+
+    def processCharacters(data)
+      parse_error(_('Unexpected characters in the frameset phase. Characters ignored.'))
+    end
+
+    def startTagFrameset(name, attributes)
+      @tree.insert_element(name, attributes)
+    end
+
+    def startTagFrame(name, attributes)
+      @tree.insert_element(name, attributes)
+      @tree.open_elements.pop
+    end
+
+    def startTagNoframes(name, attributes)
+      @parser.phases[:inBody].processStartTag(name, attributes)
+    end
+
+    def startTagOther(name, attributes)
+      parse_error(_("Unexpected start tag token (#{name}) in the frameset phase. Ignored"))
+    end
+
+    def endTagFrameset(name)
+      if @tree.open_elements.last.name == 'html'
+        # inner_html case
+        parse_error(_("Unexpected end tag token (frameset) in the frameset phase (inner_html)."))
+      else
+        @tree.open_elements.pop
+      end
+      if (not @parser.inner_html and
+        @tree.open_elements.last.name != 'frameset')
+        # If we're not in inner_html mode and the current node is no longer a
+        # "frameset" element, switch to the afterFrameset phase.
+        @parser.phase = @parser.phases[:afterFrameset]
+      end
+    end
+
+    def endTagNoframes(name)
+      @parser.phases[:inBody].processEndTag(name)
+    end
+
+    def endTagOther(name)
+      parse_error(_("Unexpected end tag token (#{name}) in the frameset phase. Ignored."))
+    end
+
+  end
+end
--- a/vendor/plugins/HTML5lib/lib/html5/html5parser/in_head_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_head_phase.rb
@ -0,0 +1,138 @@
+require 'html5/html5parser/phase'
+
+module HTML5
+  class InHeadPhase < Phase
+
+    handle_start 'html', 'head', 'title', 'style', 'script', 'noscript'
+    handle_start %w( base link meta )
+
+    handle_end 'head'
+    handle_end %w( html body br p ) => 'ImplyAfterHead'
+    handle_end %w( title style script noscript )
+
+    def process_eof
+      if ['title', 'style', 'script'].include?(name = @tree.open_elements.last.name)
+        parse_error(_("Unexpected end of file. Expected end tag (#{name})."))
+        @tree.open_elements.pop
+      end
+      anything_else
+      @parser.phase.process_eof
+    end
+
+    def processCharacters(data)
+      if %w[title style script noscript].include?(@tree.open_elements.last.name)
+        @tree.insertText(data)
+      else
+        anything_else
+        @parser.phase.processCharacters(data)
+      end
+    end
+
+    def startTagHead(name, attributes)
+      parse_error(_('Unexpected start tag head in existing head. Ignored'))
+    end
+
+    def startTagTitle(name, attributes)
+      element = @tree.createElement(name, attributes)
+      appendToHead(element)
+      @tree.open_elements.push(element)
+      @parser.tokenizer.content_model_flag = :RCDATA
+    end
+
+    def startTagStyle(name, attributes)
+      element = @tree.createElement(name, attributes)
+      if @tree.head_pointer != nil and @parser.phase == @parser.phases[:inHead]
+        appendToHead(element)
+      else
+        @tree.open_elements.last.appendChild(element)
+      end
+      @tree.open_elements.push(element)
+      @parser.tokenizer.content_model_flag = :CDATA
+    end
+
+    def startTagNoscript(name, attributes)
+      # XXX Need to decide whether to implement the scripting disabled case.
+      element = @tree.createElement(name, attributes)
+      if @tree.head_pointer !=nil and @parser.phase == @parser.phases[:inHead]
+        appendToHead(element)
+      else
+        @tree.open_elements.last.appendChild(element)
+      end
+      @tree.open_elements.push(element)
+      @parser.tokenizer.content_model_flag = :CDATA
+    end
+
+    def startTagScript(name, attributes)
+      #XXX Inner HTML case may be wrong
+      element = @tree.createElement(name, attributes)
+      element._flags.push("parser-inserted")
+      if @tree.head_pointer != nil and @parser.phase == @parser.phases[:inHead]
+        appendToHead(element)
+      else
+        @tree.open_elements.last.appendChild(element)
+      end
+      @tree.open_elements.push(element)
+      @parser.tokenizer.content_model_flag = :CDATA
+    end
+
+    def startTagBaseLinkMeta(name, attributes)
+      element = @tree.createElement(name, attributes)
+      if @tree.head_pointer != nil and @parser.phase == @parser.phases[:inHead]
+        appendToHead(element)
+      else
+        @tree.open_elements.last.appendChild(element)
+      end
+    end
+
+    def startTagOther(name, attributes)
+      anything_else
+      @parser.phase.processStartTag(name, attributes)
+    end
+
+    def endTagHead(name)
+      if @tree.open_elements.last.name == 'head'
+        @tree.open_elements.pop
+      else
+        parse_error(_("Unexpected end tag (head). Ignored."))
+      end
+      @parser.phase = @parser.phases[:afterHead]
+    end
+
+    def endTagImplyAfterHead(name)
+      anything_else
+      @parser.phase.processEndTag(name)
+    end
+
+    def endTagTitleStyleScriptNoscript(name)
+      if @tree.open_elements.last.name == name
+        @tree.open_elements.pop
+      else
+        parse_error(_("Unexpected end tag (#{name}). Ignored."))
+      end
+    end
+
+    def endTagOther(name)
+      parse_error(_("Unexpected end tag (#{name}). Ignored."))
+    end
+
+    def anything_else
+      if @tree.open_elements.last.name == 'head'
+        endTagHead('head')
+      else
+        @parser.phase = @parser.phases[:afterHead]
+      end
+    end
+
+    protected
+
+    def appendToHead(element)
+      if @tree.head_pointer.nil?
+        assert @parser.inner_html
+        @tree.open_elements.last.appendChild(element)
+      else
+        @tree.head_pointer.appendChild(element)
+      end
+    end
+
+  end
+end
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_row_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_row_phase.rb
@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class InRowPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-row
@ -15,7 +15,7 @@ module HTML5lib

    def startTagTableCell(name, attributes)
      clearStackToTableRowContext
-      @tree.insertElement(name, attributes)
+      @tree.insert_element(name, attributes)
      @parser.phase = @parser.phases[:inCell]
      @tree.activeFormattingElements.push(Marker)
    end
@ -23,7 +23,7 @@ module HTML5lib
    def startTagTableOther(name, attributes)
      ignoreEndTag = ignoreEndTagTr
      endTagTr('tr')
-      # XXX how are we sure it's always ignored in the innerHTML case?
+      # XXX how are we sure it's always ignored in the inner_html case?
      @parser.phase.processStartTag(name, attributes) unless ignoreEndTag
    end

@ -33,12 +33,12 @@ module HTML5lib

    def endTagTr(name)
      if ignoreEndTagTr
-        # innerHTML case
-        assert @parser.innerHTML
-        @parser.parseError
+        # inner_html case
+        assert @parser.inner_html
+        parse_error
      else
        clearStackToTableRowContext
-        @tree.openElements.pop
+        @tree.open_elements.pop
        @parser.phase = @parser.phases[:inTableBody]
      end
    end
@ -47,7 +47,7 @@ module HTML5lib
      ignoreEndTag = ignoreEndTagTr
      endTagTr('tr')
      # Reprocess the current tag if the tr end tag was not ignored
-      # XXX how are we sure it's always ignored in the innerHTML case?
+      # XXX how are we sure it's always ignored in the inner_html case?
      @parser.phase.processEndTag(name) unless ignoreEndTag
    end

@ -56,13 +56,13 @@ module HTML5lib
        endTagTr('tr')
        @parser.phase.processEndTag(name)
      else
-        # innerHTML case
-        @parser.parseError
+        # inner_html case
+        parse_error
      end
    end

    def endTagIgnore(name)
-      @parser.parseError(_("Unexpected end tag (#{name}) in the row phase. Ignored."))
+      parse_error(_("Unexpected end tag (#{name}) in the row phase. Ignored."))
    end

    def endTagOther(name)
@ -73,9 +73,9 @@ module HTML5lib

    # XXX unify this with other table helper methods
    def clearStackToTableRowContext
-      until ['tr', 'html'].include?(name = @tree.openElements[-1].name)
-        @parser.parseError(_("Unexpected implied end tag (#{name}) in the row phase."))
-        @tree.openElements.pop
+      until %w[tr html].include?(name = @tree.open_elements.last.name)
+        parse_error(_("Unexpected implied end tag (#{name}) in the row phase."))
+        @tree.open_elements.pop
      end
    end

--- a/vendor/plugins/HTML5lib/lib/html5/html5parser/in_select_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_select_phase.rb
@ -0,0 +1,84 @@
+require 'html5/html5parser/phase'
+
+module HTML5
+  class InSelectPhase < Phase
+
+    # http://www.whatwg.org/specs/web-apps/current-work/#in-select
+
+    handle_start 'html', 'option', 'optgroup', 'select'
+
+    handle_end 'option', 'optgroup', 'select', %w( caption table tbody tfoot thead tr td th ) => 'TableElements'
+
+    def processCharacters(data)
+      @tree.insertText(data)
+    end
+
+    def startTagOption(name, attributes)
+      # We need to imply </option> if <option> is the current node.
+      @tree.open_elements.pop if @tree.open_elements.last.name == 'option'
+      @tree.insert_element(name, attributes)
+    end
+
+    def startTagOptgroup(name, attributes)
+      @tree.open_elements.pop if @tree.open_elements.last.name == 'option'
+      @tree.open_elements.pop if @tree.open_elements.last.name == 'optgroup'
+      @tree.insert_element(name, attributes)
+    end
+
+    def startTagSelect(name, attributes)
+      parse_error(_('Unexpected start tag (select) in the select phase implies select start tag.'))
+      endTagSelect('select')
+    end
+
+    def startTagOther(name, attributes) # only reports a parse error; nothing is inserted
+      parse_error(_("Unexpected start tag token (#{name}) in the select phase. Ignored."))
+    end
+
+    def endTagOption(name)
+      if @tree.open_elements.last.name == 'option'
+        @tree.open_elements.pop
+      else
+        parse_error(_('Unexpected end tag (option) in the select phase. Ignored.'))
+      end
+    end
+
+    def endTagOptgroup(name)
+      # </optgroup> implicitly closes <option>
+      if @tree.open_elements.last.name == 'option' and @tree.open_elements[-2].name == 'optgroup'
+        @tree.open_elements.pop
+      end
+      # It also closes </optgroup>
+      if @tree.open_elements.last.name == 'optgroup'
+        @tree.open_elements.pop
+      # But nothing else
+      else
+        parse_error(_('Unexpected end tag (optgroup) in the select phase. Ignored.'))
+      end
+    end
+
+    def endTagSelect(name)
+      if in_scope?('select', true)
+        remove_open_elements_until('select')
+
+        @parser.reset_insertion_mode
+      else
+        # inner_html case
+        parse_error
+      end
+    end
+
+    def endTagTableElements(name)
+      parse_error(_("Unexpected table end tag (#{name}) in the select phase."))
+
+      if in_scope?(name, true)
+        endTagSelect('select')
+        @parser.phase.processEndTag(name)
+      end
+    end
+
+    def endTagOther(name)
+      parse_error(_("Unexpected end tag token (#{name}) in the select phase. Ignored."))
+    end
+
+  end
+end
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_table_body_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_table_body_phase.rb
@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class InTableBodyPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-table0
@ -15,12 +15,12 @@ module HTML5lib

    def startTagTr(name, attributes)
      clearStackToTableBodyContext
-      @tree.insertElement(name, attributes)
+      @tree.insert_element(name, attributes)
      @parser.phase = @parser.phases[:inRow]
    end

    def startTagTableCell(name, attributes)
-      @parser.parseError(_("Unexpected table cell start tag (#{name}) in the table body phase."))
+      parse_error(_("Unexpected table cell start tag (#{name}) in the table body phase."))
      startTagTr('tr', {})
      @parser.phase.processStartTag(name, attributes)
    end
@ -29,11 +29,11 @@ module HTML5lib
      # XXX AT Any ideas on how to share this with endTagTable?
      if in_scope?('tbody', true) or in_scope?('thead', true) or in_scope?('tfoot', true)
        clearStackToTableBodyContext
-        endTagTableRowGroup(@tree.openElements[-1].name)
+        endTagTableRowGroup(@tree.open_elements.last.name)
        @parser.phase.processStartTag(name, attributes)
      else
-        # innerHTML case
-        @parser.parseError
+        # inner_html case
+        parse_error
      end
    end

@ -44,26 +44,26 @@ module HTML5lib
    def endTagTableRowGroup(name)
      if in_scope?(name, true)
        clearStackToTableBodyContext
-        @tree.openElements.pop
+        @tree.open_elements.pop
        @parser.phase = @parser.phases[:inTable]
      else
-        @parser.parseError(_("Unexpected end tag (#{name}) in the table body phase. Ignored."))
+        parse_error(_("Unexpected end tag (#{name}) in the table body phase. Ignored."))
      end
    end

    def endTagTable(name)
      if in_scope?('tbody', true) or in_scope?('thead', true) or in_scope?('tfoot', true)
        clearStackToTableBodyContext
-        endTagTableRowGroup(@tree.openElements[-1].name)
+        endTagTableRowGroup(@tree.open_elements.last.name)
        @parser.phase.processEndTag(name)
      else
-        # innerHTML case
-        @parser.parseError
+        # inner_html case
+        parse_error
      end
    end

    def endTagIgnore(name)
-      @parser.parseError(_("Unexpected end tag (#{name}) in the table body phase. Ignored."))
+      parse_error(_("Unexpected end tag (#{name}) in the table body phase. Ignored."))
    end

    def endTagOther(name)
@ -73,9 +73,9 @@ module HTML5lib
    protected

    def clearStackToTableBodyContext
-      until ['tbody', 'tfoot', 'thead', 'html'].include?(name = @tree.openElements[-1].name)
-        @parser.parseError(_("Unexpected implied end tag (#{name}) in the table body phase."))
-        @tree.openElements.pop
+      until %w[tbody tfoot thead html].include?(name = @tree.open_elements.last.name)
+        parse_error(_("Unexpected implied end tag (#{name}) in the table body phase."))
+        @tree.open_elements.pop
      end
    end

--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_table_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_table_phase.rb
@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class InTablePhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-table
@ -12,24 +12,24 @@ module HTML5lib
    handle_end 'table', %w( body caption col colgroup html tbody td tfoot th thead tr ) => 'Ignore'

    def processCharacters(data)
-      @parser.parseError(_("Unexpected non-space characters in table context caused voodoo mode."))
+      parse_error(_("Unexpected non-space characters in table context caused voodoo mode."))
      # Make all the special element rearranging voodoo kick in
-      @tree.insertFromTable = true
+      @tree.insert_from_table = true
      # Process the character in the "in body" mode
      @parser.phases[:inBody].processCharacters(data)
-      @tree.insertFromTable = false
+      @tree.insert_from_table = false
    end

    def startTagCaption(name, attributes)
      clearStackToTableContext
      @tree.activeFormattingElements.push(Marker)
-      @tree.insertElement(name, attributes)
+      @tree.insert_element(name, attributes)
      @parser.phase = @parser.phases[:inCaption]
    end

    def startTagColgroup(name, attributes)
      clearStackToTableContext
-      @tree.insertElement(name, attributes)
+      @tree.insert_element(name, attributes)
      @parser.phase = @parser.phases[:inColumnGroup]
    end

@ -40,7 +40,7 @@ module HTML5lib

    def startTagRowGroup(name, attributes)
      clearStackToTableContext
-      @tree.insertElement(name, attributes)
+      @tree.insert_element(name, attributes)
      @parser.phase = @parser.phases[:inTableBody]
    end

@ -50,60 +50,60 @@ module HTML5lib
    end

    def startTagTable(name, attributes)
-      @parser.parseError(_("Unexpected start tag (table) in table phase. Implies end tag (table)."))
+      parse_error(_("Unexpected start tag (table) in table phase. Implies end tag (table)."))
      @parser.phase.processEndTag('table')
-      @parser.phase.processStartTag(name, attributes) unless @parser.innerHTML
+      @parser.phase.processStartTag(name, attributes) unless @parser.inner_html
    end

    def startTagOther(name, attributes)
-      @parser.parseError(_("Unexpected start tag (#{name}) in table context caused voodoo mode."))
+      parse_error(_("Unexpected start tag (#{name}) in table context caused voodoo mode."))
      # Make all the special element rearranging voodoo kick in
-      @tree.insertFromTable = true
+      @tree.insert_from_table = true
      # Process the start tag in the "in body" mode
      @parser.phases[:inBody].processStartTag(name, attributes)
-      @tree.insertFromTable = false
+      @tree.insert_from_table = false
    end

    def endTagTable(name)
      if in_scope?('table', true)
        @tree.generateImpliedEndTags
-      
-        unless @tree.openElements[-1].name == 'table'
-          @parser.parseError(_("Unexpected end tag (table). Expected end tag (#{@tree.openElements[-1].name})."))
+
+        unless @tree.open_elements.last.name == 'table'
+          parse_error(_("Unexpected end tag (table). Expected end tag (#{@tree.open_elements.last.name})."))
        end
-      
+
        remove_open_elements_until('table')

-        @parser.resetInsertionMode
+        @parser.reset_insertion_mode
      else
-        # innerHTML case
-        assert @parser.innerHTML
-        @parser.parseError
+        # inner_html case
+        assert @parser.inner_html
+        parse_error
      end
    end

    def endTagIgnore(name)
-      @parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
+      parse_error(_("Unexpected end tag (#{name}). Ignored."))
    end

    def endTagOther(name)
-      @parser.parseError(_("Unexpected end tag (#{name}) in table context caused voodoo mode."))
+      parse_error(_("Unexpected end tag (#{name}) in table context caused voodoo mode."))
      # Make all the special element rearranging voodoo kick in
-      @tree.insertFromTable = true
+      @tree.insert_from_table = true
      # Process the end tag in the "in body" mode
      @parser.phases[:inBody].processEndTag(name)
-      @tree.insertFromTable = false
+      @tree.insert_from_table = false
    end

    protected

    def clearStackToTableContext
      # "clear the stack back to a table context"
-      until ['table', 'html'].include?(name = @tree.openElements[-1].name)
-        @parser.parseError(_("Unexpected implied end tag (#{name}) in the table phase."))
-        @tree.openElements.pop
+      until %w[table html].include?(name = @tree.open_elements.last.name)
+        parse_error(_("Unexpected implied end tag (#{name}) in the table phase."))
+        @tree.open_elements.pop
      end
-      # When the current node is <html> it's an innerHTML case
+      # When the current node is <html> it's an inner_html case
    end

  end
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/initial_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/initial_phase.rb
@ -1,28 +1,28 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class InitialPhase < Phase

    # This phase deals with error handling as well which is currently not
    # covered in the specification. The error handling is typically known as
    # "quirks mode". It is expected that a future version of HTML5 will define this.

-    def processEOF
-      @parser.parseError(_('Unexpected End of file. Expected DOCTYPE.'))
+    def process_eof
+      parse_error(_('Unexpected End of file. Expected DOCTYPE.'))
      @parser.phase = @parser.phases[:rootElement]
-      @parser.phase.processEOF
+      @parser.phase.process_eof
    end

    def processComment(data)
-      @tree.insertComment(data, @tree.document)
+      @tree.insert_comment(data, @tree.document)
    end

    def processDoctype(name, publicId, systemId, correct)
      if name.downcase != 'html' or publicId or systemId
-        @parser.parseError(_('Erroneous DOCTYPE.'))
+        parse_error(_('Erroneous DOCTYPE.'))
      end
      # XXX need to update DOCTYPE tokens
-      @tree.insertDoctype(name)
+      @tree.insertDoctype(name, publicId, systemId)

      publicId = publicId.to_s.upcase

@ -110,23 +110,22 @@ module HTML5lib
    end

    def processSpaceCharacters(data)
-      @tree.insertText(data, @tree.document)
    end

    def processCharacters(data)
-      @parser.parseError(_('Unexpected non-space characters. Expected DOCTYPE.'))
+      parse_error(_('Unexpected non-space characters. Expected DOCTYPE.'))
      @parser.phase = @parser.phases[:rootElement]
      @parser.phase.processCharacters(data)
    end

    def processStartTag(name, attributes)
-      @parser.parseError(_("Unexpected start tag (#{name}). Expected DOCTYPE."))
+      parse_error(_("Unexpected start tag (#{name}). Expected DOCTYPE."))
      @parser.phase = @parser.phases[:rootElement]
      @parser.phase.processStartTag(name, attributes)
    end

    def processEndTag(name)
-      @parser.parseError(_("Unexpected end tag (#{name}). Expected DOCTYPE."))
+      parse_error(_("Unexpected end tag (#{name}). Expected DOCTYPE."))
      @parser.phase = @parser.phases[:rootElement]
      @parser.phase.processEndTag(name)
    end
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/phase.rb
@ -1,4 +1,4 @@
-module HTML5lib
+module HTML5
  # Base class for helper objects that implement each phase of processing.
  #
  # Handler methods should be in the following order (they can be omitted):
@ -15,9 +15,12 @@ module HTML5lib
  #
  class Phase

+    extend Forwardable
+    def_delegators :@parser, :parse_error
+
    # The following example call:
    #
-    #   tag_handlers('startTag', 'html', %( base link meta ), %( li dt dd ) => 'ListItem')
+    #   tag_handlers('startTag', 'html', %w( base link meta ), %w( li dt dd ) => 'ListItem')
    #
    # ...would return a hash equal to this:
    #
@ -34,15 +37,15 @@ module HTML5lib
      if tags.last.is_a?(Hash)
        tags.pop.each do |names, handler_method_suffix|
          handler_method = prefix + handler_method_suffix
-          Array(names).each { |name| mapping[name] = handler_method }
+          Array(names).each {|name| mapping[name] = handler_method }
        end
      end
      tags.each do |names|
        names = Array(names)
-        handler_method = prefix + names.map { |name| name.capitalize }.join
-        names.each { |name| mapping[name] = handler_method }
+        handler_method = prefix + names.map {|name| name.capitalize }.join
+        names.each {|name| mapping[name] = handler_method }
      end
-      return mapping
+      mapping
    end

    def self.start_tag_handlers
@ -80,17 +83,17 @@ module HTML5lib
      @parser, @tree = parser, tree
    end

-    def processEOF
+    def process_eof
      @tree.generateImpliedEndTags

-      if @tree.openElements.length > 2
-        @parser.parseError(_('Unexpected end of file. Missing closing tags.'))
-      elsif @tree.openElements.length == 2 and @tree.openElements[1].name != 'body'
+      if @tree.open_elements.length > 2
+        parse_error(_('Unexpected end of file. Missing closing tags.'))
+      elsif @tree.open_elements.length == 2 and @tree.open_elements[1].name != 'body'
        # This happens for framesets or something?
-        @parser.parseError(_("Unexpected end of file. Expected end tag (#{@tree.openElements[1].name}) first."))
-      elsif @parser.innerHTML and @tree.openElements.length > 1 
+        parse_error(_("Unexpected end of file. Expected end tag (#{@tree.open_elements[1].name}) first."))
+      elsif @parser.inner_html and @tree.open_elements.length > 1 
        # XXX This is not what the specification says. Not sure what to do here.
-        @parser.parseError(_('XXX innerHTML EOF'))
+        parse_error(_('XXX inner_html EOF'))
      end
      # Betting ends.
    end
@ -98,11 +101,11 @@ module HTML5lib
    def processComment(data)
      # For most phases the following is correct. Where it's not it will be
      # overridden.
-      @tree.insertComment(data, @tree.openElements[-1])
+      @tree.insert_comment(data, @tree.open_elements.last)
    end

    def processDoctype(name, publicId, systemId, correct)
-      @parser.parseError(_('Unexpected DOCTYPE. Ignored.'))
+      parse_error(_('Unexpected DOCTYPE. Ignored.'))
    end

    def processSpaceCharacters(data)
@ -114,17 +117,17 @@ module HTML5lib
    end

    def startTagHtml(name, attributes)
-      if @parser.firstStartTag == false and name == 'html'
-         @parser.parseError(_('html needs to be the first start tag.'))
+      if @parser.first_start_tag == false and name == 'html'
+         parse_error(_('html needs to be the first start tag.'))
      end
      # XXX Need a check here to see if the first start tag token emitted is
-      # this token... If it's not, invoke @parser.parseError.
+      # this token... If it's not, invoke parse_error.
      attributes.each do |attr, value|
-        unless @tree.openElements[0].attributes.has_key?(attr)
-          @tree.openElements[0].attributes[attr] = value
+        unless @tree.open_elements.first.attributes.has_key?(attr)
+          @tree.open_elements.first.attributes[attr] = value
        end
      end
-      @parser.firstStartTag = false
+      @parser.first_start_tag = false
    end

    def processEndTag(name)
@ -146,11 +149,10 @@ module HTML5lib
    def remove_open_elements_until(name=nil)
      finished = false
      until finished
-        element = @tree.openElements.pop
-        finished = name.nil?? yield(element) : element.name == name
+        element = @tree.open_elements.pop
+        finished = name.nil? ? yield(element) : element.name == name
      end
      return element
    end
-
  end
 end
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/root_element_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/root_element_phase.rb
@ -1,40 +1,39 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'

-module HTML5lib
+module HTML5
  class RootElementPhase < Phase

-    def processEOF
-      insertHtmlElement
-      @parser.phase.processEOF
+    def process_eof
+      insert_html_element
+      @parser.phase.process_eof
    end

    def processComment(data)
-      @tree.insertComment(data, @tree.document)
+      @tree.insert_comment(data, @tree.document)
    end

    def processSpaceCharacters(data)
-      @tree.insertText(data, @tree.document)
    end

    def processCharacters(data)
-      insertHtmlElement
+      insert_html_element
      @parser.phase.processCharacters(data)
    end

    def processStartTag(name, attributes)
-      @parser.firstStartTag = true if name == 'html'
-      insertHtmlElement
+      @parser.first_start_tag = true if name == 'html'
+      insert_html_element
      @parser.phase.processStartTag(name, attributes)
    end

    def processEndTag(name)
-      insertHtmlElement
+      insert_html_element
      @parser.phase.processEndTag(name)
    end

-    def insertHtmlElement
+    def insert_html_element
      element = @tree.createElement('html', {})
-      @tree.openElements.push(element)
+      @tree.open_elements.push(element)
      @tree.document.appendChild(element)
      @parser.phase = @parser.phases[:beforeHead]
    end
--- a/vendor/plugins/HTML5lib/lib/html5/html5parser/trailing_end_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/html5parser/trailing_end_phase.rb
@ -0,0 +1,35 @@
+require 'html5/html5parser/phase'
+
+module HTML5
+  class TrailingEndPhase < Phase # phase in which only comments, whitespace or end of file are expected
+
+    def process_eof # end of file is the expected outcome here: nothing to do
+    end
+
+    def processComment(data) # comments are attached directly to the document node
+      @tree.insert_comment(data, @tree.document)
+    end
+
+    def processSpaceCharacters(data) # delegated to the previous phase; the current phase is not changed
+      @parser.last_phase.processSpaceCharacters(data)
+    end
+
+    def processCharacters(data) # report the error, switch back to the previous phase and replay the token there
+      parse_error(_('Unexpected non-space characters. Expected end of file.'))
+      @parser.phase = @parser.last_phase
+      @parser.phase.processCharacters(data)
+    end
+
+    def processStartTag(name, attributes) # report the error, switch back to the previous phase and replay the token there
+      parse_error(_("Unexpected start tag (#{name}). Expected end of file.")) # double-quoted so that #{name} is interpolated
+      @parser.phase = @parser.last_phase
+      @parser.phase.processStartTag(name, attributes)
+    end
+
+    def processEndTag(name) # report the error, switch back to the previous phase and replay the token there
+      parse_error(_("Unexpected end tag (#{name}). Expected end of file.")) # double-quoted so that #{name} is interpolated
+      @parser.phase = @parser.last_phase
+      @parser.phase.processEndTag(name)
+    end
+  end
+end
--- a/vendor/plugins/HTML5lib/lib/html5lib/inputstream.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/inputstream.rb
@ -1,7 +1,7 @@
 require 'stringio'
-require 'html5lib/constants'
+require 'html5/constants'

-module HTML5lib
+module HTML5

  # Provides a unicode stream of characters to the HTMLTokenizer.

@ -10,7 +10,7 @@ module HTML5lib

  class HTMLInputStream

-    attr_accessor :queue, :char_encoding
+    attr_accessor :queue, :char_encoding, :errors

    # Initialises the HTMLInputStream.
    # 
@ -27,11 +27,11 @@ module HTML5lib
    # parseMeta - Look for a <meta> element containing encoding information

    def initialize(source, options = {})
-      @encoding = nil
+      @encoding   = nil
      @parse_meta = true
-      @chardet = true
+      @chardet    = true

-      options.each { |name, value| instance_variable_set("@#{name}", value) }
+      options.each {|name, value| instance_variable_set("@#{name}", value) }

      # Raw Stream
      @raw_stream = open_stream(source)
@ -40,25 +40,31 @@ module HTML5lib
      #Number of bytes to use when looking for a meta element with
      #encoding information
      @NUM_BYTES_META = 512
+      #Number of bytes to use when detecting encoding using chardet
+      @NUM_BYTES_CHARDET = 256
+      #Number of bytes to use when reading content
+      @NUM_BYTES_BUFFER = 1024
+
      #Encoding to use if no other information can be found
      @DEFAULT_ENCODING = 'windows-1252'
    
      #Detect encoding iff no explicit "transport level" encoding is supplied
-      if @encoding.nil? or not HTML5lib.is_valid_encoding(@encoding)
+      if @encoding.nil? or not HTML5.is_valid_encoding(@encoding)
        @char_encoding = detect_encoding
      else
        @char_encoding = @encoding
      end

      # Read bytes from stream decoding them into Unicode
-      uString = @raw_stream.read
+      @buffer = @raw_stream.read(@NUM_BYTES_BUFFER) || ''
      if @char_encoding == 'windows-1252'
        @win1252 = true
      elsif @char_encoding != 'utf-8'
        begin
          require 'iconv'
          begin
-            uString = Iconv.iconv('utf-8', @char_encoding, uString).first
+            @buffer << @raw_stream.read unless @raw_stream.eof?
+            @buffer = Iconv.iconv('utf-8', @char_encoding, @buffer).first
          rescue
            @win1252 = true
          end
@ -67,10 +73,8 @@ module HTML5lib
        end
      end

-      # Convert the unicode string into a list to be used as the data stream
-      @data_stream = uString
-
      @queue = []
+      @errors = []

      # Reset position in the list to read from
      @tell = 0
@ -109,9 +113,22 @@ module HTML5lib
        begin
          require 'rubygems'
          require 'UniversalDetector' # gem install chardet
-          buffer = @raw_stream.read
-          encoding = UniversalDetector::chardet(buffer)['encoding']
-          seek(buffer, 0)
+          buffers = []
+          detector = UniversalDetector::Detector.instance
+          detector.reset
+          until @raw_stream.eof?
+            buffer = @raw_stream.read(@NUM_BYTES_CHARDET)
+            break if !buffer or buffer.empty?
+            buffers << buffer
+            detector.feed(buffer)
+            break if detector.instance_eval {@done}
+            detector.instance_eval {
+              @_mLastChar = @_mLastChar.chr if Fixnum === @_mLastChar
+            }
+          end
+          detector.close
+          encoding = detector.result['encoding']
+          seek(buffers*'', 0)
        rescue LoadError
        end
      end
@ -242,14 +259,20 @@ module HTML5lib
      unless @queue.empty?
        return @queue.shift
      else
-        c = @data_stream[@tell]
+        if @tell + 3 > @buffer.length and !@raw_stream.eof?
+          # read next block
+          @buffer = @buffer[@tell .. -1] + @raw_stream.read(@NUM_BYTES_BUFFER)
+          @tell = 0
+        end
+
+        c = @buffer[@tell]
        @tell += 1

        case c
        when 0x01 .. 0x7F
          if c == 0x0D
            # normalize newlines
-            @tell += 1 if @data_stream[@tell] == 0x0A
+            @tell += 1 if @buffer[@tell] == 0x0A
            c = 0x0A
          end

@ -274,9 +297,9 @@ module HTML5lib
          end

        when 0xC0 .. 0xFF
-          if @win1252
+          if instance_variables.include?("@win1252") && @win1252
            "\xC3" + (c-64).chr # convert to utf-8
-          elsif @data_stream[@tell-1 .. -1] =~ /^
+          elsif @buffer[@tell-1 .. @tell+3] =~ /^
                ( [\xC2-\xDF][\x80-\xBF]             # non-overlong 2-byte
                |  \xE0[\xA0-\xBF][\x80-\xBF]        # excluding overlongs
                | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}  # straight 3-byte
@ -292,6 +315,8 @@ module HTML5lib
          end

        when 0x00
+          @errors.push('null character found in input stream, ' +
+            'replaced with U+FFFD')
          [0xFFFD].pack('U') # null characters are invalid

        else
@ -317,6 +342,10 @@ module HTML5lib
      @queue.insert(0, c) unless c == :EOF
      return char_stack.join('')
    end
+
+    def unget(characters)
+      @queue.unshift(*characters.to_a) unless characters == :EOF
+    end
  end

  # String-like object with an assosiated position and various extra methods
@ -433,14 +462,14 @@ module HTML5lib
        
        if attr[0] == 'charset'
          tentative_encoding = attr[1]
-          if HTML5lib.is_valid_encoding(tentative_encoding)
+          if HTML5.is_valid_encoding(tentative_encoding)
            @encoding = tentative_encoding  
            return false
          end
        elsif attr[0] == 'content'
          content_parser = ContentAttrParser.new(EncodingBytes.new(attr[1]))
          tentative_encoding = content_parser.parse
-          if HTML5lib.is_valid_encoding(tentative_encoding)
+          if HTML5.is_valid_encoding(tentative_encoding)
            @encoding = tentative_encoding
            return false
          end
--- a/vendor/plugins/HTML5lib/lib/html5lib/liberalxmlparser.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/liberalxmlparser.rb
@ -11,10 +11,10 @@
 # 
 # @@TODO:
 # * Selectively lowercase only XHTML, but not foreign markup
-require 'html5lib/html5parser'
-require 'html5lib/constants'
+require 'html5/html5parser'
+require 'html5/constants'

-module HTML5lib
+module HTML5

  # liberal XML parser
  class XMLParser < HTMLParser
@ -24,26 +24,36 @@ module HTML5lib
      @phases[:initial] = XmlRootPhase.new(self, @tree)
    end

-    def normalizeToken(token)
-      if token[:type] == :StartTag or token[:type] == :EmptyTag
+    def normalize_token(token)
+      case token[:type]
+      when :StartTag, :EmptyTag
        # We need to remove the duplicate attributes and convert attributes
-        # to a dict so that [["x", "y"], ["x", "z"]] becomes {"x": "y"}
+        # to a Hash so that [["x", "y"], ["x", "z"]] becomes {"x": "y"}

        token[:data] = Hash[*token[:data].reverse.flatten]

        # For EmptyTags, process both a Start and an End tag
        if token[:type] == :EmptyTag
+          save = @tokenizer.content_model_flag
          @phase.processStartTag(token[:name], token[:data])
+          @tokenizer.content_model_flag = save
          token[:data] = {}
          token[:type] = :EndTag
        end

-      elsif token[:type] == :EndTag
-        if token[:data]
-           parseError(_("End tag contains unexpected attributes."))
+      when :Characters
+        # un-escape RCDATA_ELEMENTS (e.g. style, script)
+        if @tokenizer.content_model_flag == :CDATA
+          token[:data] = token[:data].
+            gsub('&lt;','<').gsub('&gt;','>').gsub('&amp;','&')
        end

-      elsif token[:type] == :Comment
+      when :EndTag
+        if token[:data]
+           parse_error(_("End tag contains unexpected attributes."))
+        end
+
+      when :Comment
        # Rescue CDATA from the comments
        if token[:data][0..6] == "[CDATA[" and token[:data][-2..-1] == "]]"
          token[:type] = :Characters
@ -64,22 +74,22 @@ module HTML5lib
      @phases[:rootElement] = XhmlRootPhase.new(self, @tree)
    end

-    def normalizeToken(token)
+    def normalize_token(token)
      super(token)

      # ensure that non-void XHTML elements have content so that separate
      # open and close tags are emitted
      if token[:type]  == :EndTag
        if VOID_ELEMENTS.include? token[:name]
-          if @tree.openElements[-1].name != token["name"]:
+          if @tree.open_elements[-1].name != token["name"]:
            token[:type] = :EmptyTag
            token["data"] ||= {}
          end
        else
-          if token[:name] == @tree.openElements[-1].name and \
-            not @tree.openElements[-1].hasContent
+          if token[:name] == @tree.open_elements[-1].name and \
+            not @tree.open_elements[-1].hasContent
            @tree.insertText('') unless
-              @tree.openElements.any? {|e|
+              @tree.open_elements.any? {|e|
                e.attributes.keys.include? 'xmlns' and
                e.attributes['xmlns'] != 'http://www.w3.org/1999/xhtml'
              }
@ -92,9 +102,9 @@ module HTML5lib
  end

  class XhmlRootPhase < RootElementPhase
-    def insertHtmlElement
+    def insert_html_element
      element = @tree.createElement("html", {'xmlns' => 'http://www.w3.org/1999/xhtml'})
-      @tree.openElements.push(element)
+      @tree.open_elements.push(element)
      @tree.document.appendChild(element)
      @parser.phase = @parser.phases[:beforeHead]
    end
@ -105,15 +115,15 @@ module HTML5lib
    @start_tag_handlers = Hash.new(:startTagOther)
    @end_tag_handlers = Hash.new(:endTagOther)
    def startTagOther(name, attributes)
-      @tree.openElements.push(@tree.document)
+      @tree.open_elements.push(@tree.document)
      element = @tree.createElement(name, attributes)
-      @tree.openElements[-1].appendChild(element)
-      @tree.openElements.push(element)
+      @tree.open_elements[-1].appendChild(element)
+      @tree.open_elements.push(element)
      @parser.phase = XmlElementPhase.new(@parser,@tree)
    end
    def endTagOther(name)
      super
-      @tree.openElements.pop
+      @tree.open_elements.pop
    end
  end

@ -125,17 +135,17 @@ module HTML5lib

    def startTagOther(name, attributes)
      element = @tree.createElement(name, attributes)
-      @tree.openElements[-1].appendChild(element)
-      @tree.openElements.push(element)
+      @tree.open_elements[-1].appendChild(element)
+      @tree.open_elements.push(element)
    end

    def endTagOther(name)
-      for node in @tree.openElements.reverse
+      for node in @tree.open_elements.reverse
        if node.name == name
-          {} while @tree.openElements.pop != node
+          {} while @tree.open_elements.pop != node
          break
        else
-          @parser.parseError
+          parse_error
        end
      end
    end
--- a/vendor/plugins/HTML5lib/lib/html5lib/sanitizer.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/sanitizer.rb
@ -1,6 +1,7 @@
 require 'cgi'
+require 'html5/tokenizer'

-module HTML5lib
+module HTML5

 # This module provides sanitization of XHTML+MathML+SVG
 # and of inline style attributes.
@ -12,11 +13,11 @@ module HTML5lib
 # or, if you already have a parse tree (in this example, a REXML tree),
 # at the Serializer stage:
 #
-#     tokens = TreeWalkers.getTreeWalker('rexml').new(tree)
+#     tokens = TreeWalkers.get_tree_walker('rexml').new(tree)
 #     HTMLSerializer.serialize(tokens, {:encoding=>'utf-8',
 #        :sanitize => true})

-   module HTMLSanitizeModule
+  module HTMLSanitizeModule

    ACCEPTABLE_ELEMENTS = %w[a abbr acronym address area b big blockquote br
      button caption center cite code col colgroup dd del dfn dir div dl dt
--- a/vendor/plugins/HTML5lib/lib/html5/serializer.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/serializer.rb
@ -0,0 +1,2 @@
+require 'html5/serializer/htmlserializer'
+require 'html5/serializer/xhtmlserializer'
--- a/vendor/plugins/HTML5lib/lib/html5lib/serializer/htmlserializer.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/serializer/htmlserializer.rb
@ -1,6 +1,6 @@
-require 'html5lib/constants'
+require 'html5/constants'

-module HTML5lib
+module HTML5

  class HTMLSerializer

@ -13,17 +13,18 @@ module HTML5lib
    end
 
    def initialize(options={})
-      @quote_attr_values = false
-      @quote_char = '"'
-      @use_best_quote_char = true
+      @quote_attr_values           = false
+      @quote_char                  = '"'
+      @use_best_quote_char         = true
      @minimize_boolean_attributes = true

-      @use_trailing_solidus = false
+      @use_trailing_solidus          = false
      @space_before_trailing_solidus = true
-      @escape_lt_in_attrs = false
+      @escape_lt_in_attrs            = false
+      @escape_rcdata                 = false

      @omit_optional_tags = true
-      @sanitize = false
+      @sanitize           = false

      @strip_whitespace = false

@ -43,22 +44,22 @@ module HTML5lib
      @errors = []

      if encoding and @inject_meta_charset
-        require 'html5lib/filters/inject_meta_charset'
+        require 'html5/filters/inject_meta_charset'
        treewalker = Filters::InjectMetaCharset.new(treewalker, encoding)
      end

      if @strip_whitespace
-        require 'html5lib/filters/whitespace'
+        require 'html5/filters/whitespace'
        treewalker = Filters::WhitespaceFilter.new(treewalker)
      end

      if @sanitize
-        require 'html5lib/filters/sanitizer'
+        require 'html5/filters/sanitizer'
        treewalker = Filters::HTMLSanitizeFilter.new(treewalker)
      end

      if @omit_optional_tags
-        require 'html5lib/filters/optionaltags'
+        require 'html5/filters/optionaltags'
        treewalker = Filters::OptionalTagFilter.new(treewalker)
      end

@ -72,7 +73,7 @@ module HTML5lib
        elsif [:Characters, :SpaceCharacters].include? type
          if type == :SpaceCharacters or in_cdata
            if in_cdata and token[:data].include?("</")
-              serializeError(_("Unexpected </ in CDATA"))
+              serialize_error(_("Unexpected </ in CDATA"))
            end
            result << token[:data]
          else
@ -81,10 +82,10 @@ module HTML5lib

        elsif [:StartTag, :EmptyTag].include? type
          name = token[:name]
-          if RCDATA_ELEMENTS.include?(name)
+          if RCDATA_ELEMENTS.include?(name) and not @escape_rcdata
            in_cdata = true
          elsif in_cdata
-            serializeError(_("Unexpected child element of a CDATA element"))
+            serialize_error(_("Unexpected child element of a CDATA element"))
          end
          attributes = []
          for k,v in attrs = token[:data].to_a.sort
@ -136,19 +137,19 @@ module HTML5lib
          if RCDATA_ELEMENTS.include?(name)
            in_cdata = false
          elsif in_cdata
-            serializeError(_("Unexpected child element of a CDATA element"))
+            serialize_error(_("Unexpected child element of a CDATA element"))
          end
          end_tag = "</#{name}>"
          result << end_tag

        elsif type == :Comment
          data = token[:data]
-          serializeError(_("Comment contains --")) if data.index("--")
+          serialize_error(_("Comment contains --")) if data.index("--")
          comment = "<!--%s-->" % token[:data]
          result << comment

        else
-          serializeError(token[:data])
+          serialize_error(token[:data])
        end
      end

@ -162,13 +163,15 @@ module HTML5lib

    alias :render :serialize

-    def serializeError(data="XXX ERROR MESSAGE NEEDED")
+    def serialize_error(data="XXX ERROR MESSAGE NEEDED")
      # XXX The idea is to make data mandatory.
      @errors.push(data)
      if @strict
        raise SerializeError
      end
    end
+
+    def _(string); string; end
  end

  # Error in serialized tree
--- a/vendor/plugins/HTML5lib/lib/html5/serializer/xhtmlserializer.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/serializer/xhtmlserializer.rb
@ -0,0 +1,20 @@
+require 'html5/serializer/htmlserializer'
+
+module HTML5
+
+  class XHTMLSerializer < HTMLSerializer # HTMLSerializer started with the option values listed in DEFAULTS
+    DEFAULTS = {
+      :quote_attr_values           => true,
+      :minimize_boolean_attributes => false,
+      :use_trailing_solidus        => true,
+      :escape_lt_in_attrs          => true,
+      :omit_optional_tags          => false,
+      :escape_rcdata               => true
+    }
+
+    def initialize(options={}) # values in options win over the ones in DEFAULTS
+      super(DEFAULTS.merge(options))
+    end
+  end
+
+end
--- a/vendor/plugins/HTML5lib/lib/html5/sniffer.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/sniffer.rb
@ -0,0 +1,45 @@
+module HTML5
+module Sniffer
+  # 4.7.4 (presumably a section number of the HTML5 draft -- TODO confirm). Returns a MIME type string for str.
+  def html_or_feed str
+    s = str[0, 512] # steps 1, 2: only the first 512 units of the input are examined
+    pos = 0
+
+    while pos < s.length
+      case s[pos] # compared with integer codes below, so this assumes String#[] returns an integer -- TODO confirm for the target Ruby
+      when 0x09, 0x20, 0x0A, 0x0D # tab, space, LF, CR: leading whitespace is skipped
+        pos += 1
+      when  0x3C # "<"
+        pos += 1
+        if s[pos..pos+2] == "!--" # [0x21, 0x2D, 0x2D] -- a comment: skip to just past its "-->"
+          pos += 3
+          until s[pos..pos+2] == "-->" or pos >= s.length
+            pos += 1
+          end
+          pos += 3
+        elsif s[pos] == 0x21 # "!" -- any other "<!" construct: skip to just past the next ">"
+          pos += 1
+          until s[pos] == 0x3E or pos >= s.length # ">"
+            pos += 1 
+          end
+          pos += 1
+        elsif s[pos] == 0x3F # "?" -- skip to just past the next "?>"
+          until s[pos..pos+1] == "?>" or pos >= s.length # [0x3F, 0x3E]
+            pos +=  1
+          end
+          pos += 2
+        elsif s[pos..pos+2] == "rss"   # [0x72, 0x73, 0x73]
+          return "application/rss+xml"
+        elsif s[pos..pos+3] == "feed"  # [0x66, 0x65, 0x65, 0x64]
+          return "application/atom+xml"
+        elsif s[pos..pos+6] == "rdf:RDF" # [0x72, 0x64, 0x66, 0x3A, 0x52, 0x44, 0x46]
+          raise NotImplementedError # RDF sniffing is not implemented
+        end # any other tag name leaves pos on its first character, which ends the scan on the next pass
+      else
+        break # anything else ends the scan
+      end
+    end
+    "text/html" # default when no feed root element was recognised
+  end
+end
+end
--- a/vendor/plugins/HTML5lib/lib/html5/tokenizer.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/tokenizer.rb
@ -0,0 +1,968 @@
+require 'html5/constants'
+require 'html5/inputstream'
+
+module HTML5
+
+  # This class takes care of tokenizing HTML.
+  #
+  # * @current_token
+  #   Holds the token that is currently being processed.
+  #
+  # * @state
+  #   Holds a reference to the method to be invoked... XXX
+  #
+  # * @states
+  #   Holds a mapping between states and methods that implement the state.
+  #
+  # * @stream
+  #   Points to HTMLInputStream object.
+
+  class HTMLTokenizer
+    attr_accessor :content_model_flag, :current_token
+    attr_reader :stream
+
+    # XXX need to fix documentation
+
+    def initialize(stream, options = {})
+      @stream = HTMLInputStream.new(stream, options)
+
+      # The state machine starts in the data state with the PCDATA content model.
+      @state              = :data_state
+      @content_model_flag = :PCDATA
+      @escapeFlag         = false
+      @lastFourChars      = []
+
+      # No token is under construction and nothing is waiting to be emitted.
+      @current_token = nil
+      @token_queue   = []
+
+      # Lower-casing of names stays enabled unless an option is explicitly false.
+      @lowercase_element_name = !(options[:lowercase_element_name] == false)
+      @lowercase_attr_name    = !(options[:lowercase_attr_name]    == false)
+    end
+
+    # This is where the magic happens.
+    #
+    # We do our usual processing through the states and when we have a token
+    # to return we yield the token which pauses processing until the next token
+    # is requested.
+    def each
+      @token_queue = []
+      # Keep invoking the current state method. The loop stops as soon as a
+      # state returns a false value, which the data state does at end of file.
+      while send(@state)
+        yield({:type => :ParseError, :data => @stream.errors.shift}) until @stream.errors.empty?
+        yield(@token_queue.shift) until @token_queue.empty?
+      end
+    end
+
+    # Below are various helper functions the tokenizer states use worked out.
+  
+    # If the next character is a '>', convert the current_token into
+    # an EmptyTag
+
+    def process_solidus_in_tag
+
+      # Read the character that follows the solidus.
+      following = @stream.char
+
+      if @current_token[:type] == :StartTag && following == ">"
+        @current_token[:type] = :EmptyTag
+      else
+        @token_queue.push({:type => :ParseError, :data => _("Solidus (/) incorrectly placed in tag.")})
+      end
+
+      # Give the character back to the stream in either case, so that it is
+      # still available to whatever reads from the stream next.
+      @stream.unget(following)
+    end
+
+    # This function returns either U+FFFD or the character based on the
+    # decimal or hexadecimal representation. It also discards ";" if present.
+    # If not present @token_queue << {:type => :ParseError}" is invoked.
+
+    def consume_number_entity(isHex)
+
+      # Reads the digits of a numeric entity from the stream and returns the
+      # character they denote, packed with 'U'. isHex selects base 16 over 10.
+      # XXX More may need to be done here. Thoughts on this appreciated.
+      allowed = DIGITS
+      radix = 10
+      if isHex
+        allowed = HEX_DIGITS
+        radix = 16
+      end
+
+      char_stack = []
+
+      # Consume all the characters that are in range while making sure we
+      # don't hit an EOF.
+      c = @stream.char
+      while allowed.include?(c) and c != :EOF
+        char_stack.push(c)
+        c = @stream.char
+      end
+
+      # Convert the set of characters consumed to an int.
+      charAsInt = char_stack.join('').to_i(radix) # an empty digit list gives 0, which is rejected below
+
+      if charAsInt == 13
+        @token_queue << {:type => :ParseError, :data => _("Incorrect CR newline entity. Replaced with LF.")}
+        charAsInt = 10
+      elsif (128..159).include? charAsInt
+        # If the integer is between 127 and 160 (so 128 and bigger and 159
+        # and smaller) we need to do the "windows trick".
+        @token_queue << {:type => :ParseError, :data => _("Entity used with illegal number (windows-1252 reference).")}
+
+        charAsInt = ENTITIES_WINDOWS1252[charAsInt - 128] # table lookup, offset so that 128 maps to index 0
+      end
+
+      if 0 < charAsInt and charAsInt <= 1114111 and not (55296 <= charAsInt and charAsInt <= 57343) # 1114111 = 0x10FFFF; 55296..57343 = 0xD800..0xDFFF (surrogates)
+        char = [charAsInt].pack('U')
+      else
+        char = [0xFFFD].pack('U') # U+FFFD stands in for an illegal codepoint
+        @token_queue << {:type => :ParseError, :data => _("Numeric entity represents an illegal codepoint.")}
+      end
+
+      # Discard the ; if present. Otherwise, queue a ParseError token and
+      # put the character that ended the digits back on the stream.
+      if c != ";"
+        @token_queue << {:type => :ParseError, :data => _("Numeric entity didn't end with ';'.")}
+        @stream.unget(c)
+      end
+
+      return char
+    end
+
+    def consume_entity(from_attribute=false) # returns the replacement text, or nil when no entity was recognised
+      char = nil
+      char_stack = [@stream.char]
+      if SPACE_CHARACTERS.include?(char_stack[0]) or [:EOF, '<', '&'].include?(char_stack[0])
+        @stream.unget(char_stack) # cannot start an entity: give the character back
+      elsif char_stack[0] == '#'
+        # We might have a number entity here.
+        char_stack += [@stream.char, @stream.char]
+        if char_stack[0 .. 1].include? :EOF
+          # If we reach the end of the file put everything up to :EOF
+          # back in the queue
+          char_stack = char_stack[0...char_stack.index(:EOF)]
+          @stream.unget(char_stack)
+          @token_queue << {:type => :ParseError, :data => _("Numeric entity expected. Got end of file instead.")}
+        else
+          if char_stack[1].downcase == "x" and HEX_DIGITS.include? char_stack[2]
+            # Hexadecimal entity detected.
+            @stream.unget(char_stack[2]) # the first hex digit goes back so consume_number_entity reads it
+            char = consume_number_entity(true)
+          elsif DIGITS.include? char_stack[1]
+            # Decimal entity detected.
+            @stream.unget(char_stack[1..-1]) # both look-ahead characters go back; only '#' stays consumed
+            char = consume_number_entity(false)
+          else
+            # No number entity detected.
+            @stream.unget(char_stack)
+            @token_queue << {:type => :ParseError, :data => _("Numeric entity expected but none found.")}
+          end
+        end
+      else
+        # At this point in the process might have named entity. Entities
+        # are looked up in the ENTITIES constant.
+        #
+        # Consume characters and compare to these to a substring of the
+        # entity names in the list until the substring no longer matches.
+        filteredEntityList = ENTITIES.keys
+        filteredEntityList.reject! {|e| e[0].chr != char_stack[0]}
+        entityName = nil
+
+        # Try to find the longest entity the string will match to take care
+        # of &noti for instance.
+        while char_stack.last != :EOF
+          name = char_stack.join('')
+          if filteredEntityList.any? {|e| e[0...name.length] == name}
+            filteredEntityList.reject! {|e| e[0...name.length] != name}
+            char_stack.push(@stream.char) # one character of look-ahead beyond name
+          else
+            break
+          end
+
+          if ENTITIES.include? name
+            entityName = name
+            break if entityName[-1] == ?; # same ?; character literal as the check below; a name ending in ';' cannot grow further
+          end
+        end
+
+        if entityName != nil
+          char = ENTITIES[entityName]
+
+          # Check whether or not the last character returned can be
+          # discarded or needs to be put back.
+          if entityName[-1] != ?;
+            @token_queue << {:type => :ParseError, :data => _("Named entity didn't end with ';'.")}
+          end
+
+          if char_stack[-1] != ";" and from_attribute and
+             (ASCII_LETTERS.include?(char_stack[entityName.length]) or
+              DIGITS.include?(char_stack[entityName.length]))
+            @stream.unget(char_stack) # in an attribute, a name followed by a letter or digit is kept as literal text
+            char = '&'
+          else
+            @stream.unget(char_stack[entityName.length..-1]) # give back only what was read past the entity name
+          end
+        else
+          @token_queue << {:type => :ParseError, :data => _("Named entity expected. Got none.")}
+          @stream.unget(char_stack)
+        end
+      end
+      return char
+    end
+
+    # This method replaces the need for "entityInAttributeValueState".
+    def process_entity_in_attribute
+      # When consume_entity comes back empty-handed the ampersand is kept
+      # as literal text.
+      addition = consume_entity(true) || "&"
+      # Append to index 1 of the last entry in the token's :data list.
+      attribute = @current_token[:data][-1]
+      attribute[1] += addition
+    end
+
+    # This method is a generic handler for emitting the tags. It also sets
+    # the state to "data" because that's what's needed after a token has been
+    # emitted.
+    def emit_current_token
+      token = @current_token
+      # Only tag tokens are queued here; any other token type is left alone.
+      return unless [:StartTag, :EndTag, :EmptyTag].include?(token[:type])
+
+      # Fold the element name to lower case when that option is switched on.
+      token[:name] = token[:name].downcase if @lowercase_element_name
+
+      # Queue the token for yielding and return to the data state.
+      @token_queue << token
+      @state = :data_state
+    end
+
+    # Below are the various tokenizer states worked out.
+
+    # XXX AT Perhaps we should have Hixie run some evaluation on billions of
+    # documents to figure out what the order of the various if and elsif
+    # statements should be.
+    def data_state # handles one step in the data state; returns false at end of file, true otherwise
+      data = @stream.char
+
+      if @content_model_flag == :CDATA or @content_model_flag == :RCDATA
+        @lastFourChars << data # sliding window used below to spot "<!--" and "-->"
+        @lastFourChars.shift if @lastFourChars.length > 4
+      end
+
+      if data == "&" and [:PCDATA,:RCDATA].include?(@content_model_flag) and !@escapeFlag
+          @state = :entity_data_state
+      elsif data == "-" && [:CDATA, :RCDATA].include?(@content_model_flag) && !@escapeFlag && @lastFourChars.join('') == "<!--"
+          @escapeFlag = true # "<!--" just seen: "&" and "<" are treated as plain text until "-->"
+          @token_queue << {:type => :Characters, :data => data}
+      elsif data == "<" and !@escapeFlag and
+        [:PCDATA,:CDATA,:RCDATA].include?(@content_model_flag)
+          @state = :tag_open_state
+      elsif data == ">" and @escapeFlag and 
+        [:CDATA,:RCDATA].include?(@content_model_flag) and
+        @lastFourChars[1..-1].join('') == "-->"
+          @escapeFlag = false # "-->" just seen: leave the escaped section
+          @token_queue << {:type => :Characters, :data => data}
+
+      elsif data == :EOF
+        # Tokenization ends.
+        return false
+
+      elsif SPACE_CHARACTERS.include? data
+        # Directly after emitting a token you switch back to the "data
+        # state". At that point SPACE_CHARACTERS are important so they are
+        # emitted separately.
+        # XXX need to check if we don't need a special "spaces" flag on
+        # characters.
+        @token_queue << {:type => :SpaceCharacters, :data => data + @stream.chars_until(SPACE_CHARACTERS, true)}
+      else
+        @token_queue << {:type => :Characters, :data => data + @stream.chars_until(%w[& < > -])} # the listed characters are the ones the branches above react to
+      end
+      return true
+    end
+
+    def entity_data_state
+      entity = consume_entity
+      if entity
+        @token_queue << {:type => :Characters, :data => entity}
+      else
+        @token_queue << {:type => :Characters, :data => "&"}
+      end
+      @state = :data_state
+      return true
+    end
+
+    def tag_open_state
+      data = @stream.char
+      if @content_model_flag == :PCDATA
+        if data == "!"
+          @state = :markup_declaration_open_state
+        elsif data == "/"
+          @state = :close_tag_open_state
+        elsif data != :EOF and ASCII_LETTERS.include? data
+          @current_token = {:type => :StartTag, :name => data, :data => []}
+          @state = :tag_name_state
+        elsif data == ">"
+          # XXX In theory it could be something besides a tag name. But
+          # do we really care?
+          @token_queue << {:type => :ParseError, :data =>       _("Expected tag name. Got '>' instead.")}
+          @token_queue << {:type => :Characters, :data => "<>"}
+          @state = :data_state
+        elsif data == "?"
+          # XXX In theory it could be something besides a tag name. But
+          # do we really care?
+          @token_queue.push({:type => :ParseError, :data => _("Expected tag name. Got '?' instead (HTML doesn't " +
+            "support processing instructions).")})
+          @stream.unget(data)
+          @state = :bogus_comment_state
+        else
+          # XXX
+          @token_queue << {:type => :ParseError, :data => _("Expected tag name. Got something else instead")}
+          @token_queue << {:type => :Characters, :data => "<"}
+          @stream.unget(data)
+          @state = :data_state
+        end
+      else
+        # We know the content model flag is set to either RCDATA or CDATA
+        # now because this state can never be entered with the PLAINTEXT
+        # flag.
+        if data == "/"
+          @state = :close_tag_open_state
+        else
+          @token_queue << {:type => :Characters, :data => "<"}
+          @stream.unget(data)
+          @state = :data_state
+        end
+      end
+      return true
+    end
+
+    # Close tag open state: entered once "</" has been consumed. In RCDATA or
+    # CDATA the end tag is only accepted when it matches the name of
+    # @current_token; otherwise "</" is queued as character data. Always
+    # returns true.
+    def close_tag_open_state
+      if (@content_model_flag == :RCDATA or @content_model_flag == :CDATA)
+        if @current_token
+          char_stack = []
+
+          # So far we know that "</" has been consumed. We now need to know
+          # whether the next few characters match the name of last emitted
+          # start tag which also happens to be the current_token. We also need
+          # to have the character directly after the characters that could
+          # match the start tag name.
+          (@current_token[:name].length + 1).times do
+            char_stack.push(@stream.char)
+            # Make sure we don't get hit by :EOF
+            break if char_stack[-1] == :EOF
+          end
+
+          # Since this is just for checking. We put the characters back on
+          # the stack.
+          @stream.unget(char_stack)
+        end
+
+        # Accept only when the name matches case-insensitively and the
+        # character right after it is whitespace, ">", "/", "<" or :EOF.
+        if @current_token and
+          @current_token[:name].downcase == 
+          char_stack[0...-1].join('').downcase and
+          (SPACE_CHARACTERS + [">", "/", "<", :EOF]).include? char_stack[-1]
+          # Because the characters are correct we can safely switch to
+          # PCDATA mode now. This also means we don't have to do it when
+          # emitting the end tag token.
+          @content_model_flag = :PCDATA
+        else
+          @token_queue << {:type => :Characters, :data => "</"}
+          @state = :data_state
+
+          # Need to return here since we don't want the rest of the
+          # method to be walked through.
+          return true
+        end
+      end
+
+      # From here on the characters after "</" are consumed for real.
+      data = @stream.char
+      if data == :EOF
+        @token_queue << {:type => :ParseError, :data => _("Expected closing tag. Unexpected end of file.")}
+        @token_queue << {:type => :Characters, :data => "</"}
+        @state = :data_state
+      elsif ASCII_LETTERS.include? data
+        # First letter of the end tag name.
+        @current_token = {:type => :EndTag, :name => data, :data => []}
+        @state = :tag_name_state
+      elsif data == ">"
+        @token_queue << {:type => :ParseError, :data => _("Expected closing tag. Got '>' instead. Ignoring '</>'.")}
+        @state = :data_state
+      else
+        # XXX data can be _'_...
+        @token_queue << {:type => :ParseError, :data => _("Expected closing tag. Unexpected character '#{data}' found.")}
+        # Hand the character back so the bogus comment state sees it.
+        @stream.unget(data)
+        @state = :bogus_comment_state
+      end
+
+      return true
+    end
+
+    def tag_name_state
+      data = @stream.char
+      if SPACE_CHARACTERS.include? data
+        @state = :before_attribute_name_state
+      elsif data == :EOF
+        @token_queue << {:type => :ParseError, :data => _("Unexpected end of file in the tag name.")}
+        emit_current_token
+      elsif ASCII_LETTERS.include? data
+        @current_token[:name] += data + @stream.chars_until(ASCII_LETTERS, true)
+      elsif data == ">"
+        emit_current_token
+      elsif data == "/"
+        process_solidus_in_tag
+        @state = :before_attribute_name_state
+      else
+        @current_token[:name] += data
+      end
+      return true
+    end
+
+    def before_attribute_name_state
+      data = @stream.char
+      if SPACE_CHARACTERS.include? data
+        @stream.chars_until(SPACE_CHARACTERS, true)
+      elsif data == :EOF
+        @token_queue << {:type => :ParseError, :data => _("Unexpected end of file. Expected attribute name instead.")}
+        emit_current_token
+      elsif ASCII_LETTERS.include? data
+        @current_token[:data].push([data, ""])
+        @state = :attribute_name_state
+      elsif data == ">"
+        emit_current_token
+      elsif data == "/"
+        process_solidus_in_tag
+      else
+        @current_token[:data].push([data, ""])
+        @state = :attribute_name_state
+      end
+      return true
+    end
+
+    # Attribute name state: extends the name of the last attribute on the
+    # current token. When the name is finished it is optionally lowercased
+    # and checked against the earlier attributes for duplicates. Always
+    # returns true.
+    def attribute_name_state
+      data = @stream.char
+      # leavingThisState: the name is complete after this character.
+      # emitToken: the tag itself ends here; emitting is deferred until the
+      # duplicate check below has run.
+      leavingThisState = true
+      emitToken = false
+      if data == "="
+        @state = :before_attribute_value_state
+      elsif data == :EOF
+        @token_queue << {:type => :ParseError, :data => _("Unexpected end of file in attribute name.")}
+        @state = :data_state
+        emitToken = true
+      elsif ASCII_LETTERS.include? data
+        # Take the whole run of letters in one go.
+        @current_token[:data][-1][0] += data + @stream.chars_until(ASCII_LETTERS, true)
+        leavingThisState = false
+      elsif data == ">"
+        # XXX If we emit here the attributes are converted to a dict
+        # without being checked and when the code below runs we error
+        # because data is a dict not a list
+        emitToken = true
+      elsif SPACE_CHARACTERS.include? data
+        @state = :after_attribute_name_state
+      elsif data == "/"
+        process_solidus_in_tag
+        @state = :before_attribute_name_state
+      else
+        # Any other character simply becomes part of the name.
+        @current_token[:data][-1][0] += data
+        leavingThisState = false
+      end
+
+      if leavingThisState
+        # Attributes are not dropped at this stage. That happens when the
+        # start tag token is emitted so values can still be safely appended
+        # to attributes, but we do want to report the parse error in time.
+        if @lowercase_attr_name
+            @current_token[:data][-1][0] = @current_token[:data].last.first.downcase
+        end
+        # Compare the finished name with every earlier attribute name.
+        @current_token[:data][0...-1].each {|name,value|
+          if @current_token[:data].last.first == name
+            @token_queue << {:type => :ParseError, :data =>_("Dropped duplicate attribute on tag.")}
+            break # don't report an error more than once
+          end
+        }
+        # XXX Fix for above XXX
+        emit_current_token if emitToken
+      end
+      return true
+    end
+
+    def after_attribute_name_state
+      data = @stream.char
+      if SPACE_CHARACTERS.include? data
+        @stream.chars_until(SPACE_CHARACTERS, true)
+      elsif data == "="
+        @state = :before_attribute_value_state
+      elsif data == ">"
+        emit_current_token
+      elsif data == :EOF
+        @token_queue << {:type => :ParseError, :data => _("Unexpected end of file. Expected = or end of tag.")}
+        emit_current_token
+      elsif ASCII_LETTERS.include? data
+        @current_token[:data].push([data, ""])
+        @state = :attribute_name_state
+      elsif data == "/"
+        process_solidus_in_tag
+        @state = :before_attribute_name_state
+      else
+        @current_token[:data].push([data, ""])
+        @state = :attribute_name_state
+      end
+      return true
+    end
+
+    def before_attribute_value_state
+      data = @stream.char
+      if SPACE_CHARACTERS.include? data
+        @stream.chars_until(SPACE_CHARACTERS, true)
+      elsif data == "\""
+        @state = :attribute_value_double_quoted_state
+      elsif data == "&"
+        @state = :attribute_value_unquoted_state
+        @stream.unget(data);
+      elsif data == "'"
+        @state = :attribute_value_single_quoted_state
+      elsif data == ">"
+        emit_current_token
+      elsif data == :EOF
+        @token_queue << {:type => :ParseError, :data => _("Unexpected end of file. Expected attribute value.")}
+        emit_current_token
+      else
+        @current_token[:data][-1][1] += data
+        @state = :attribute_value_unquoted_state
+      end
+      return true
+    end
+
+    def attribute_value_double_quoted_state
+      data = @stream.char
+      if data == "\""
+        @state = :before_attribute_name_state
+      elsif data == "&"
+        process_entity_in_attribute
+      elsif data == :EOF
+        @token_queue << {:type => :ParseError, :data => _("Unexpected end of file in attribute value (\").")}
+        emit_current_token
+      else
+        @current_token[:data][-1][1] += data + @stream.chars_until(["\"", "&"])
+      end
+      return true
+    end
+
+    def attribute_value_single_quoted_state
+      data = @stream.char
+      if data == "'"
+        @state = :before_attribute_name_state
+      elsif data == "&"
+        process_entity_in_attribute
+      elsif data == :EOF
+        @token_queue << {:type => :ParseError, :data => _("Unexpected end of file in attribute value (').")}
+        emit_current_token
+      else
+        @current_token[:data][-1][1] += data +\
+          @stream.chars_until(["'", "&"])
+      end
+      return true
+    end
+
+    def attribute_value_unquoted_state
+      data = @stream.char
+      if SPACE_CHARACTERS.include? data
+        @state = :before_attribute_name_state
+      elsif data == "&"
+        process_entity_in_attribute
+      elsif data == ">"
+        emit_current_token
+      elsif data == :EOF
+        @token_queue << {:type => :ParseError, :data => _("Unexpected end of file in attribute value.")}
+        emit_current_token
+      else
+        @current_token[:data][-1][1] += data +  @stream.chars_until(["&", ">","<"] + SPACE_CHARACTERS)
+      end
+      return true
+    end
+
+    def bogus_comment_state
+      # Make a new comment token and give it as value all the characters
+      # until the first > or :EOF (chars_until checks for :EOF automatically)
+      # and emit it.
+      @token_queue << {:type => :Comment, :data => @stream.chars_until((">"))}
+
+      # Eat the character directly after the bogus comment which is either a
+      # ">" or an :EOF.
+      @stream.char
+      @state = :data_state
+      return true
+    end
+
+    def markup_declaration_open_state
+      char_stack = [@stream.char, @stream.char]
+      if char_stack == ["-", "-"]
+        @current_token = {:type => :Comment, :data => ""}
+        @state = :comment_start_state
+      else
+        5.times { char_stack.push(@stream.char) }
+        # Put in explicit :EOF check
+        if !char_stack.include?(:EOF) && char_stack.join("").upcase == "DOCTYPE"
+          @current_token = {:type => :Doctype, :name => "", :publicId => nil, :systemId => nil, :correct => true}
+          @state = :doctype_state
+        else
+          @token_queue << {:type => :ParseError, :data => _("Expected '--' or 'DOCTYPE'. Not found.")}
+          @stream.unget(char_stack)
+          @state = :bogus_comment_state
+        end
+      end
+      return true
+    end
+
+    def comment_start_state
+        data = @stream.char
+        if data == "-"
+            @state = :comment_start_dash_state
+        elsif data == ">"
+            @token_queue << {:type => :ParseError, :data => _("Incorrect comment.")}
+            @token_queue << @current_token
+            @state = :data_state
+        elsif data == :EOF
+            @token_queue << {:type => :ParseError, :data => _("Unexpected end of file in comment.")}
+            @token_queue << @current_token
+            @state = :data_state
+        else
+            @current_token[:data] += data + @stream.chars_until("-")
+            @state = :comment_state
+        end
+        return true
+    end
+    
+    def comment_start_dash_state
+        data = @stream.char
+        if data == "-"
+            @state = :comment_end_state
+        elsif data == ">"
+            @token_queue << {:type => :ParseError, :data => _("Incorrect comment.")}
+            @token_queue << @current_token
+            @state = :data_state
+        elsif data == :EOF
+            @token_queue << {:type => :ParseError, :data => _("Unexpected end of file in comment.")}
+            @token_queue << @current_token
+            @state = :data_state
+        else
+            @current_token[:data] += '-' + data + @stream.chars_until("-")
+            @state = :comment_state
+        end
+        return true
+    end
+
+    def comment_state
+      data = @stream.char
+      if data == "-"
+        @state = :comment_end_dash_state
+      elsif data == :EOF
+        @token_queue << {:type => :ParseError, :data => _("Unexpected end of file in comment.")}
+        @token_queue << @current_token
+        @state = :data_state
+      else
+        @current_token[:data] += data + @stream.chars_until("-")
+      end
+      return true
+    end
+
+    def comment_end_dash_state
+      data = @stream.char
+      if data == "-"
+        @state = :comment_end_state
+      elsif data == :EOF
+        @token_queue << {:type => :ParseError, :data => _("Unexpected end of file in comment (-)")}
+        @token_queue << @current_token
+        @state = :data_state
+      else
+        @current_token[:data] += "-" + data +\
+          @stream.chars_until("-")
+        # Consume the next character which is either a "-" or an :EOF as
+        # well so if there's a "-" directly after the "-" we go nicely to
+        # the "comment end state" without emitting a ParseError there.
+        @stream.char
+      end
+      return true
+    end
+
+    def comment_end_state
+      data = @stream.char
+      if data == ">"
+        @token_queue << @current_token
+        @state = :data_state
+      elsif data == "-"
+        @token_queue << {:type => :ParseError, :data => _("Unexpected '-' after '--' found in comment.")}
+        @current_token[:data] += data
+      elsif data == :EOF
+        @token_queue << {:type => :ParseError, :data => _("Unexpected end of file in comment (--).")}
+        @token_queue << @current_token
+        @state = :data_state
+      else
+        # XXX
+        @token_queue << {:type => :ParseError, :data => _("Unexpected character in comment found.")}
+        @current_token[:data] += "--" + data
+        @state = :comment_state
+      end
+      return true
+    end
+
+    def doctype_state
+      data = @stream.char
+      if SPACE_CHARACTERS.include? data
+        @state = :before_doctype_name_state
+      else
+        @token_queue << {:type => :ParseError, :data => _("No space after literal string 'DOCTYPE'.")}
+        @stream.unget(data)
+        @state = :before_doctype_name_state
+      end
+      return true
+    end
+
+    def before_doctype_name_state
+      data = @stream.char
+      if SPACE_CHARACTERS.include? data
+      elsif data == ">"
+        @token_queue << {:type => :ParseError, :data => _("Unexpected > character. Expected DOCTYPE name.")}
+        @current_token[:correct] = false
+        @token_queue << @current_token
+        @state = :data_state
+      elsif data == :EOF
+        @token_queue << {:type => :ParseError, :data =>          _("Unexpected end of file. Expected DOCTYPE name.")}
+        @current_token[:correct] = false
+        @token_queue << @current_token
+        @state = :data_state
+      else
+        @current_token[:name] = data
+        @state = :doctype_name_state
+      end
+      return true
+    end
+
+    def doctype_name_state
+      data = @stream.char
+      if SPACE_CHARACTERS.include? data
+        @state = :after_doctype_name_state
+      elsif data == ">"
+        @token_queue << @current_token
+        @state = :data_state
+      elsif data == :EOF
+        @token_queue << {:type => :ParseError, :data => _("Unexpected end of file in DOCTYPE name.")}
+        @current_token[:correct] = false
+        @token_queue << @current_token
+        @state = :data_state
+      else
+        @current_token[:name] += data
+      end
+
+      return true
+    end
+
+    def after_doctype_name_state
+      data = @stream.char
+      if SPACE_CHARACTERS.include? data
+      elsif data == ">"
+        @token_queue << @current_token
+        @state = :data_state
+      elsif data == :EOF
+        @current_token[:correct] = false
+        @stream.unget(data)
+        @token_queue << {:type => :ParseError, :data => _("Unexpected end of file in DOCTYPE.")}
+        @token_queue << @current_token
+        @state = :data_state
+      else
+        char_stack = [data]  
+        5.times { char_stack << stream.char }
+        token = char_stack.join('').tr(ASCII_UPPERCASE,ASCII_LOWERCASE)
+        if token == "public" and !char_stack.include?(:EOF)
+          @state = :before_doctype_public_identifier_state
+        elsif token == "system" and !char_stack.include?(:EOF)
+          @state = :before_doctype_system_identifier_state
+        else
+          @stream.unget(char_stack)
+          @token_queue << {:type => :ParseError, :data => _("Expected 'public' or 'system'. Got '#{token}'")}
+          @state = :bogus_doctype_state
+        end
+      end
+      return true
+    end
+    
+    def before_doctype_public_identifier_state
+      data = @stream.char
+
+      if SPACE_CHARACTERS.include?(data)
+      elsif data == "\""
+        @current_token[:publicId] = ""
+        @state = :doctype_public_identifier_double_quoted_state
+      elsif data == "'"
+        @current_token[:publicId] = ""
+        @state = :doctype_public_identifier_single_quoted_state
+      elsif data == ">"
+        @token_queue << {:type => :ParseError, :data => _("Unexpected end of DOCTYPE.")}
+        @current_token[:correct] = false
+        @token_queue << @current_token
+        @state = :data_state
+      elsif data == :EOF
+        @token_queue << {:type => :ParseError, :data => _("Unexpected end of file in DOCTYPE.")}
+        @current_token[:correct] = false
+        @token_queue << @current_token
+        @state = :data_state
+      else
+        @token_queue << {:type => :ParseError, :data => _("Unexpected character in DOCTYPE.")}
+        @state = :bogus_doctype_state
+      end
+
+      return true
+    end
+ 
+    def doctype_public_identifier_double_quoted_state
+      data = @stream.char
+      if data == "\""
+        @state = :after_doctype_public_identifier_state
+      elsif data == :EOF
+        @token_queue << {:type => :ParseError, :data => _("Unexpected end of file in DOCTYPE.")}
+        @current_token[:correct] = false
+        @token_queue << @current_token
+        @state = :data_state
+      else
+        @current_token[:publicId] += data
+      end
+      return true
+    end
+
+    def doctype_public_identifier_single_quoted_state
+      data = @stream.char
+      if data == "'"
+        @state = :after_doctype_public_identifier_state
+      elsif data == :EOF
+        @token_queue << {:type => :ParseError, :data => _("Unexpected end of file in DOCTYPE.")}
+        @current_token[:correct] = false
+        @token_queue << @current_token
+        @state = :data_state
+      else
+        @current_token[:publicId] += data
+      end
+      return true
+    end
+
+    def after_doctype_public_identifier_state
+      data = @stream.char
+      if SPACE_CHARACTERS.include?(data)
+      elsif data == "\""
+        @current_token[:systemId] = ""
+        @state = :doctype_system_identifier_double_quoted_state
+      elsif data == "'"
+        @current_token[:systemId] = ""
+        @state = :doctype_system_identifier_single_quoted_state
+      elsif data == ">"
+        @token_queue << @current_token
+        @state = :data_state
+      elsif data == :EOF
+        @token_queue << {:type => :ParseError, :data => _("Unexpected end of file in DOCTYPE.")}
+        @current_token[:correct] = false
+        @token_queue << @current_token
+        @state = :data_state
+      else
+        @token_queue << {:type => :ParseError, :data => _("Unexpected character in DOCTYPE.")}
+        @state = :bogus_doctype_state
+      end
+      return true
+    end
+    
+    def before_doctype_system_identifier_state
+      data = @stream.char
+      if SPACE_CHARACTERS.include?(data)
+      elsif data == "\""
+        @current_token[:systemId] = ""
+        @state = :doctype_system_identifier_double_quoted_state
+      elsif data == "'"
+        @current_token[:systemId] = ""
+        @state = :doctype_system_identifier_single_quoted_state
+      elsif data == ">"
+        @token_queue << {:type => :ParseError, :data => _("Unexpected character in DOCTYPE.")}
+        @current_token[:correct] = false
+        @token_queue << @current_token
+        @state = :data_state
+      elsif data == :EOF
+        @token_queue << {:type => :ParseError, :data => _("Unexpected end of file in DOCTYPE.")}
+        @current_token[:correct] = false
+        @token_queue << @current_token
+        @state = :data_state
+      else
+        @token_queue << {:type => :ParseError, :data => _("Unexpected character in DOCTYPE.")}
+        @state = :bogus_doctype_state
+      end
+      return true
+    end
+
+    def doctype_system_identifier_double_quoted_state
+      data = @stream.char
+      if data == "\""
+        @state = :after_doctype_system_identifier_state
+      elsif data == :EOF
+        @token_queue << {:type => :ParseError, :data => _("Unexpected end of file in DOCTYPE.")}
+        @current_token[:correct] = false
+        @token_queue << @current_token
+        @state = :data_state
+      else
+        @current_token[:systemId] += data
+      end
+      return true
+    end
+
+    def doctype_system_identifier_single_quoted_state
+      data = @stream.char
+      if data == "'"
+        @state = :after_doctype_system_identifier_state
+      elsif data == :EOF
+        @token_queue << {:type => :ParseError, :data => _("Unexpected end of file in DOCTYPE.")}
+        @current_token[:correct] = false
+        @token_queue << @current_token
+        @state = :data_state
+      else
+        @current_token[:systemId] += data
+      end
+      return true
+    end
+
+    def after_doctype_system_identifier_state
+      data = @stream.char
+      if SPACE_CHARACTERS.include?(data)
+      elsif data == ">"
+        @token_queue << @current_token
+        @state = :data_state
+      elsif data == :EOF
+        @token_queue << {:type => :ParseError, :data => _("Unexpected end of file in DOCTYPE.")}
+        @current_token[:correct] = false
+        @token_queue << @current_token
+        @state = :data_state
+      else
+        @token_queue << {:type => :ParseError, :data => _("Unexpected character in DOCTYPE.")}
+        @state = :bogus_doctype_state
+      end
+      return true
+    end
+
+    def bogus_doctype_state
+      data = @stream.char
+      @current_token[:correct] = false
+      if data == ">"
+        @token_queue << @current_token
+        @state = :data_state
+      elsif data == :EOF
+        # XXX EMIT
+        @stream.unget(data)
+        @token_queue << {:type => :ParseError, :data => _("Unexpected end of file in bogus doctype.")}
+        @current_token[:correct] = false
+        @token_queue << @current_token
+        @state = :data_state
+      end
+      return true
+    end
+
+    def _(string); string; end
+  end
+
+end
--- a/vendor/plugins/HTML5lib/lib/html5lib/treebuilders.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/treebuilders.rb
@ -1,24 +1,24 @@
-module HTML5lib
+module HTML5
  module TreeBuilders

    class << self
      def [](name)
        case name.to_s.downcase
        when 'simpletree' then
-          require 'html5lib/treebuilders/simpletree'
+          require 'html5/treebuilders/simpletree'
          SimpleTree::TreeBuilder
        when 'rexml' then
-          require 'html5lib/treebuilders/rexml'
+          require 'html5/treebuilders/rexml'
          REXML::TreeBuilder
        when 'hpricot' then
-          require 'html5lib/treebuilders/hpricot'
+          require 'html5/treebuilders/hpricot'
          Hpricot::TreeBuilder
        else
          raise "Unknown TreeBuilder #{name}"
        end
      end

-      alias :getTreeBuilder :[]
+      alias :get_tree_builder :[]
    end
  end
 end
--- a/vendor/plugins/HTML5lib/lib/html5lib/treebuilders/base.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/treebuilders/base.rb
@ -1,8 +1,8 @@
-require 'html5lib/constants'
+require 'html5/constants'

 #XXX - TODO; make the default interface more ElementTree-like rather than DOM-like

-module HTML5lib
+module HTML5

  # The scope markers are inserted when entering buttons, object elements,
  # marquees, table cells, and table captions, and are used to prevent formatting
@ -24,9 +24,9 @@ module HTML5lib
        attr_accessor :_flags

        def initialize(name)
-          @parent = nil
+          @parent     = nil
          @childNodes = []
-          @_flags = []
+          @_flags     = []
        end

        # Insert node as a child of the current node
@ -76,13 +76,13 @@ module HTML5lib
      # Base treebuilder implementation
      class TreeBuilder

-        attr_accessor :openElements
+        attr_accessor :open_elements

        attr_accessor :activeFormattingElements

        attr_accessor :document

-        attr_accessor :headPointer
+        attr_accessor :head_pointer

        attr_accessor :formPointer

@ -106,25 +106,25 @@ module HTML5lib
        end

        def reset
-          @openElements = []
+          @open_elements = []
          @activeFormattingElements = []

          #XXX - rename these to headElement, formElement
-          @headPointer = nil
+          @head_pointer = nil
          @formPointer = nil

-          self.insertFromTable = false
+          self.insert_from_table = false

          @document = @documentClass.new
        end

        def elementInScope(target, tableVariant=false)
          # Exit early when possible.
-          return true if @openElements[-1].name == target
+          return true if @open_elements[-1].name == target

          # AT How about while true and simply set node to [-1] and set it to
          # [-2] at the end...
-          @openElements.reverse.each do |element|
+          @open_elements.reverse.each do |element|
            if element.name == target
              return true
            elsif element.name == 'table'
@ -149,10 +149,10 @@ module HTML5lib
          # Step 2 and step 3: we start with the last element. So i is -1.
          i = -1
          entry = @activeFormattingElements[i]
-          return if entry == Marker or @openElements.include?(entry)
+          return if entry == Marker or @open_elements.include?(entry)

          # Step 6
-          until entry == Marker or @openElements.include?(entry)
+          until entry == Marker or @open_elements.include?(entry)
            # Step 5: let entry be one earlier in the list.
            i -= 1
            begin
@ -171,7 +171,7 @@ module HTML5lib
            clone = @activeFormattingElements[i].cloneNode

            # Step 9
-            element = insertElement(clone.name, clone.attributes)
+            element = insert_element(clone.name, clone.attributes)

            # Step 10
            @activeFormattingElements[i] = element
@ -198,12 +198,15 @@ module HTML5lib
          return false
        end

-        def insertDoctype(name)
-          @document.appendChild(@doctypeClass.new(name))
+        def insertDoctype(name, public_id, system_id)
+          doctype = @doctypeClass.new(name)
+          doctype.public_id = public_id
+          doctype.system_id = system_id
+          @document.appendChild(doctype)
        end

-        def insertComment(data, parent=nil)
-          parent = @openElements[-1] if parent.nil?
+        def insert_comment(data, parent=nil)
+          parent = @open_elements[-1] if parent.nil?
          parent.appendChild(@commentClass.new(data))
        end
               
@ -216,28 +219,28 @@ module HTML5lib

        # Switch the function used to insert an element from the
        # normal one to the misnested table one and back again
-        def insertFromTable=(value)
-          @insertFromTable = value
-          @insertElement = value ? :insertElementTable : :insertElementNormal
+        def insert_from_table=(value)
+          @insert_from_table = value
+          @insert_element = value ? :insert_elementTable : :insert_elementNormal
        end

-        def insertElement(name, attributes)
-          send(@insertElement, name, attributes)
+        def insert_element(name, attributes)
+          send(@insert_element, name, attributes)
        end

-        def insertElementNormal(name, attributes)
+        def insert_elementNormal(name, attributes)
          element = @elementClass.new(name)
          element.attributes = attributes
-          @openElements[-1].appendChild(element)
-          @openElements.push(element)
+          @open_elements.last.appendChild(element)
+          @open_elements.push(element)
          return element
        end

        # Create an element and insert it into the tree
-        def insertElementTable(name, attributes)
+        def insert_elementTable(name, attributes)
          element = @elementClass.new(name)
          element.attributes = attributes
-          if TABLE_INSERT_MODE_ELEMENTS.include?(@openElements[-1].name)
+          if TABLE_INSERT_MODE_ELEMENTS.include?(@open_elements.last.name)
            #We should be in the InTable mode. This means we want to do
            #special magic element rearranging
            parent, insertBefore = getTableMisnestedNodePosition
@ -246,17 +249,17 @@ module HTML5lib
            else
              parent.insertBefore(element, insertBefore)
            end
-            @openElements.push(element)
+            @open_elements.push(element)
          else
-            return insertElementNormal(name, attributes)
+            return insert_elementNormal(name, attributes)
          end
          return element
        end

        def insertText(data, parent=nil)
-          parent = @openElements[-1] if parent.nil?
+          parent = @open_elements[-1] if parent.nil?

-          if (not(@insertFromTable) or (@insertFromTable and not TABLE_INSERT_MODE_ELEMENTS.include?(@openElements[-1].name)))
+          if (not(@insert_from_table) or (@insert_from_table and not TABLE_INSERT_MODE_ELEMENTS.include?(@open_elements[-1].name)))
            parent.insertText(data)
          else
            #We should be in the InTable mode. This means we want to do
@ -265,7 +268,7 @@ module HTML5lib
            parent.insertText(data, insertBefore)
          end
        end
-      
+
        # Get the foster parent element, and sibling to insert before
        # (or nil) when inserting a misnested table node
        def getTableMisnestedNodePosition
@ -275,7 +278,7 @@ module HTML5lib
          lastTable = nil
          fosterParent = nil
          insertBefore = nil
-          @openElements.reverse.each do |element|
+          @open_elements.reverse.each do |element|
            if element.name == "table"
              lastTable = element
              break
@ -288,33 +291,34 @@ module HTML5lib
              fosterParent = lastTable.parent
              insertBefore = lastTable
            else
-              fosterParent = @openElements[@openElements.index(lastTable) - 1]
+              fosterParent = @open_elements[@open_elements.index(lastTable) - 1]
            end
          else
-            fosterParent = @openElements[0]
+            fosterParent = @open_elements[0]
          end
          return fosterParent, insertBefore
        end

        def generateImpliedEndTags(exclude=nil)
-          name = @openElements[-1].name
+          name = @open_elements[-1].name

-          if (['dd', 'dt', 'li', 'p', 'td', 'th', 'tr'].include?(name) and name != exclude)
-            @openElements.pop
+          # XXX td, th and tr are not actually needed
+          if (%w[dd dt li p td th tr].include?(name) and name != exclude)
+            @open_elements.pop
            # XXX This is not entirely what the specification says. We should
            # investigate it more closely.
            generateImpliedEndTags(exclude)
          end
        end

-        def getDocument
+        def get_document
          @document
        end
  
-        def getFragment
-          #assert @innerHTML
+        def get_fragment
+          #assert @inner_html
          fragment = @fragmentClass.new
-          @openElements[0].reparentChildren(fragment)
+          @open_elements[0].reparentChildren(fragment)
          return fragment
        end

--- a/vendor/plugins/HTML5lib/lib/html5lib/treebuilders/hpricot.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/treebuilders/hpricot.rb
@ -1,221 +1,231 @@
-require 'html5lib/treebuilders/base'
-require 'rubygems'
-require 'hpricot'
-require 'forwardable'
-
-module HTML5lib
-  module TreeBuilders
-    module Hpricot
-
-      class Node < Base::Node
-
-        extend Forwardable
-
-        def_delegators :@hpricot, :name
-
-        attr_accessor :hpricot
-
-        def initialize(name)
-          super(name)
-          @hpricot = self.class.hpricot_class.new name
-        end
-
-        def appendChild(node)
-          if node.kind_of?(TextNode) and childNodes.any? and childNodes.last.kind_of?(TextNode)
-            childNodes[-1].hpricot.content = childNodes[-1].hpricot.to_s + node.hpricot.to_s
-          else
-            childNodes << node
-            hpricot.children << node.hpricot
-          end
-          if (oldparent = node.hpricot.parent) != nil
-            oldparent.children.delete_at(oldparent.children.index(node.hpricot))
-          end
-          node.hpricot.parent = hpricot
-          node.parent = self
-        end
-
-        def removeChild(node)
-           childNodes.delete(node)
-           hpricot.children.delete_at(hpricot.children.index(node.hpricot))
-           node.hpricot.parent = nil
-           node.parent = nil
-        end
-
-        def insertText(data, before=nil)
-          if before
-            insertBefore(TextNode.new(data), before)
-          else
-            appendChild(TextNode.new(data))
-          end
-        end
-
-        def insertBefore(node, refNode)
-          index = childNodes.index(refNode)
-          if node.kind_of?(TextNode) and index > 0 and childNodes[index-1].kind_of?(TextNode)
-            childNodes[index-1].hpricot.content = childNodes[index-1].hpricot.to_s + node.hpricot.to_s
-          else
-            refNode.hpricot.parent.insert_before(node.hpricot,refNode.hpricot)
-            childNodes.insert(index, node)
-          end
-        end
-
-        def hasContent
-          childNodes.any?
-        end
-      end
-
-      class Element < Node
-        def self.hpricot_class
-          ::Hpricot::Elem
-        end
-
-        def initialize(name)
-          super(name)
-
-          @hpricot = ::Hpricot::Elem.new(::Hpricot::STag.new(name))
-        end
-
-        def name
-          @hpricot.stag.name
-        end
-
-        def cloneNode
-          attributes.inject(self.class.new(name)) do |node, (name, value)|
-            node.hpricot[name] = value
-            node
-          end
-        end
-
-        # A call to Hpricot::Elem#raw_attributes is built dynamically,
-        # so alterations to the returned value (a hash) will be lost.
-        #
-        # AttributeProxy works around this by forwarding :[]= calls
-        # to the raw_attributes accessor on the element start tag.
-        #
-        class AttributeProxy
-          def initialize(hpricot)
-            @hpricot = hpricot
-          end
-
-          def []=(k, v)
-            @hpricot.stag.send(stag_attributes_method)[k] = v
-          end
-
-          def stag_attributes_method
-            # STag#attributes changed to STag#raw_attributes after Hpricot 0.5
-            @hpricot.stag.respond_to?(:raw_attributes) ? :raw_attributes : :attributes
-          end
-
-          def method_missing(*a, &b)
-            @hpricot.attributes.send(*a, &b)
-          end
-        end
-
-        def attributes
-          AttributeProxy.new(@hpricot)
-        end
-
-        def attributes=(attrs)
-          attrs.each { |name, value| @hpricot[name] = value }
-        end
-
-        def printTree(indent=0)
-          tree = "\n|#{' ' * indent}<#{name}>"
-          indent += 2
-          attributes.each do |name, value|
-            next if name == 'xmlns'
-            tree += "\n|#{' ' * indent}#{name}=\"#{value}\""
-          end
-          childNodes.inject(tree) { |tree, child| tree + child.printTree(indent) }
-        end
-      end
-
-      class Document < Node
-        def self.hpricot_class
-          ::Hpricot::Doc
-        end
-
-        def initialize
-          super(nil)
-        end
-
-        def printTree(indent=0)
-          childNodes.inject('#document') { |tree, child| tree + child.printTree(indent + 2) }
-        end
-      end
-
-      class DocumentType < Node
-        def self.hpricot_class
-          ::Hpricot::DocType
-        end
-
-        def initialize(name)
-          begin
-            super(name)
-          rescue ArgumentError # needs 3...
-          end
-
-          @hpricot = ::Hpricot::DocType.new(name, nil, nil)
-        end
-
-        def printTree(indent=0)
-          "\n|#{' ' * indent}<!DOCTYPE #{hpricot.target}>"
-        end
-      end
-
-      class DocumentFragment < Element
-        def initialize
-          super('')
-        end
-
-        def printTree(indent=0)
-          childNodes.inject('') { |tree, child| tree + child.printTree(indent+2) }
-        end
-      end
-
-      class TextNode < Node
-        def initialize(data)
-          @hpricot = ::Hpricot::Text.new(data)
-        end
-
-        def printTree(indent=0)
-          "\n|#{' ' * indent}\"#{hpricot.content}\""
-        end
-      end
-
-      class CommentNode < Node
-        def self.hpricot_class
-          ::Hpricot::Comment
-        end
-
-        def printTree(indent=0)
-          "\n|#{' ' * indent}<!-- #{hpricot.content} -->"
-        end
-      end
-
-      class TreeBuilder < Base::TreeBuilder
-        def initialize
-          @documentClass = Document
-          @doctypeClass = DocumentType
-          @elementClass = Element
-          @commentClass = CommentNode
-          @fragmentClass = DocumentFragment
-        end
-
-        def testSerializer(node)
-          node.printTree
-        end
-
-        def getDocument
-          @document.hpricot
-        end
-
-        def getFragment
-          @document = super
-          return @document.hpricot.children
-        end
-      end
-
-    end
-  end
-end
+require 'html5/treebuilders/base'
+require 'rubygems'
+require 'hpricot'
+require 'forwardable'
+
+module HTML5
+  module TreeBuilders
+    module Hpricot
+
+      # Wraps an Hpricot object (+hpricot+) behind the tree-builder node
+      # interface, updating +childNodes+ and the Hpricot children together.
+      class Node < Base::Node
+        extend Forwardable
+
+        def_delegators :@hpricot, :name
+
+        attr_accessor :hpricot
+
+        # Creates the wrapped object from the subclass's +hpricot_class+.
+        def initialize(name)
+          super(name)
+          @hpricot = self.class.hpricot_class.new name
+        end
+
+        # Appends +node+ as the last child. A text node that follows another
+        # text node is merged into it instead of being added separately.
+        def appendChild(node)
+          if node.kind_of?(TextNode) and childNodes.any? and childNodes.last.kind_of?(TextNode)
+            childNodes.last.hpricot.content = childNodes.last.hpricot.content + node.hpricot.content
+          else
+            childNodes << node
+            hpricot.children << node.hpricot
+          end
+          # Detach the Hpricot object from whatever parent it had before.
+          if (oldparent = node.hpricot.parent) != nil
+            oldparent.children.delete_at(oldparent.children.index(node.hpricot))
+          end
+          node.hpricot.parent = hpricot
+          node.parent = self
+        end
+
+        # Removes +node+ from both child lists and clears its parent links.
+        def removeChild(node)
+          childNodes.delete(node)
+          hpricot.children.delete_at(hpricot.children.index(node.hpricot))
+          node.hpricot.parent = nil
+          node.parent = nil
+        end
+
+        # Inserts +data+ as text, before the child +before+ when given,
+        # otherwise at the end.
+        def insertText(data, before=nil)
+          if before
+            insertBefore(TextNode.new(data), before)
+          else
+            appendChild(TextNode.new(data))
+          end
+        end
+
+        # Inserts +node+ immediately before the existing child +refNode+.
+        # Text is merged into a preceding text node, using the raw +content+
+        # of both nodes exactly as appendChild does.
+        def insertBefore(node, refNode)
+          index = childNodes.index(refNode)
+          if node.kind_of?(TextNode) and index > 0 and childNodes[index-1].kind_of?(TextNode)
+            previous = childNodes[index-1].hpricot
+            previous.content = previous.content + node.hpricot.content
+          else
+            refNode.hpricot.parent.insert_before(node.hpricot,refNode.hpricot)
+            childNodes.insert(index, node)
+          end
+        end
+
+        # True when the node has at least one child.
+        def hasContent
+          childNodes.any?
+        end
+      end
+
+      # Element node backed by an Hpricot::Elem.
+      class Element < Node
+        def self.hpricot_class
+          ::Hpricot::Elem
+        end
+
+        # Replaces the object created by Node#initialize with an Elem
+        # built from a start tag carrying +name+.
+        def initialize(name)
+          super(name)
+
+          @hpricot = ::Hpricot::Elem.new(::Hpricot::STag.new(name))
+        end
+
+        # Tag name, read from the element's start tag.
+        def name
+          @hpricot.stag.name
+        end
+
+        # Returns a new element of the same class and name carrying a copy
+        # of every attribute; children are not copied.
+        def cloneNode
+          copy = self.class.new(name)
+          attributes.each do |attr_name, attr_value|
+            copy.hpricot[attr_name] = attr_value
+          end
+          copy
+        end
+
+        # A call to Hpricot::Elem#raw_attributes is built dynamically,
+        # so alterations to the returned value (a hash) will be lost.
+        #
+        # AttributeProxy works around this by forwarding :[]= calls
+        # to the raw_attributes accessor on the element start tag.
+        #
+        class AttributeProxy
+          def initialize(hpricot)
+            @hpricot = hpricot
+          end
+
+          # Writes straight into the start tag's attribute hash.
+          def []=(k, v)
+            @hpricot.stag.send(stag_attributes_method)[k] = v
+          end
+
+          # Name of the start-tag accessor available in this Hpricot version.
+          def stag_attributes_method
+            # STag#attributes changed to STag#raw_attributes after Hpricot 0.5
+            if @hpricot.stag.respond_to?(:raw_attributes)
+              :raw_attributes
+            else
+              :attributes
+            end
+          end
+
+          # Everything else is answered by the element's attributes hash.
+          def method_missing(*a, &b)
+            @hpricot.attributes.send(*a, &b)
+          end
+        end
+
+        def attributes
+          AttributeProxy.new(@hpricot)
+        end
+
+        # Sets each name/value pair of +attrs+ on the wrapped element.
+        def attributes=(attrs)
+          attrs.each do |attr_name, attr_value|
+            @hpricot[attr_name] = attr_value
+          end
+        end
+
+        # Renders this element, its attributes (except xmlns) and its
+        # children in the indented test-tree format.
+        def printTree(indent=0)
+          output = "\n|#{' ' * indent}<#{name}>"
+          indent += 2
+          attributes.each do |attr_name, attr_value|
+            next if attr_name == 'xmlns'
+            output += "\n|#{' ' * indent}#{attr_name}=\"#{attr_value}\""
+          end
+          childNodes.inject(output) { |acc, child| acc + child.printTree(indent) }
+        end
+      end
+
+      # Document root backed by an Hpricot::Doc.
+      class Document < Node
+        def self.hpricot_class
+          ::Hpricot::Doc
+        end
+
+        # A document has no name, so nil is handed to Node#initialize.
+        def initialize
+          super(nil)
+        end
+
+        # Renders '#document' followed by every child, indented two further.
+        def printTree(indent=0)
+          child_indent = indent + 2
+          '#document' + childNodes.map { |child| child.printTree(child_indent) }.join
+        end
+      end
+
+      # Doctype node backed by an Hpricot::DocType; +public_id+ and
+      # +system_id+ are read from the wrapped object.
+      class DocumentType < Node
+        def_delegators :@hpricot, :public_id, :system_id
+
+        def self.hpricot_class
+          ::Hpricot::DocType
+        end
+
+        def initialize(name, public_id, system_id)
+          begin
+            super(name)
+          rescue ArgumentError # needs 3...
+            # Node#initialize builds the Hpricot object with one argument;
+            # the failure is ignored because @hpricot is assigned just below.
+          end
+
+          @hpricot = ::Hpricot::DocType.new(name, public_id, system_id)
+        end
+
+        # Renders the doctype line, leaving the name blank when the target
+        # is nil or empty. Uses empty? rather than String#any?, which only
+        # exists while String is Enumerable (Ruby 1.8).
+        def printTree(indent=0)
+          target = hpricot.target
+          if target and !target.empty?
+            "\n|#{' ' * indent}<!DOCTYPE #{target}>"
+          else
+            "\n|#{' ' * indent}<!DOCTYPE >"
+          end
+        end
+      end
+
+      # Fragment container: an Element created with an empty name.
+      class DocumentFragment < Element
+        def initialize
+          super('')
+        end
+
+        # Renders only the children, each indented two further.
+        def printTree(indent=0)
+          child_indent = indent + 2
+          childNodes.map { |child| child.printTree(child_indent) }.join
+        end
+      end
+
+      # Text node backed by an Hpricot::Text.
+      class TextNode < Node
+        # Only sets the wrapped object; Node#initialize is not invoked.
+        def initialize(data)
+          @hpricot = ::Hpricot::Text.new(data)
+        end
+
+        # Renders the text content in double quotes on its own line.
+        def printTree(indent=0)
+          padding = ' ' * indent
+          "\n|#{padding}\"#{hpricot.content}\""
+        end
+      end
+
+      # Comment node backed by an Hpricot::Comment.
+      class CommentNode < Node
+        def self.hpricot_class
+          ::Hpricot::Comment
+        end
+
+        # Renders the comment content between <!-- and --> on its own line.
+        def printTree(indent=0)
+          padding = ' ' * indent
+          "\n|#{padding}<!-- #{hpricot.content} -->"
+        end
+      end
+
+      # Tree builder that produces the Hpricot-backed node classes above.
+      class TreeBuilder < Base::TreeBuilder
+        # Registers the node class used for each kind of node.
+        def initialize
+          @documentClass, @doctypeClass, @elementClass = Document, DocumentType, Element
+          @commentClass, @fragmentClass = CommentNode, DocumentFragment
+        end
+
+        # Builds a doctype node and appends it to the document.
+        def insertDoctype(name, public_id, system_id)
+          @document.appendChild(@doctypeClass.new(name, public_id, system_id))
+        end
+
+        def testSerializer(node)
+          node.printTree
+        end
+
+        # Returns the wrapped Hpricot object rather than the wrapper node.
+        def get_document
+          @document.hpricot
+        end
+
+        # Stores the fragment built by the superclass as the document and
+        # returns its Hpricot children.
+        def get_fragment
+          @document = super
+          @document.hpricot.children
+        end
+      end
+
+    end
+  end
+end
--- a/vendor/plugins/HTML5lib/lib/html5lib/treebuilders/rexml.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/treebuilders/rexml.rb
@ -1,8 +1,8 @@
-require 'html5lib/treebuilders/base'
+require 'html5/treebuilders/base'
 require 'rexml/document'
 require 'forwardable'

-module HTML5lib
+module HTML5
  module TreeBuilders
    module REXML

@ -17,11 +17,9 @@ module HTML5lib
        end

        def appendChild node
-          if node.kind_of? TextNode and 
-            childNodes.length>0 and childNodes[-1].kind_of? TextNode
-            childNodes[-1].rxobj.value =
-              childNodes[-1].rxobj.to_s + node.rxobj.to_s
-            childNodes[-1].rxobj.raw = true
+          if node.kind_of?(TextNode) && childNodes.length > 0 && childNodes.last.kind_of?(TextNode)
+            childNodes.last.rxobj.value = childNodes.last.rxobj.to_s + node.rxobj.to_s
+            childNodes.last.rxobj.raw = true
          else
            childNodes.push node
            rxobj.add node.rxobj
@ -45,10 +43,8 @@ module HTML5lib

        def insertBefore node, refNode
          index = childNodes.index(refNode)
-          if node.kind_of? TextNode and index>0 and 
-            childNodes[index-1].kind_of? TextNode
-            childNodes[index-1].rxobj.value =
-              childNodes[index-1].rxobj.to_s + node.rxobj.to_s
+          if node.kind_of?(TextNode) and index > 0 && childNodes[index-1].kind_of?(TextNode)
+            childNodes[index-1].rxobj.value = childNodes[index-1].rxobj.to_s + node.rxobj.to_s
            childNodes[index-1].rxobj.raw = true
          else
            childNodes.insert index, node
@ -57,7 +53,7 @@ module HTML5lib
        end

        def hasContent
-          return (childNodes.length > 0)
+          (childNodes.length > 0)
        end
      end

@ -77,7 +73,7 @@ module HTML5lib
        end

        def attributes= value
-          value.each {|name, value| rxobj.attributes[name]=value}
+          value.each {|name, value| rxobj.attributes[name] = value}
        end

        def printTree indent=0
@ -90,7 +86,7 @@ module HTML5lib
          for child in childNodes
            tree += child.printTree(indent)
          end
-          return tree
+          tree
        end
      end

@ -120,10 +116,25 @@ module HTML5lib
      end

      class DocumentType < Node
+        def_delegator :@rxobj, :public, :public_id
+
+        def_delegator :@rxobj, :system, :system_id
+
        def self.rxclass
          ::REXML::DocType
        end

+        # Wraps a REXML doctype built from +name+ plus whichever of the
+        # public and system identifiers were supplied.
+        def initialize name, public_id, system_id
+          super(name)
+          doctype_source =
+            if public_id
+              [name, ::REXML::DocType::PUBLIC, public_id, system_id]
+            elsif system_id
+              [name, ::REXML::DocType::SYSTEM, nil, system_id]
+            else
+              name
+            end
+          @rxobj = ::REXML::DocType.new doctype_source
+        end
+
        def printTree indent=0
          "\n|#{' ' * indent}<!DOCTYPE #{name}>"
        end
@ -145,7 +156,7 @@ module HTML5lib

      class TextNode < Node
        def initialize data
-          raw=data.gsub('&','&amp;').gsub('<','&lt;').gsub('>','&gt;')
+          raw = data.gsub('&', '&amp;').gsub('<', '&lt;').gsub('>', '&gt;')
          @rxobj = ::REXML::Text.new(raw, true, nil, true)
        end

@ -167,21 +178,26 @@ module HTML5lib
      class TreeBuilder < Base::TreeBuilder
        def initialize
          @documentClass = Document
-          @doctypeClass = DocumentType
-          @elementClass = Element
-          @commentClass = CommentNode
+          @doctypeClass  = DocumentType
+          @elementClass  = Element
+          @commentClass  = CommentNode
          @fragmentClass = DocumentFragment
        end

-        def testSerializer node
-          node.printTree()
+        def insertDoctype(name, public_id, system_id)
+          doctype = @doctypeClass.new(name, public_id, system_id)
+          @document.appendChild(doctype)
        end

-        def getDocument
+        def testSerializer node
+          node.printTree
+        end
+
+        def get_document
          @document.rxobj
        end

-        def getFragment
+        def get_fragment
          @document = super
          return @document.rxobj.children
        end
--- a/vendor/plugins/HTML5lib/lib/html5lib/treebuilders/simpletree.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/treebuilders/simpletree.rb
@ -1,6 +1,6 @@
-require 'html5lib/treebuilders/base'
+require 'html5/treebuilders/base'

-module HTML5lib
+module HTML5
  module TreeBuilders
    module SimpleTree

@ -18,17 +18,17 @@ module HTML5lib

        def initialize name
          super
-          @name = name
-          @value = nil
+          @name       = name
+          @value      = nil
          @attributes = {}
        end

        def appendChild node
          if node.kind_of? TextNode and 
-            childNodes.length>0 and childNodes[-1].kind_of? TextNode
-            childNodes[-1].value += node.value
+            childNodes.length > 0 and childNodes.last.kind_of? TextNode
+            childNodes.last.value += node.value
          else
-            childNodes.push node
+            childNodes << node
          end
          node.parent = self
        end
@ -55,8 +55,7 @@ module HTML5lib

        def insertBefore node, refNode
          index = childNodes.index(refNode)
-          if node.kind_of? TextNode and index>0 and 
-            childNodes[index-1].kind_of? TextNode
+          if node.kind_of?(TextNode) && index > 0 && childNodes[index-1].kind_of?(TextNode)
            childNodes[index-1].value += node.value
          else
            childNodes.insert index, node
@ -72,7 +71,7 @@ module HTML5lib
        end

        def hasContent
-          return (childNodes.length > 0)
+          childNodes.length > 0
        end
      end

@ -90,7 +89,7 @@ module HTML5lib
          for child in childNodes
            tree += child.printTree(indent)
          end
-          return tree
+          tree
        end
      end

@ -108,13 +107,21 @@ module HTML5lib
          for child in childNodes
            tree += child.printTree(indent + 2)
          end
-          return tree
+          tree
        end
      end

      class DocumentType < Node
+        attr_accessor :public_id, :system_id
+
        def to_s
-           "<!DOCTYPE %s>" % name
+          "<!DOCTYPE #{name}>"
+        end
+
+        def initialize name
+          super name
+          @public_id = nil
+          @system_id = nil
        end
      end

@ -157,19 +164,19 @@ module HTML5lib
      class TreeBuilder < Base::TreeBuilder
        def initialize
          @documentClass = Document
-          @doctypeClass = DocumentType
-          @elementClass = Element
-          @commentClass = CommentNode
+          @doctypeClass  = DocumentType
+          @elementClass  = Element
+          @commentClass  = CommentNode
          @fragmentClass = DocumentFragment
        end

        def testSerializer node
-          node.printTree()
+          node.printTree
        end

-        def getFragment
+        def get_fragment
          @document = super
-          return @document.childNodes
+          @document.childNodes
        end
      end

--- a/vendor/plugins/HTML5lib/lib/html5/treewalkers.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/treewalkers.rb
@ -0,0 +1,26 @@
+require 'html5/treewalkers/base'
+
+module HTML5
+  module TreeWalkers
+
+    class << self
+      # Returns the TreeWalker class registered under +name+
+      # (case-insensitive: simpletree, rexml or hpricot). The matching
+      # implementation file is only required when it is asked for.
+      def [](name)
+        key = name.to_s.downcase
+        if key == 'simpletree'
+          require 'html5/treewalkers/simpletree'
+          SimpleTree::TreeWalker
+        elsif key == 'rexml'
+          require 'html5/treewalkers/rexml'
+          REXML::TreeWalker
+        elsif key == 'hpricot'
+          require 'html5/treewalkers/hpricot'
+          Hpricot::TreeWalker
+        else
+          raise "Unknown TreeWalker #{name}"
+        end
+      end
+
+      # Long-hand name for the same lookup.
+      alias_method :get_tree_walker, :[]
+    end
+  end
+end
--- a/vendor/plugins/HTML5lib/lib/html5/treewalkers/base.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/treewalkers/base.rb
@ -0,0 +1,154 @@
+require 'html5/constants'
+module HTML5
+module TreeWalkers
+
+# Factory methods for the token hashes emitted by tree walkers.
+module TokenConstructor
+  # Builds a serialization error token carrying +msg+.
+  def error(msg)
+    {:type => "SerializeError", :data => msg}
+  end
+
+  # Converts an attribute collection into an array via to_a.
+  def normalize_attrs(attrs)
+    attrs.to_a
+  end
+
+  # Builds the token for a void element.
+  def empty_tag(name, attrs, has_children=false)
+    # NOTE(review): the error token built here is neither returned nor
+    # yielded, so callers never see it -- confirm whether that is intended.
+    error(_("Void element has children")) if has_children
+    token = {:type => :EmptyTag, :name => name}
+    token[:data] = normalize_attrs(attrs)
+    token
+  end
+
+  # Builds the token that opens element +name+.
+  def start_tag(name, attrs)
+    token = {:type => :StartTag, :name => name}
+    token[:data] = normalize_attrs(attrs)
+    token
+  end
+
+  # Builds the token that closes element +name+.
+  def end_tag(name)
+    {:type => :EndTag, :name => name, :data => []}
+  end
+
+  # Yields the tokens for a run of text: leading whitespace, then the
+  # remaining characters, then trailing whitespace, each as its own token.
+  # Nothing further is yielded when the text is whitespace only.
+  def text(data)
+    whitespace = SPACE_CHARACTERS.join('')
+
+    leading = /\A([#{whitespace}]+)/m.match(data)
+    if leading
+      yield({:type => :SpaceCharacters, :data => leading[1]})
+      data = data[leading[1].length .. -1]
+      return if data.empty?
+    end
+
+    trailing = /([#{whitespace}]+)\Z/m.match(data)
+    if trailing
+      yield({:type => :Characters, :data => data[0 ... -trailing[1].length]})
+      yield({:type => :SpaceCharacters, :data => trailing[1]})
+    else
+      yield({:type => :Characters, :data => data})
+    end
+  end
+
+  # Builds a comment token.
+  def comment(data)
+    {:type => :Comment, :data => data}
+  end
+
+  # Builds a doctype token from its name and identifiers.
+  def doctype(name, public_id, system_id, correct=nil)
+    token = {:type => :Doctype, :name => name}
+    token[:public_id] = public_id
+    token[:system_id] = system_id
+    token[:correct]   = correct
+    token
+  end
+
+  # Builds the error token for a node type the walker does not recognise.
+  def unknown(nodeType)
+    error(_("Unknown node type: ") + nodeType.to_s)
+  end
+
+  # Message hook; returns +str+ unchanged.
+  def _(str)
+    str
+  end
+end
+
+# Common superclass of the tree walkers: stores the tree to traverse and
+# mixes in the token constructors.
+class Base
+  include TokenConstructor
+
+  def initialize(tree)
+    @tree = tree
+  end
+
+  # Yields the tokens for the tree; must be supplied by subclasses.
+  def each
+    raise NotImplementedError
+  end
+
+  # Same as #each. Written as a forwarding method rather than
+  # `alias walk each` so that a subclass overriding #each is honoured;
+  # an alias would stay bound to the NotImplementedError stub above.
+  def walk(*args, &block)
+    each(*args, &block)
+  end
+end
+
+# Walks a tree iteratively, using only the four navigation hooks below,
+# and yields one token per node event. Subclasses supply the hooks.
+class NonRecursiveTreeWalker < TreeWalkers::Base
+  # Returns an array describing +node+: a type tag (:DOCTYPE, :TEXT,
+  # :ELEMENT, :COMMENT, :DOCUMENT, :DOCUMENT_FRAGMENT or nil) followed by
+  # the values that #each passes on for that type.
+  def node_details(node)
+    raise NotImplementedError
+  end
+
+  # Returns the first child of +node+, or nil.
+  def first_child(node)
+    raise NotImplementedError
+  end
+
+  # Returns the sibling following +node+, or nil.
+  def next_sibling(node)
+    raise NotImplementedError
+  end
+
+  # Returns the parent of +node+.
+  def parent(node)
+    raise NotImplementedError
+  end
+
+  # Yields the tokens for @tree in document order.
+  def each
+    current_node = @tree
+    while current_node != nil
+      details = node_details(current_node)
+      has_children = false
+
+      # shift removes the type tag, leaving only the per-type values.
+      case details.shift
+      when :DOCTYPE
+        yield doctype(*details)
+
+      when :TEXT
+        text(*details) {|token| yield token}
+
+      when :ELEMENT
+        name, attributes, has_children = details
+        if VOID_ELEMENTS.include?(name)
+          yield empty_tag(name, attributes.to_a, has_children)
+          # Never descend into a void element, whatever it contains.
+          has_children = false
+        else
+          yield start_tag(name, attributes.to_a)
+        end
+
+      when :COMMENT
+        yield comment(details[0])
+
+      when :DOCUMENT, :DOCUMENT_FRAGMENT
+        has_children = true
+
+      when nil
+        # ignore (REXML::XMLDecl is an example)
+
+      else
+        yield unknown(details[0])
+      end
+
+      first_child = has_children ? first_child(current_node) : nil
+      if first_child != nil
+        current_node = first_child
+      else
+        # No children to visit: climb back up, closing each non-void
+        # element on the way, until a next sibling is found or the
+        # starting node is reached.
+        while current_node != nil
+          details = node_details(current_node)
+          if details.shift == :ELEMENT
+            name, attributes, has_children = details
+            yield end_tag(name) if !VOID_ELEMENTS.include?(name)
+          end
+
+          if @tree == current_node
+            # Back at the starting node: ends both loops.
+            current_node = nil
+          else
+            next_sibling = next_sibling(current_node)
+            if next_sibling != nil
+              current_node = next_sibling
+              break
+            end
+
+            current_node = parent(current_node)
+          end
+        end
+      end
+    end
+  end
+end
+
+end
+end
--- a/vendor/plugins/HTML5lib/lib/html5lib/treewalkers/hpricot.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/treewalkers/hpricot.rb
@ -1,10 +1,10 @@
-require 'html5lib/treewalkers/base'
+require 'html5/treewalkers/base'
 require 'rexml/document'

-module HTML5lib
+module HTML5
  module TreeWalkers
    module Hpricot
-      class TreeWalker < HTML5lib::TreeWalkers::NonRecursiveTreeWalker
+      class TreeWalker < HTML5::TreeWalkers::NonRecursiveTreeWalker

        def node_details(node)
          case node
@ -13,17 +13,17 @@ module HTML5lib
              [:DOCUMENT_FRAGMENT]
            else
              [:ELEMENT, node.name,
-                node.attributes.map {|name,value| [name,value]},
+                node.attributes.map {|name, value| [name, value]},
                !node.empty?]
            end
          when ::Hpricot::Text
-            [:TEXT, node.to_plain_text]
+            [:TEXT, node.content]
          when ::Hpricot::Comment
            [:COMMENT, node.content]
          when ::Hpricot::Doc
            [:DOCUMENT]
          when ::Hpricot::DocType
-            [:DOCTYPE, node.target]
+            [:DOCTYPE, node.target, node.public_id, node.system_id]
          when ::Hpricot::XMLDecl
            [nil]
          else
--- a/vendor/plugins/HTML5lib/lib/html5lib/treewalkers/rexml.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/treewalkers/rexml.rb
@ -1,10 +1,10 @@
-require 'html5lib/treewalkers/base'
+require 'html5/treewalkers/base'
 require 'rexml/document'

-module HTML5lib
+module HTML5
  module TreeWalkers
    module REXML
-      class TreeWalker < HTML5lib::TreeWalkers::NonRecursiveTreeWalker
+      class TreeWalker < HTML5::TreeWalkers::NonRecursiveTreeWalker

        def node_details(node)
          case node
@ -23,7 +23,7 @@ module HTML5lib
          when ::REXML::Comment
            [:COMMENT, node.string]
          when ::REXML::DocType
-            [:DOCTYPE, node.name]
+            [:DOCTYPE, node.name, node.public, node.system]
          when ::REXML::XMLDecl
            [nil]
          else
--- a/vendor/plugins/HTML5lib/lib/html5lib/treewalkers/simpletree.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/treewalkers/simpletree.rb
@ -1,10 +1,10 @@
-require 'html5lib/treewalkers/base'
+require 'html5/treewalkers/base'

-module HTML5lib
+module HTML5
  module TreeWalkers
    module SimpleTree
-      class TreeWalker < HTML5lib::TreeWalkers::Base
-        include HTML5lib::TreeBuilders::SimpleTree
+      class TreeWalker < HTML5::TreeWalkers::Base
+        include HTML5::TreeBuilders::SimpleTree

        def walk(node)
          case node
@ -12,20 +12,20 @@ module HTML5lib
            return

          when DocumentType
-            yield doctype(node.name)
+            yield doctype(node.name, node.public_id, node.system_id)

          when TextNode
            text(node.value) {|token| yield token}

          when Element
            if VOID_ELEMENTS.include?(node.name)
-              yield emptyTag(node.name, node.attributes, node.hasContent())
+              yield empty_tag(node.name, node.attributes, node.hasContent())
            else
-              yield startTag(node.name, node.attributes)
+              yield start_tag(node.name, node.attributes)
              for child in node.childNodes
                walk(child) {|token| yield token}
              end
-              yield endTag(node.name)
+              yield end_tag(node.name)
            end

          when CommentNode
--- a/vendor/plugins/HTML5lib/lib/html5/version.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/version.rb
@ -0,0 +1,3 @@
+module HTML5
+  # Version string of the HTML5 library.
+  VERSION = "0.1.0"
+end
--- a/vendor/plugins/HTML5lib/lib/html5lib.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib.rb
@ -1,11 +0,0 @@
-require 'html5lib/html5parser'
-
-module HTML5lib
-    def self.parse(stream, options={})
-        HTMLParser.parse(stream, options)
-    end
-
-    def self.parseFragment(stream, options={})
-        HTMLParser.parse(stream, options)
-    end
-end
--- a/vendor/plugins/HTML5lib/lib/html5lib/constants.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/constants.rb
@ -1,708 +0,0 @@
-module HTML5lib
-
-  class EOF < Exception; end
-
-  CONTENT_MODEL_FLAGS = [
-      :PCDATA,
-      :RCDATA,
-      :CDATA,
-      :PLAINTEXT
-  ]
-
-  SCOPING_ELEMENTS = %w[
-      button
-      caption
-      html
-      marquee
-      object
-      table
-      td
-      th
-  ]
-
-  FORMATTING_ELEMENTS = %w[
-      a
-      b
-      big
-      em
-      font
-      i
-      nobr
-      s
-      small
-      strike
-      strong
-      tt
-      u
-  ]
-
-  SPECIAL_ELEMENTS = %w[
-      address
-      area
-      base
-      basefont
-      bgsound
-      blockquote
-      body
-      br
-      center
-      col
-      colgroup
-      dd
-      dir
-      div
-      dl
-      dt
-      embed
-      fieldset
-      form
-      frame
-      frameset
-      h1
-      h2
-      h3
-      h4
-      h5
-      h6
-      head
-      hr
-      iframe
-      image
-      img
-      input
-      isindex
-      li
-      link
-      listing
-      menu
-      meta
-      noembed
-      noframes
-      noscript
-      ol
-      optgroup
-      option
-      p
-      param
-      plaintext
-      pre
-      script
-      select
-      spacer
-      style
-      tbody
-      textarea
-      tfoot
-      thead
-      title
-      tr
-      ul
-      wbr
-  ]
-
-  SPACE_CHARACTERS = %W[
-      \t
-      \n
-      \x0B
-      \x0C
-      \x20
-      \r
-  ]
-
-  TABLE_INSERT_MODE_ELEMENTS = %w[
-      table
-      tbody
-      tfoot
-      thead
-      tr
-  ]
-
-  ASCII_LOWERCASE = ('a'..'z').to_a.join('')
-  ASCII_UPPERCASE = ('A'..'Z').to_a.join('')
-  ASCII_LETTERS = ASCII_LOWERCASE + ASCII_UPPERCASE
-  DIGITS = '0'..'9'
-  HEX_DIGITS = DIGITS.to_a + ('a'..'f').to_a + ('A'..'F').to_a
-
-  # Heading elements need to be ordered 
-  HEADING_ELEMENTS = %w[
-      h1
-      h2
-      h3
-      h4
-      h5
-      h6
-  ]
-
-  # XXX What about event-source and command?
-  VOID_ELEMENTS = %w[
-      base
-      link
-      meta
-      hr
-      br
-      img
-      embed
-      param
-      area
-      col
-      input
-  ]
-
-  CDATA_ELEMENTS = %w[title textarea]
-
-  RCDATA_ELEMENTS = %w[
-    style
-    script
-    xmp
-    iframe
-    noembed
-    noframes
-    noscript
-  ]
-
-  BOOLEAN_ATTRIBUTES = {
-    :global => %w[irrelevant],
-    'style' => %w[scoped],
-    'img' => %w[ismap],
-    'audio' => %w[autoplay controls],
-    'video' => %w[autoplay controls],
-    'script' => %w[defer async],
-    'details' => %w[open],
-    'datagrid' => %w[multiple disabled],
-    'command' => %w[hidden disabled checked default],
-    'menu' => %w[autosubmit],
-    'fieldset' => %w[disabled readonly],
-    'option' => %w[disabled readonly selected],
-    'optgroup' => %w[disabled readonly],
-    'button' => %w[disabled autofocus],
-    'input' => %w[disabled readonly required autofocus checked ismap],
-    'select' => %w[disabled readonly autofocus multiple],
-    'output' => %w[disabled readonly]
-  }
-
-  # entitiesWindows1252 has to be _ordered_ and needs to have an index.
-  ENTITIES_WINDOWS1252 = [
-      8364,  # 0x80  0x20AC  EURO SIGN
-      65533, # 0x81          UNDEFINED
-      8218,  # 0x82  0x201A  SINGLE LOW-9 QUOTATION MARK
-      402,   # 0x83  0x0192  LATIN SMALL LETTER F WITH HOOK
-      8222,  # 0x84  0x201E  DOUBLE LOW-9 QUOTATION MARK
-      8230,  # 0x85  0x2026  HORIZONTAL ELLIPSIS
-      8224,  # 0x86  0x2020  DAGGER
-      8225,  # 0x87  0x2021  DOUBLE DAGGER
-      710,   # 0x88  0x02C6  MODIFIER LETTER CIRCUMFLEX ACCENT
-      8240,  # 0x89  0x2030  PER MILLE SIGN
-      352,   # 0x8A  0x0160  LATIN CAPITAL LETTER S WITH CARON
-      8249,  # 0x8B  0x2039  SINGLE LEFT-POINTING ANGLE QUOTATION MARK
-      338,   # 0x8C  0x0152  LATIN CAPITAL LIGATURE OE
-      65533, # 0x8D          UNDEFINED
-      381,   # 0x8E  0x017D  LATIN CAPITAL LETTER Z WITH CARON
-      65533, # 0x8F          UNDEFINED
-      65533, # 0x90          UNDEFINED
-      8216,  # 0x91  0x2018  LEFT SINGLE QUOTATION MARK
-      8217,  # 0x92  0x2019  RIGHT SINGLE QUOTATION MARK
-      8220,  # 0x93  0x201C  LEFT DOUBLE QUOTATION MARK
-      8221,  # 0x94  0x201D  RIGHT DOUBLE QUOTATION MARK
-      8226,  # 0x95  0x2022  BULLET
-      8211,  # 0x96  0x2013  EN DASH
-      8212,  # 0x97  0x2014  EM DASH
-      732,   # 0x98  0x02DC  SMALL TILDE
-      8482,  # 0x99  0x2122  TRADE MARK SIGN
-      353,   # 0x9A  0x0161  LATIN SMALL LETTER S WITH CARON
-      8250,  # 0x9B  0x203A  SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
-      339,   # 0x9C  0x0153  LATIN SMALL LIGATURE OE
-      65533, # 0x9D          UNDEFINED
-      382,   # 0x9E  0x017E  LATIN SMALL LETTER Z WITH CARON
-      376    # 0x9F  0x0178  LATIN CAPITAL LETTER Y WITH DIAERESIS
-  ]
-
-  private
-
-    def self.U n
-      [n].pack('U')
-    end
-
-  public
-
-  ENTITIES = {
-      "AElig" => U(0xC6),
-      "Aacute" => U(0xC1),
-      "Acirc" => U(0xC2),
-      "Agrave" => U(0xC0),
-      "Alpha" => U(0x0391),
-      "Aring" => U(0xC5),
-      "Atilde" => U(0xC3),
-      "Auml" => U(0xC4),
-      "Beta" => U(0x0392),
-      "Ccedil" => U(0xC7),
-      "Chi" => U(0x03A7),
-      "Dagger" => U(0x2021),
-      "Delta" => U(0x0394),
-      "ETH" => U(0xD0),
-      "Eacute" => U(0xC9),
-      "Ecirc" => U(0xCA),
-      "Egrave" => U(0xC8),
-      "Epsilon" => U(0x0395),
-      "Eta" => U(0x0397),
-      "Euml" => U(0xCB),
-      "Gamma" => U(0x0393),
-      "Iacute" => U(0xCD),
-      "Icirc" => U(0xCE),
-      "Igrave" => U(0xCC),
-      "Iota" => U(0x0399),
-      "Iuml" => U(0xCF),
-      "Kappa" => U(0x039A),
-      "Lambda" => U(0x039B),
-      "Mu" => U(0x039C),
-      "Ntilde" => U(0xD1),
-      "Nu" => U(0x039D),
-      "OElig" => U(0x0152),
-      "Oacute" => U(0xD3),
-      "Ocirc" => U(0xD4),
-      "Ograve" => U(0xD2),
-      "Omega" => U(0x03A9),
-      "Omicron" => U(0x039F),
-      "Oslash" => U(0xD8),
-      "Otilde" => U(0xD5),
-      "Ouml" => U(0xD6),
-      "Phi" => U(0x03A6),
-      "Pi" => U(0x03A0),
-      "Prime" => U(0x2033),
-      "Psi" => U(0x03A8),
-      "Rho" => U(0x03A1),
-      "Scaron" => U(0x0160),
-      "Sigma" => U(0x03A3),
-      "THORN" => U(0xDE),
-      "Tau" => U(0x03A4),
-      "Theta" => U(0x0398),
-      "Uacute" => U(0xDA),
-      "Ucirc" => U(0xDB),
-      "Ugrave" => U(0xD9),
-      "Upsilon" => U(0x03A5),
-      "Uuml" => U(0xDC),
-      "Xi" => U(0x039E),
-      "Yacute" => U(0xDD),
-      "Yuml" => U(0x0178),
-      "Zeta" => U(0x0396),
-      "aacute" => U(0xE1),
-      "acirc" => U(0xE2),
-      "acute" => U(0xB4),
-      "aelig" => U(0xE6),
-      "agrave" => U(0xE0),
-      "alefsym" => U(0x2135),
-      "alpha" => U(0x03B1),
-      "amp" => U(0x26),
-      "AMP" => U(0x26),
-      "and" => U(0x2227),
-      "ang" => U(0x2220),
-      "apos" => U(0x27),
-      "aring" => U(0xE5),
-      "asymp" => U(0x2248),
-      "atilde" => U(0xE3),
-      "auml" => U(0xE4),
-      "bdquo" => U(0x201E),
-      "beta" => U(0x03B2),
-      "brvbar" => U(0xA6),
-      "bull" => U(0x2022),
-      "cap" => U(0x2229),
-      "ccedil" => U(0xE7),
-      "cedil" => U(0xB8),
-      "cent" => U(0xA2),
-      "chi" => U(0x03C7),
-      "circ" => U(0x02C6),
-      "clubs" => U(0x2663),
-      "cong" => U(0x2245),
-      "copy" => U(0xA9),
-      "COPY" => U(0xA9),
-      "crarr" => U(0x21B5),
-      "cup" => U(0x222A),
-      "curren" => U(0xA4),
-      "dArr" => U(0x21D3),
-      "dagger" => U(0x2020),
-      "darr" => U(0x2193),
-      "deg" => U(0xB0),
-      "delta" => U(0x03B4),
-      "diams" => U(0x2666),
-      "divide" => U(0xF7),
-      "eacute" => U(0xE9),
-      "ecirc" => U(0xEA),
-      "egrave" => U(0xE8),
-      "empty" => U(0x2205),
-      "emsp" => U(0x2003),
-      "ensp" => U(0x2002),
-      "epsilon" => U(0x03B5),
-      "equiv" => U(0x2261),
-      "eta" => U(0x03B7),
-      "eth" => U(0xF0),
-      "euml" => U(0xEB),
-      "euro" => U(0x20AC),
-      "exist" => U(0x2203),
-      "fnof" => U(0x0192),
-      "forall" => U(0x2200),
-      "frac12" => U(0xBD),
-      "frac14" => U(0xBC),
-      "frac34" => U(0xBE),
-      "frasl" => U(0x2044),
-      "gamma" => U(0x03B3),
-      "ge" => U(0x2265),
-      "gt" => U(0x3E),
-      "GT" => U(0x3E),
-      "hArr" => U(0x21D4),
-      "harr" => U(0x2194),
-      "hearts" => U(0x2665),
-      "hellip" => U(0x2026),
-      "iacute" => U(0xED),
-      "icirc" => U(0xEE),
-      "iexcl" => U(0xA1),
-      "igrave" => U(0xEC),
-      "image" => U(0x2111),
-      "infin" => U(0x221E),
-      "int" => U(0x222B),
-      "iota" => U(0x03B9),
-      "iquest" => U(0xBF),
-      "isin" => U(0x2208),
-      "iuml" => U(0xEF),
-      "kappa" => U(0x03BA),
-      "lArr" => U(0x21D0),
-      "lambda" => U(0x03BB),
-      "lang" => U(0x2329),
-      "laquo" => U(0xAB),
-      "larr" => U(0x2190),
-      "lceil" => U(0x2308),
-      "ldquo" => U(0x201C),
-      "le" => U(0x2264),
-      "lfloor" => U(0x230A),
-      "lowast" => U(0x2217),
-      "loz" => U(0x25CA),
-      "lrm" => U(0x200E),
-      "lsaquo" => U(0x2039),
-      "lsquo" => U(0x2018),
-      "lt" => U(0x3C),
-      "LT" => U(0x3C),
-      "macr" => U(0xAF),
-      "mdash" => U(0x2014),
-      "micro" => U(0xB5),
-      "middot" => U(0xB7),
-      "minus" => U(0x2212),
-      "mu" => U(0x03BC),
-      "nabla" => U(0x2207),
-      "nbsp" => U(0xA0),
-      "ndash" => U(0x2013),
-      "ne" => U(0x2260),
-      "ni" => U(0x220B),
-      "not" => U(0xAC),
-      "notin" => U(0x2209),
-      "nsub" => U(0x2284),
-      "ntilde" => U(0xF1),
-      "nu" => U(0x03BD),
-      "oacute" => U(0xF3),
-      "ocirc" => U(0xF4),
-      "oelig" => U(0x0153),
-      "ograve" => U(0xF2),
-      "oline" => U(0x203E),
-      "omega" => U(0x03C9),
-      "omicron" => U(0x03BF),
-      "oplus" => U(0x2295),
-      "or" => U(0x2228),
-      "ordf" => U(0xAA),
-      "ordm" => U(0xBA),
-      "oslash" => U(0xF8),
-      "otilde" => U(0xF5),
-      "otimes" => U(0x2297),
-      "ouml" => U(0xF6),
-      "para" => U(0xB6),
-      "part" => U(0x2202),
-      "permil" => U(0x2030),
-      "perp" => U(0x22A5),
-      "phi" => U(0x03C6),
-      "pi" => U(0x03C0),
-      "piv" => U(0x03D6),
-      "plusmn" => U(0xB1),
-      "pound" => U(0xA3),
-      "prime" => U(0x2032),
-      "prod" => U(0x220F),
-      "prop" => U(0x221D),
-      "psi" => U(0x03C8),
-      "quot" => U(0x22),
-      "QUOT" => U(0x22),
-      "rArr" => U(0x21D2),
-      "radic" => U(0x221A),
-      "rang" => U(0x232A),
-      "raquo" => U(0xBB),
-      "rarr" => U(0x2192),
-      "rceil" => U(0x2309),
-      "rdquo" => U(0x201D),
-      "real" => U(0x211C),
-      "reg" => U(0xAE),
-      "REG" => U(0xAE),
-      "rfloor" => U(0x230B),
-      "rho" => U(0x03C1),
-      "rlm" => U(0x200F),
-      "rsaquo" => U(0x203A),
-      "rsquo" => U(0x2019),
-      "sbquo" => U(0x201A),
-      "scaron" => U(0x0161),
-      "sdot" => U(0x22C5),
-      "sect" => U(0xA7),
-      "shy" => U(0xAD),
-      "sigma" => U(0x03C3),
-      "sigmaf" => U(0x03C2),
-      "sim" => U(0x223C),
-      "spades" => U(0x2660),
-      "sub" => U(0x2282),
-      "sube" => U(0x2286),
-      "sum" => U(0x2211),
-      "sup" => U(0x2283),
-      "sup1" => U(0xB9),
-      "sup2" => U(0xB2),
-      "sup3" => U(0xB3),
-      "supe" => U(0x2287),
-      "szlig" => U(0xDF),
-      "tau" => U(0x03C4),
-      "there4" => U(0x2234),
-      "theta" => U(0x03B8),
-      "thetasym" => U(0x03D1),
-      "thinsp" => U(0x2009),
-      "thorn" => U(0xFE),
-      "tilde" => U(0x02DC),
-      "times" => U(0xD7),
-      "trade" => U(0x2122),
-      "uArr" => U(0x21D1),
-      "uacute" => U(0xFA),
-      "uarr" => U(0x2191),
-      "ucirc" => U(0xFB),
-      "ugrave" => U(0xF9),
-      "uml" => U(0xA8),
-      "upsih" => U(0x03D2),
-      "upsilon" => U(0x03C5),
-      "uuml" => U(0xFC),
-      "weierp" => U(0x2118),
-      "xi" => U(0x03BE),
-      "yacute" => U(0xFD),
-      "yen" => U(0xA5),
-      "yuml" => U(0xFF),
-      "zeta" => U(0x03B6),
-      "zwj" => U(0x200D),
-      "zwnj" => U(0x200C)
-  }
-
-  ENCODINGS = %w[
-      ansi_x3.4-1968
-      iso-ir-6
-      ansi_x3.4-1986
-      iso_646.irv:1991
-      ascii
-      iso646-us
-      us-ascii
-      us
-      ibm367
-      cp367
-      csascii
-      ks_c_5601-1987
-      korean
-      iso-2022-kr
-      csiso2022kr
-      euc-kr
-      iso-2022-jp
-      csiso2022jp
-      iso-2022-jp-2
-      iso-ir-58
-      chinese
-      csiso58gb231280
-      iso_8859-1:1987
-      iso-ir-100
-      iso_8859-1
-      iso-8859-1
-      latin1
-      l1
-      ibm819
-      cp819
-      csisolatin1
-      iso_8859-2:1987
-      iso-ir-101
-      iso_8859-2
-      iso-8859-2
-      latin2
-      l2
-      csisolatin2
-      iso_8859-3:1988
-      iso-ir-109
-      iso_8859-3
-      iso-8859-3
-      latin3
-      l3
-      csisolatin3
-      iso_8859-4:1988
-      iso-ir-110
-      iso_8859-4
-      iso-8859-4
-      latin4
-      l4
-      csisolatin4
-      iso_8859-6:1987
-      iso-ir-127
-      iso_8859-6
-      iso-8859-6
-      ecma-114
-      asmo-708
-      arabic
-      csisolatinarabic
-      iso_8859-7:1987
-      iso-ir-126
-      iso_8859-7
-      iso-8859-7
-      elot_928
-      ecma-118
-      greek
-      greek8
-      csisolatingreek
-      iso_8859-8:1988
-      iso-ir-138
-      iso_8859-8
-      iso-8859-8
-      hebrew
-      csisolatinhebrew
-      iso_8859-5:1988
-      iso-ir-144
-      iso_8859-5
-      iso-8859-5
-      cyrillic
-      csisolatincyrillic
-      iso_8859-9:1989
-      iso-ir-148
-      iso_8859-9
-      iso-8859-9
-      latin5
-      l5
-      csisolatin5
-      iso-8859-10
-      iso-ir-157
-      l6
-      iso_8859-10:1992
-      csisolatin6
-      latin6
-      hp-roman8
-      roman8
-      r8
-      ibm037
-      cp037
-      csibm037
-      ibm424
-      cp424
-      csibm424
-      ibm437
-      cp437
-      437
-      cspc8codepage437
-      ibm500
-      cp500
-      csibm500
-      ibm775
-      cp775
-      cspc775baltic
-      ibm850
-      cp850
-      850
-      cspc850multilingual
-      ibm852
-      cp852
-      852
-      cspcp852
-      ibm855
-      cp855
-      855
-      csibm855
-      ibm857
-      cp857
-      857
-      csibm857
-      ibm860
-      cp860
-      860
-      csibm860
-      ibm861
-      cp861
-      861
-      cp-is
-      csibm861
-      ibm862
-      cp862
-      862
-      cspc862latinhebrew
-      ibm863
-      cp863
-      863
-      csibm863
-      ibm864
-      cp864
-      csibm864
-      ibm865
-      cp865
-      865
-      csibm865
-      ibm866
-      cp866
-      866
-      csibm866
-      ibm869
-      cp869
-      869
-      cp-gr
-      csibm869
-      ibm1026
-      cp1026
-      csibm1026
-      koi8-r
-      cskoi8r
-      koi8-u
-      big5-hkscs
-      ptcp154
-      csptcp154
-      pt154
-      cp154
-      utf-7
-      utf-16be
-      utf-16le
-      utf-16
-      utf-8
-      iso-8859-13
-      iso-8859-14
-      iso-ir-199
-      iso_8859-14:1998
-      iso_8859-14
-      latin8
-      iso-celtic
-      l8
-      iso-8859-15
-      iso_8859-15
-      iso-8859-16
-      iso-ir-226
-      iso_8859-16:2001
-      iso_8859-16
-      latin10
-      l10
-      gbk
-      cp936
-      ms936
-      gb18030
-      shift_jis
-      ms_kanji
-      csshiftjis
-      euc-jp
-      gb2312
-      big5
-      csbig5
-      windows-1250
-      windows-1251
-      windows-1252
-      windows-1253
-      windows-1254
-      windows-1255
-      windows-1256
-      windows-1257
-      windows-1258
-      tis-620
-      hz-gb-2312
-  ]
-
-end
--- a/vendor/plugins/HTML5lib/lib/html5lib/filters.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/filters.rb
@ -1 +0,0 @@
-require 'html5lib/filters/optionaltags'
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_frameset_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_frameset_phase.rb
@ -1,57 +0,0 @@
-require 'html5lib/html5parser/phase'
-
-module HTML5lib
-  class InFramesetPhase < Phase
-
-    # http://www.whatwg.org/specs/web-apps/current-work/#in-frameset
-
-    handle_start 'html', 'frameset', 'frame', 'noframes'
-
-    handle_end 'frameset', 'noframes'
-
-    def processCharacters(data)
-      @parser.parseError(_('Unexpected characters in the frameset phase. Characters ignored.'))
-    end
-
-    def startTagFrameset(name, attributes)
-      @tree.insertElement(name, attributes)
-    end
-
-    def startTagFrame(name, attributes)
-      @tree.insertElement(name, attributes)
-      @tree.openElements.pop
-    end
-
-    def startTagNoframes(name, attributes)
-      @parser.phases[:inBody].processStartTag(name, attributes)
-    end
-
-    def startTagOther(name, attributes)
-      @parser.parseError(_("Unexpected start tag token (#{name}) in the frameset phase. Ignored"))
-    end
-
-    def endTagFrameset(name)
-      if @tree.openElements[-1].name == 'html'
-        # innerHTML case
-        @parser.parseError(_("Unexpected end tag token (frameset) in the frameset phase (innerHTML)."))
-      else
-        @tree.openElements.pop
-      end
-      if (not @parser.innerHTML and
-        @tree.openElements[-1].name != 'frameset')
-        # If we're not in innerHTML mode and the the current node is not a
-        # "frameset" element (anymore) then switch.
-        @parser.phase = @parser.phases[:afterFrameset]
-      end
-    end
-
-    def endTagNoframes(name)
-      @parser.phases[:inBody].processEndTag(name)
-    end
-
-    def endTagOther(name)
-      @parser.parseError(_("Unexpected end tag token (#{name}) in the frameset phase. Ignored."))
-    end
-
-  end
-end
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_head_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_head_phase.rb
@ -1,126 +0,0 @@
-require 'html5lib/html5parser/phase'
-
-module HTML5lib
-  class InHeadPhase < Phase
-
-    handle_start 'html', 'head', 'title', 'style', 'script', %w( base link meta )
-
-    handle_end 'head'
-    handle_end %w( html body br ) => 'ImplyAfterHead'
-    handle_end %w( title style script )
-
-    def processEOF
-      if ['title', 'style', 'script'].include?(name = @tree.openElements[-1].name)
-        @parser.parseError(_("Unexpected end of file. Expected end tag (#{name})."))
-        @tree.openElements.pop
-      end
-      anythingElse
-      @parser.phase.processEOF
-    end
-
-    def processCharacters(data)
-      if ['title', 'style', 'script'].include?(@tree.openElements[-1].name)
-        @tree.insertText(data)
-      else
-        anythingElse
-        @parser.phase.processCharacters(data)
-      end
-    end
-
-    def startTagHead(name, attributes)
-      @parser.parseError(_('Unexpected start tag head in existing head. Ignored'))
-    end
-
-    def startTagTitle(name, attributes)
-      element = @tree.createElement(name, attributes)
-      appendToHead(element)
-      @tree.openElements.push(element)
-      @parser.tokenizer.contentModelFlag = :RCDATA
-    end
-
-    def startTagStyle(name, attributes)
-      element = @tree.createElement(name, attributes)
-      if @tree.headPointer != nil and @parser.phase == @parser.phases[:inHead]
-        appendToHead(element)
-      else
-        @tree.openElements[-1].appendChild(element)
-      end
-      @tree.openElements.push(element)
-      @parser.tokenizer.contentModelFlag = :CDATA
-    end
-
-    def startTagScript(name, attributes)
-      #XXX Inner HTML case may be wrong
-      element = @tree.createElement(name, attributes)
-      element._flags.push("parser-inserted")
-      if (@tree.headPointer != nil and
-        @parser.phase == @parser.phases[:inHead])
-        appendToHead(element)
-      else
-        @tree.openElements[-1].appendChild(element)
-      end
-      @tree.openElements.push(element)
-      @parser.tokenizer.contentModelFlag = :CDATA
-    end
-
-    def startTagBaseLinkMeta(name, attributes)
-      element = @tree.createElement(name, attributes)
-      if @tree.headPointer != nil and @parser.phase == @parser.phases[:inHead]
-        appendToHead(element)
-      else
-        @tree.openElements[-1].appendChild(element)
-      end
-    end
-
-    def startTagOther(name, attributes)
-      anythingElse
-      @parser.phase.processStartTag(name, attributes)
-    end
-
-    def endTagHead(name)
-      if @tree.openElements[-1].name == 'head'
-        @tree.openElements.pop
-      else
-        @parser.parseError(_("Unexpected end tag (head). Ignored."))
-      end
-      @parser.phase = @parser.phases[:afterHead]
-    end
-
-    def endTagImplyAfterHead(name)
-      anythingElse
-      @parser.phase.processEndTag(name)
-    end
-
-    def endTagTitleStyleScript(name)
-      if @tree.openElements[-1].name == name
-        @tree.openElements.pop
-      else
-        @parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
-      end
-    end
-
-    def endTagOther(name)
-      @parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
-    end
-
-    def anythingElse
-      if @tree.openElements[-1].name == 'head'
-        endTagHead('head')
-      else
-        @parser.phase = @parser.phases[:afterHead]
-      end
-    end
-
-    protected
-
-    def appendToHead(element)
-      if @tree.headPointer.nil?
-        assert @parser.innerHTML
-        @tree.openElements[-1].appendChild(element)
-      else
-        @tree.headPointer.appendChild(element)
-      end
-    end
-
-  end
-end
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_select_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_select_phase.rb
@ -1,84 +0,0 @@
-require 'html5lib/html5parser/phase'
-
-module HTML5lib
-  class InSelectPhase < Phase
-
-    # http://www.whatwg.org/specs/web-apps/current-work/#in-select
-
-    handle_start 'html', 'option', 'optgroup', 'select'
-
-    handle_end 'option', 'optgroup', 'select', %w( caption table tbody tfoot thead tr td th ) => 'TableElements'
-
-    def processCharacters(data)
-      @tree.insertText(data)
-    end
-
-    def startTagOption(name, attributes)
-      # We need to imply </option> if <option> is the current node.
-      @tree.openElements.pop if @tree.openElements[-1].name == 'option'
-      @tree.insertElement(name, attributes)
-    end
-
-    def startTagOptgroup(name, attributes)
-      @tree.openElements.pop if @tree.openElements[-1].name == 'option'
-      @tree.openElements.pop if @tree.openElements[-1].name == 'optgroup'
-      @tree.insertElement(name, attributes)
-    end
-
-    def startTagSelect(name, attributes)
-      @parser.parseError(_('Unexpected start tag (select) in the select phase implies select start tag.'))
-      endTagSelect('select')
-    end
-
-    def startTagOther(name, attributes)
-      @parser.parseError(_('Unexpected start tag token (#{name}) in the select phase. Ignored.'))
-    end
-
-    def endTagOption(name)
-      if @tree.openElements[-1].name == 'option'
-        @tree.openElements.pop
-      else
-        @parser.parseError(_('Unexpected end tag (option) in the select phase. Ignored.'))
-      end
-    end
-
-    def endTagOptgroup(name)
-      # </optgroup> implicitly closes <option>
-      if @tree.openElements[-1].name == 'option' and @tree.openElements[-2].name == 'optgroup'
-        @tree.openElements.pop
-      end
-      # It also closes </optgroup>
-      if @tree.openElements[-1].name == 'optgroup'
-        @tree.openElements.pop
-      # But nothing else
-      else
-        @parser.parseError(_('Unexpected end tag (optgroup) in the select phase. Ignored.'))
-      end
-    end
-
-    def endTagSelect(name)
-      if in_scope?('select', true)
-        remove_open_elements_until('select')
-
-        @parser.resetInsertionMode
-      else
-        # innerHTML case
-        @parser.parseError
-      end
-    end
-
-    def endTagTableElements(name)
-      @parser.parseError(_("Unexpected table end tag (#{name}) in the select phase."))
-
-      if in_scope?(name, true)
-        endTagSelect('select')
-        @parser.phase.processEndTag(name)
-      end
-    end
-
-    def endTagOther(name)
-      @parser.parseError(_("Unexpected end tag token (#{name}) in the select phase. Ignored."))
-    end
-
-  end
-end
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/trailing_end_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/trailing_end_phase.rb
@ -1,36 +0,0 @@
-require 'html5lib/html5parser/phase'
-
-module HTML5lib
-  class TrailingEndPhase < Phase
-
-    def processEOF
-    end
-
-    def processComment(data)
-      @tree.insertComment(data, @tree.document)
-    end
-
-    def processSpaceCharacters(data)
-      @parser.lastPhase.processSpaceCharacters(data)
-    end
-
-    def processCharacters(data)
-      @parser.parseError(_('Unexpected non-space characters. Expected end of file.'))
-      @parser.phase = @parser.lastPhase
-      @parser.phase.processCharacters(data)
-    end
-
-    def processStartTag(name, attributes)
-      @parser.parseError(_('Unexpected start tag (#{name}). Expected end of file.'))
-      @parser.phase = @parser.lastPhase
-      @parser.phase.processStartTag(name, attributes)
-    end
-
-    def processEndTag(name)
-      @parser.parseError(_('Unexpected end tag (#{name}). Expected end of file.'))
-      @parser.phase = @parser.lastPhase
-      @parser.phase.processEndTag(name)
-    end
-
-  end
-end
--- a/vendor/plugins/HTML5lib/lib/html5lib/serializer.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/serializer.rb
@ -1,2 +0,0 @@
-require 'html5lib/serializer/htmlserializer'
-require 'html5lib/serializer/xhtmlserializer'
--- a/vendor/plugins/HTML5lib/lib/html5lib/serializer/xhtmlserializer.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/serializer/xhtmlserializer.rb
@ -1,19 +0,0 @@
-require 'html5lib/serializer/htmlserializer'
-
-module HTML5lib
-
-  class XHTMLSerializer < HTMLSerializer
-    DEFAULTS = {
-      :quote_attr_values => true,
-      :minimize_boolean_attributes => false,
-      :use_trailing_solidus => true,
-      :escape_lt_in_attrs => true,
-      :omit_optional_tags => false
-    }
-
-    def initialize(options={})
-      super(DEFAULTS.clone.update(options))
-    end
-  end
-
-end
--- a/vendor/plugins/HTML5lib/lib/html5lib/tokenizer.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/tokenizer.rb
--- a/vendor/plugins/HTML5lib/lib/html5lib/treewalkers.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/treewalkers.rb
@ -1,26 +0,0 @@
-require 'html5lib/treewalkers/base'
-
-module HTML5lib
-  module TreeWalkers
-
-    class << self
-      def [](name)
-        case name.to_s.downcase
-        when 'simpletree' then
-          require 'html5lib/treewalkers/simpletree'
-          SimpleTree::TreeWalker
-        when 'rexml' then
-          require 'html5lib/treewalkers/rexml'
-          REXML::TreeWalker
-        when 'hpricot' then
-          require 'html5lib/treewalkers/hpricot'
-          Hpricot::TreeWalker
-        else
-          raise "Unknown TreeWalker #{name}"
-        end
-      end
-
-      alias :getTreeWalker :[]
-    end
-  end
-end
--- a/vendor/plugins/HTML5lib/lib/html5lib/treewalkers/base.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/treewalkers/base.rb
@ -1,156 +0,0 @@
-require 'html5lib/constants'
-module HTML5lib
-module TreeWalkers
-
-module TokenConstructor
-    def error(msg)
-        return {:type => "SerializeError", :data => msg}
-    end
-
-    def normalizeAttrs(attrs)
-        attrs.to_a
-    end
-
-    def emptyTag(name, attrs, hasChildren=false)
-        error(_("Void element has children")) if hasChildren
-        return({:type => :EmptyTag, :name => name, \
-                :data => normalizeAttrs(attrs)})
-    end
-
-    def startTag(name, attrs)
-        return {:type => :StartTag, :name => name, \
-                 :data => normalizeAttrs(attrs)}
-    end
-
-    def endTag(name)
-        return {:type => :EndTag, :name => name, :data => []}
-    end
-
-    def text(data)
-        if data =~ /\A([#{SPACE_CHARACTERS.join('')}]+)/m
-          yield({:type => :SpaceCharacters, :data => $1})
-          data = data[$1.length .. -1]
-          return if data.empty?
-        end
-
-        if data =~ /([#{SPACE_CHARACTERS.join('')}]+)\Z/m
-          yield({:type => :Characters, :data => data[0 ... -$1.length]})
-          yield({:type => :SpaceCharacters, :data => $1})
-        else
-          yield({:type => :Characters, :data => data})
-        end
-    end
-
-    def comment(data)
-        return {:type => :Comment, :data => data}
-    end
-
-    def doctype(name)
-        return {:type => :Doctype, :name => name, :data => name.upcase() == "HTML"}
-    end
-
-    def unknown(nodeType)
-        return error(_("Unknown node type: ") + nodeType.to_s)
-    end
-
-    def _(str)
-      str
-    end
-end
-
-class Base
-    include TokenConstructor
-
-    def initialize(tree)
-        @tree = tree
-    end
-
-    def each
-        raise NotImplementedError
-    end
-
-    alias walk each
-end
-
-class NonRecursiveTreeWalker < TreeWalkers::Base
-    def node_details(node)
-        raise NotImplementedError
-    end
-
-    def first_child(node)
-        raise NotImplementedError
-    end
-
-    def next_sibling(node)
-        raise NotImplementedError
-    end
-
-    def parent(node)
-        raise NotImplementedError
-    end
-
-    def each
-        currentNode = @tree
-        while currentNode != nil
-            details = node_details(currentNode)
-            hasChildren = false
-
-            case details.shift
-            when :DOCTYPE
-                yield doctype(*details)
-
-            when :TEXT
-                text(*details) {|token| yield token}
-
-            when :ELEMENT
-                name, attributes, hasChildren = details
-                if VOID_ELEMENTS.include?(name)
-                    yield emptyTag(name, attributes.to_a, hasChildren)
-                    hasChildren = false
-                else
-                    yield startTag(name, attributes.to_a)
-                end
-
-            when :COMMENT
-                yield comment(details[0])
-
-            when :DOCUMENT, :DOCUMENT_FRAGMENT
-                hasChildren = true
-
-            when nil
-                # ignore (REXML::XMLDecl is an example)
-
-            else
-                yield unknown(details[0])
-            end
-
-            firstChild = hasChildren ? first_child(currentNode) : nil
-            if firstChild != nil
-                currentNode = firstChild
-            else
-                while currentNode != nil
-                    details = node_details(currentNode)
-                    if details.shift == :ELEMENT
-                        name, attributes, hasChildren = details
-                        yield endTag(name) if !VOID_ELEMENTS.include?(name)
-                    end
-
-                    if @tree == currentNode
-                        currentNode = nil
-                    else
-                        nextSibling = next_sibling(currentNode)
-                        if nextSibling != nil
-                            currentNode = nextSibling
-                            break
-                        end
-
-                        currentNode = parent(currentNode)
-                    end
-                end
-            end
-        end
-    end
-end
-
-end
-end
--- a/vendor/plugins/HTML5lib/parse.rb
+++ b/vendor/plugins/HTML5lib/parse.rb
@ -26,15 +26,15 @@ def parse(opts, args)
    exit(1)
  end

-  require 'html5lib/treebuilders'
-  treebuilder = HTML5lib::TreeBuilders[opts.treebuilder]
+  require 'html5/treebuilders'
+  treebuilder = HTML5::TreeBuilders[opts.treebuilder]

  if opts.output == :xml
-    require 'html5lib/liberalxmlparser'
-    p = HTML5lib::XHTMLParser.new(:tree=>treebuilder)
+    require 'html5/liberalxmlparser'
+    p = HTML5::XHTMLParser.new(:tree=>treebuilder)
  else
-    require 'html5lib/html5parser'
-    p = HTML5lib::HTMLParser.new(:tree=>treebuilder)
+    require 'html5/html5parser'
+    p = HTML5::HTMLParser.new(:tree=>treebuilder)
  end

  if opts.parsemethod == :parse
@ -70,10 +70,10 @@ def printOutput(parser, document, opts)
  when :xml
    print document
  when :html
-    require 'html5lib/treewalkers'
-    tokens = HTML5lib::TreeWalkers[opts.treebuilder].new(document)
-    require 'html5lib/serializer'
-    puts HTML5lib::HTMLSerializer.serialize(tokens, opts.serializer)
+    require 'html5/treewalkers'
+    tokens = HTML5::TreeWalkers[opts.treebuilder].new(document)
+    require 'html5/serializer'
+    puts HTML5::HTMLSerializer.serialize(tokens, opts.serializer)
  when :hilite
    print document.hilite
  when :tree
@ -188,6 +188,10 @@ opts = OptionParser.new do |opts|
    options.serializer[:escape_lt_in_attrs] = lt
  end

+  opts.on("--[no-]escape-rcdata", "Escape rcdata element values") do |rcdata|
+    options.serializer[:escape_rcdata] = rcdata
+  end
+
  opts.separator ""
  opts.separator "Other Options:"

--- a/vendor/plugins/HTML5lib/testdata/encoding/tests2.dat
+++ b/vendor/plugins/HTML5lib/testdata/encoding/tests2.dat
@ -33,7 +33,6 @@ EUC-jp
 #encoding
 EUC-jp

-
 #data
 <!-- -->
 <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
--- a/vendor/plugins/HTML5lib/testdata/serializer/core.test
+++ b/vendor/plugins/HTML5lib/testdata/serializer/core.test
@ -92,7 +92,8 @@

 {"description": "rcdata",
 "input": [["StartTag", "script", {}], ["Characters", "a<b>c&d"]],
- "expected": ["<script>a<b>c&d"]
+ "expected": ["<script>a<b>c&d"],
+ "xhtml": ["<script>a&lt;b&gt;c&amp;d"]
 },

 {"description": "doctype",
--- a/vendor/plugins/HTML5lib/testdata/serializer/options.test
+++ b/vendor/plugins/HTML5lib/testdata/serializer/options.test
@ -49,6 +49,12 @@
 "options": {"escape_lt_in_attrs": true},
 "input": [["StartTag", "a", {"title": "a<b>c&d"}]],
 "expected": ["<a title=\"a&lt;b>c&amp;d\">"]
+},
+
+{"description": "rcdata",
+ "options": {"escape_rcdata": true},
+ "input": [["StartTag", "script", {}], ["Characters", "a<b>c&d"]],
+ "expected": ["<script>a&lt;b&gt;c&amp;d"]
 }

 ]}
--- a/vendor/plugins/HTML5lib/testdata/serializer/whitespace.test
+++ b/vendor/plugins/HTML5lib/testdata/serializer/whitespace.test
@ -3,13 +3,13 @@
 {"description": "bare text with leading spaces",
 "options": {"strip_whitespace": true},
 "input": [["Characters", "\t\r\n\u000B\u000C foo"]],
- "expected": ["foo"]
+ "expected": [" foo"]
 },

 {"description": "bare text with trailing spaces",
 "options": {"strip_whitespace": true},
 "input": [["Characters", "foo \t\r\n\u000B\u000C"]],
- "expected": ["foo"]
+ "expected": ["foo "]
 },

 {"description": "bare text with inner spaces",
--- a/vendor/plugins/HTML5lib/testdata/sniffer/htmlOrFeed.json
+++ b/vendor/plugins/HTML5lib/testdata/sniffer/htmlOrFeed.json
@ -0,0 +1,43 @@
+[
+    {"type": "text/html", "input": ""},
+    {"type": "text/html", "input": "<!---->"},
+    {"type": "text/html", "input": "<!--asdfaslkjdf;laksjdf as;dkfjsd-->"},
+    {"type": "text/html", "input": "<!"},
+    {"type": "text/html", "input": "\t"},
+    {"type": "text/html", "input": "<!>"},
+    {"type": "text/html", "input": "<?"},
+    {"type": "text/html", "input": "<??>"},
+    {"type": "application/rss+xml", "input": "<rss"},
+    {"type": "application/atom+xml", "input": "<feed"},
+    {"type": "text/html", "input": "<html"},
+    {"type": "text/html", "input": "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\n<html><head>\n<title>302 Found</title>\n</head><body>\n<h1>Found</h1>\n<p>The document has moved <a href=\"http://feeds.feedburner.com/gofug\">here</a>.</p>\n</body></html>\n"},
+    {"type": "text/html", "input": "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">\r\n<HTML><HEAD>\r\n   <link rel=\"stylesheet\" type=\"text/css\" href=\"http://cache.blogads.com/289619328/feed.css\" /><link rel=\"stylesheet\" type=\"text/css\" href=\"http://cache.blogads.com/431602649/feed.css\" />\r\n<link rel=\"stylesheet\" type=\"text/css\" href=\"http://cache.blogads.com/382549546/feed.css\" />\r\n<link rel=\"stylesheet\" type=\"text/css\" href=\"http://cache.blogads.com/314618017/feed.css\" /><META http-equiv=\"expires\" content="},
+    {"type": "text/html", "input": "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\r\n<html>\r\n<head>\r\n<title>Xiaxue - Chicken pie blogger.</title><meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\"><style type=\"text/css\">\r\n<style type=\"text/css\">\r\n<!--\r\nbody {\r\n background-color: #FFF2F2;\r\n}\r\n.style1 {font-family: Georgia, \"Times New Roman\", Times, serif}\r\n.style2 {\r\n color: #8a567c;\r\n font-size: 14px;\r\n font-family: Georgia, \"Times New Roman\", Times, serif;\r\n}\r"},
+    {"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" lang=\"en\">\r\n<head> \r\n<title>Google Operating System</title>\r\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\r\n<meta name=\"Description\" content=\"Unofficial news and tips about Google. A blog that watches Google's latest developments and the attempts to move your operating system online.\" />\r\n<meta name=\"generator\" c"},
+    {"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" lang=\"en\">\r\n<head>\r\n  <title>Assimilated Press</title>  <meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\r\n<meta name=\"MSSmartTagsPreventParsing\" content=\"true\" />\r\n<meta name=\"generator\" content=\"Blogger\" />\r\n<link rel=\"alternate\" type=\"application/atom+xml\" title=\"Assimilated Press - Atom\" href=\"http://assimila"},
+    {"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" lang=\"en\">\r\n<head>\r\n  <title>PostSecret</title>\r\n<META name=\"keywords\" Content=\"secrets, postcard, secret, postcards, postsecret, postsecrets,online confessional, post secret, post secrets, artomatic, post a secret\"><META name=\"discription\" Content=\"See a Secret...Share a Secret\">  <meta http-equiv=\"Content-Type\" content=\"te"},
+    {"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n<html xmlns='http://www.w3.org/1999/xhtml' xmlns:b='http://www.google.com/2005/gml/b' xmlns:data='http://www.google.com/2005/gml/data' xmlns:expr='http://www.google.com/2005/gml/expr'>\n  <head>\n    \n  <meta content='text/html; charset=UTF-8' http-equiv='Content-Type'/>\n  <meta content='true' name='MSSmartTagsPreventParsing'/>\n  <meta content='blogger' name='generator'/>\n  <link rel=\"alternate\" typ"},
+    {"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" dir=\"ltr\" lang=\"ja\">\n<head profile=\"http://gmpg.org/xfn/11\"> \n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />  \n<title> CMS Lever</title><link rel=\"stylesheet\" type=\"text/css\" media=\"screen\" href=\"http://s.wordpress.com/wp-content/themes/pub/twenty-eight/2813.css\"/>\n<link rel=\"alternate\" type=\"application/rss+xml\" title=\"RSS 2.0\" h"},
+    {"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n    \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" dir=\"ltr\" lang=\"en\"><head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n<title> Park Avenue Peerage</title>\t<meta name=\"generator\" content=\"WordPress.com\" />\t<!-- feeds -->\n\t<link rel=\"alternate\" type=\"application/rss+xml\" title=\"RSS 2.0\" href=\"http://parkavenuepeerage.wordpress.com/feed/\" />\t<link rel=\"pingback\" href="},
+    {"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n    \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" dir=\"ltr\" lang=\"ja\"><head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n<title> \u884c\u96f2\u6d41\u6c34 -like a floating clouds and running water-</title>\t<meta name=\"generator\" content=\"WordPress.com\" />\t<!-- feeds -->\n\t<link rel=\"alternate\" type=\"application/rss+xml\" title=\"RSS 2.0\" href=\"http://shw4.wordpress.com/feed/\" />\t<li"},
+    {"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\"><html xmlns=\"http://www.w3.org/1999/xhtml\">\n<head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />\n<meta name=\"generator\" content=\"http://www.typepad.com/\" />\n<title>Go Fug Yourself</title><link rel=\"stylesheet\" href=\"http://gofugyourself.typepad.com/go_fug_yourself/styles.css\" type=\"text/css\" />\n<link rel=\"alternate\" type=\"application/atom+xml\" title=\"Atom\" "},
+    {"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" dir=\"ltr\" lang=\"en\"><head profile=\"http://gmpg.org/xfn/11\">\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" /><title> Ladies&#8230;</title><meta name=\"generator\" content=\"WordPress.com\" /> <!-- leave this for stats --><link rel=\"stylesheet\" href=\"http://s.wordpress.com/wp-content/themes/default/style.css?1\" type=\"tex"},
+    {"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\r\n<html xmlns=\"http://www.w3.org/1999/xhtml\">\r\n<head>\r\n  <title>The Sartorialist</title>  <meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\r\n<meta name=\"MSSmartTagsPreventParsing\" content=\"true\" />\r\n<meta name=\"generator\" content=\"Blogger\" />\r\n<link rel=\"alternate\" type=\"application/atom+xml\" title=\"The Sartorialist - Atom\" href=\"http://thesartorialist.blogspot"},
+    {"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \n     \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\"><html  xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"en\">\n<head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-1\" />\n<meta name=\"generator\" content=\"http://www.typepad.com/\" />\n<title>Creating Passionate Users</title><link rel=\"stylesheet\" href=\"http://headrush.typepad.com/creating_passionate_users/styles.css\" type=\"text/css\" />\n<link rel=\"alternate\" type"},
+    {"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n\t\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" id=\"sixapart-standard\">\n<head>\n\t<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />\n\t<meta name=\"generator\" content=\"http://www.typepad.com/\" />\n\t\n\t\n    <meta name=\"keywords\" content=\"marketing, blog, seth, ideas, respect, permission\" />\n    <meta name=\"description\" content=\"Seth Godin's riffs on marketing, respect, and the "},
+    {"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n\t\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" id=\"sixapart-standard\">\n<head>\n\t<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />\n\t<meta name=\"generator\" content=\"http://www.typepad.com/\" />\n\t\n\t\n    \n    <meta name=\"description\" content=\" Western Civilization hangs in the balance. This blog is part of the solution,the cure. Get your heads out of the sand and Fight the G"},
+    {"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" dir=\"ltr\" lang=\"en\">\n<head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=pahrefhttpwwwfeedburnercomtarget_blankimgsrchttpwwwfeedburnercomfbimagespubpowered_by_fbgifaltPoweredbyFeedBurnerstyleborder0ap\" />\n<title> From Under the Rotunda</title>\n<link rel=\"stylesheet\" href=\"http://s.wordpress.com/wp-content/themes/pub/andreas04/style.css\" type=\"text/css\""},
+    {"type": "application/atom+xml", "input": "<?xml version='1.0' encoding='UTF-8'?><?xml-stylesheet href=\"http://www.blogger.com/styles/atom.css\" type=\"text/css\"?><feed xmlns='http://www.w3.org/2005/Atom' xmlns:openSearch='http://a9.com/-/spec/opensearchrss/1.0/'><id>tag:blogger.com,1999:blog-10861780</id><updated>2007-07-27T12:38:50.888-07:00</updated><title type='text'>Official Google Blog</title><link rel='alternate' type='text/html' href='http://googleblog.blogspot.com/'/><link rel='next' type='application/atom+xml' href='http://googleblog.blogs"},
+    {"type": "application/rss+xml", "input": "<?xml version='1.0' encoding='UTF-8'?><rss xmlns:atom='http://www.w3.org/2005/Atom' xmlns:openSearch='http://a9.com/-/spec/opensearchrss/1.0/' version='2.0'><channel><atom:id>tag:blogger.com,1999:blog-10861780</atom:id><lastBuildDate>Fri, 27 Jul 2007 19:38:50 +0000</lastBuildDate><title>Official Google Blog</title><description/><link>http://googleblog.blogspot.com/</link><managingEditor>Eric Case</managingEditor><generator>Blogger</generator><openSearch:totalResults>729</openSearch:totalResults><openSearc"},
+    {"type": "application/rss+xml", "input": "<?xml version=\"1.0\" encoding=\"pahrefhttpwwwfeedburnercomtarget_blankimgsrchttpwwwfeedburnercomfbimagespubpowered_by_fbgifaltPoweredbyFeedBurnerstyleborder0ap\"?>\n<!-- generator=\"wordpress/MU\" -->\n<rss version=\"2.0\"\n\txmlns:content=\"http://purl.org/rss/1.0/modules/content/\"\n\txmlns:wfw=\"http://wellformedweb.org/CommentAPI/\"\n\txmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n\t><channel>\n\t<title>From Under the Rotunda</title>\n\t<link>http://dannybernardi.wordpress.com</link>\n\t<description>The Monographs of Danny Ber"},
+    {"type": "application/rss+xml", "input": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!-- generator=\"wordpress/MU\" -->\n<rss version=\"2.0\"\n\txmlns:content=\"http://purl.org/rss/1.0/modules/content/\"\n\txmlns:wfw=\"http://wellformedweb.org/CommentAPI/\"\n\txmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n\t><channel>\n\t<title>CMS Lever</title>\n\t<link>http://kanaguri.wordpress.com</link>\n\t<description>CMS\u306e\u6c17\u306b\u306a\u3063\u305f\u3053\u3068</description>\n\t<pubDate>Wed, 18 Jul 2007 21:26:22 +0000</pubDate>\n\t<generator>http://wordpress.org/?v=MU</generator>\n\t<language>ja</languag"},
+    {"type": "application/atom+xml", "input": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<feed xmlns=\"http://www.w3.org/2005/Atom\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:thr=\"http://purl.org/syndication/thread/1.0\">\n    <title>Atlas Shrugs</title>\n    <link rel=\"self\" type=\"application/atom+xml\" href=\"http://atlasshrugs2000.typepad.com/atlas_shrugs/atom.xml\" />\n    <link rel=\"alternate\" type=\"text/html\" href=\"http://atlasshrugs2000.typepad.com/atlas_shrugs/\" />\n    <id>tag:typepad.com,2003:weblog-132946</id>\n    <updated>2007-08-15T16:07:34-04"},
+    {"type": "application/atom+xml", "input": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/atom10full.xsl\" type=\"text/xsl\" media=\"screen\"?><?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/itemcontent.css\" type=\"text/css\" media=\"screen\"?><feed xmlns=\"http://www.w3.org/2005/Atom\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:thr=\"http://purl.org/syndication/thread/1.0\" xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\">\r\n    <title>Creating Passionate Users</title>\r\n  "},
+    {"type": "application/atom+xml", "input": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/atom10full.xsl\" type=\"text/xsl\" media=\"screen\"?><?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/itemcontent.css\" type=\"text/css\" media=\"screen\"?><feed xmlns=\"http://www.w3.org/2005/Atom\" xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\">\r\n    <title>Seth's Blog</title>\r\n    <link rel=\"alternate\" type=\"text/html\" href=\"http://sethgodin.typepad.com/seths_blog/\" />\r\n    <link rel=\"s"},
+    {"type": "application/atom+xml", "input": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/atom10full.xsl\" type=\"text/xsl\" media=\"screen\"?><?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/itemcontent.css\" type=\"text/css\" media=\"screen\"?><feed xmlns=\"http://www.w3.org/2005/Atom\" xmlns:openSearch=\"http://a9.com/-/spec/opensearchrss/1.0/\" xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\"><id>tag:blogger.com,1999:blog-32454861</id><updated>2007-07-31T21:44:09.867+02:00</upd"},
+    {"type": "application/atom+xml", "input": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/atomfull.xsl\" type=\"text/xsl\" media=\"screen\"?><?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/itemcontent.css\" type=\"text/css\" media=\"screen\"?><feed xmlns=\"http://purl.org/atom/ns#\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\" version=\"0.3\">\r\n  <title>Go Fug Yourself</title>\r\n  <link rel=\"alternate\" type=\"text/html\" href=\"http://go"},
+    {"type": "application/rss+xml", "input": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/rss2full.xsl\" type=\"text/xsl\" media=\"screen\"?><?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/itemcontent.css\" type=\"text/css\" media=\"screen\"?><rss xmlns:creativeCommons=\"http://backend.userland.com/creativeCommonsRssModule\" xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\" version=\"2.0\"><channel><title>Google Operating System</title><link>http://googlesystem.blogspot.com/</link>"},
+    {"type": "application/rss+xml", "input": "<?xml version=\"1.0\" encoding=\"\"?>\n<!-- generator=\"wordpress/MU\" -->\n<rss version=\"2.0\"\n\txmlns:content=\"http://purl.org/rss/1.0/modules/content/\"\n\txmlns:wfw=\"http://wellformedweb.org/CommentAPI/\"\n\txmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n\t><channel>\n\t<title>Nunublog</title>\n\t<link>http://nunubh.wordpress.com</link>\n\t<description>Just Newbie Blog!</description>\n\t<pubDate>Mon, 09 Jul 2007 18:54:09 +0000</pubDate>\n\t<generator>http://wordpress.org/?v=MU</generator>\n\t<language>id</language>\n\t\t\t<item>\n\t\t<ti"},
+    {"type": "text/html", "input": "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" lang=\"en\">\r\n<HEAD>\r\n<TITLE>Design*Sponge</TITLE><meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\r\n<meta name=\"MSSmartTagsPreventParsing\" content=\"true\" />\r\n<meta name=\"generator\" content=\"Blogger\" />\r\n<link rel=\"alternate\" type=\"application/atom+xml\" title=\"Design*Sponge - Atom\" href=\"http://designsponge.blogspot.com/feeds/posts/default\" />\r\n<link rel=\"alternate\" type=\"application/rss+xml\" title=\"Design*Sponge - RSS\" href="},
+    {"type": "text/html", "input": "<HTML>\n<HEAD>\n<TITLE>Moved Temporarily</TITLE>\n</HEAD>\n<BODY BGCOLOR=\"#FFFFFF\" TEXT=\"#000000\">\n<H1>Moved Temporarily</H1>\nThe document has moved <A HREF=\"http://feeds.feedburner.com/thesecretdiaryofstevejobs\">here</A>.\n</BODY>\n</HTML>\n"}
+]
--- a/vendor/plugins/HTML5lib/testdata/tokenizer/contentModelFlags.test
+++ b/vendor/plugins/HTML5lib/testdata/tokenizer/contentModelFlags.test
@ -11,12 +11,24 @@
 "input":"foo</bar>",
 "output":[["Character", "foo"], ["EndTag", "bar"]]},

+{"description":"End tag closing RCDATA or CDATA (case-insensitivity)",
+"contentModelFlags":["RCDATA", "CDATA"],
+"lastStartTag":"bar",
+"input":"foo</bAr>",
+"output":[["Character", "foo"], ["EndTag", "bar"]]},
+
 {"description":"End tag with incorrect name in RCDATA or CDATA",
 "contentModelFlags":["RCDATA", "CDATA"],
 "lastStartTag":"baz",
 "input":"</foo>bar</baz>",
 "output":[["Character", "</foo>bar"], ["EndTag", "baz"]]},

+{"description":"End tag with incorrect name in RCDATA or CDATA (starting like correct name)",
+"contentModelFlags":["RCDATA", "CDATA"],
+"lastStartTag":"baz",
+"input":"</foo>bar</bazaar>",
+"output":[["Character", "</foo>bar</bazaar>"]]},
+
 {"description":"End tag closing RCDATA or CDATA, switching back to PCDATA",
 "contentModelFlags":["RCDATA", "CDATA"],
 "lastStartTag":"bar",
--- a/vendor/plugins/HTML5lib/testdata/tokenizer/entities.test
+++ b/vendor/plugins/HTML5lib/testdata/tokenizer/entities.test
--- a/vendor/plugins/HTML5lib/testdata/tokenizer/test1.test
+++ b/vendor/plugins/HTML5lib/testdata/tokenizer/test1.test
@ -135,7 +135,7 @@

 {"description":"Entity without trailing semicolon (2)",
 "input":"I'm &notin",
-"output":[["Character","I'm "], "ParseError", ["Character", "∉"]]},
+"output":[["Character","I'm "], "ParseError", ["Character", "¬in"]]},

 {"description":"Partial entity match at end of file",
 "input":"I'm &no",
@ -151,6 +151,22 @@

 {"description":"Hexadecimal entity in attribute",
 "input":"<h a='&#x3f;'></h>",
-"output":[["StartTag", "h", {"a":"?"}], ["EndTag", "h"]]}
+"output":[["StartTag", "h", {"a":"?"}], ["EndTag", "h"]]},
+
+{"description":"Entity in attribute without semicolon ending in x",
+"input":"<h a='&notx'>",
+"output":["ParseError", ["StartTag", "h", {"a":"&notx"}]]},
+
+{"description":"Entity in attribute without semicolon ending in 1",
+"input":"<h a='&not1'>",
+"output":["ParseError", ["StartTag", "h", {"a":"&not1"}]]},
+
+{"description":"Entity in attribute without semicolon ending in i",
+"input":"<h a='&noti'>",
+"output":["ParseError", ["StartTag", "h", {"a":"&noti"}]]},
+
+{"description":"Entity in attribute without semicolon",
+"input":"<h a='&COPY'>",
+"output":["ParseError", ["StartTag", "h", {"a":"©"}]]}

 ]}
--- a/vendor/plugins/HTML5lib/testdata/tokenizer/test2.test
+++ b/vendor/plugins/HTML5lib/testdata/tokenizer/test2.test
@ -42,27 +42,23 @@

 {"description":"Numeric entity representing the NUL character",
 "input":"&#0000;",
-"output":[["Character", "\uFFFD"]]},
+"output":["ParseError", ["Character", "\uFFFD"]]},

 {"description":"Hexadecimal entity representing the NUL character",
 "input":"&#x0000;",
-"output":[["Character", "\uFFFD"]]},
+"output":["ParseError", ["Character", "\uFFFD"]]},

 {"description":"Numeric entity representing a codepoint after 1114111 (U+10FFFF)",
 "input":"&#2225222;",
-"output":[["Character", "\uFFFD"]]},
+"output":["ParseError", ["Character", "\uFFFD"]]},

 {"description":"Hexadecimal entity representing a codepoint after 1114111 (U+10FFFF)",
 "input":"&#x1010FFFF;",
-"output":[["Character", "\uFFFD"]]},
+"output":["ParseError", ["Character", "\uFFFD"]]},

-{"description":"Numeric entity representing a Windows-1252 'codepoint'",
-"input":"&#137;",
-"output":["ParseError", ["Character", "\u2030"]]},
-
-{"description":"Hexadecimal entity representing a Windows-1252 'codepoint'",
-"input":"&#x89;",
-"output":["ParseError", ["Character", "\u2030"]]},
+{"description":"Hexadecimal entity pair representing a surrogate pair",
+"input":"&#xD869;&#xDED6;",
+"output":["ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"]]},

 {"description":"Hexadecimal entity with mixed uppercase and lowercase",
 "input":"&#xaBcD;",
@ -118,7 +114,15 @@

 {"description":"Null Byte Replacement",
 "input":"\u0000",
-"output":[["Character", "\ufffd"]]}
+"output":["ParseError", ["Character", "\ufffd"]]},
+
+{"description":"Comment with dash",
+"input":"<!---x",
+"output":["ParseError", ["Comment", "-x"]]},
+
+{"description":"Entity + newline",
+"input":"\nx\n&gt;\n",
+"output":[["Character","\nx\n>\n"]]}

 ]}

--- a/vendor/plugins/HTML5lib/testdata/tokenizer/test3.test
+++ b/vendor/plugins/HTML5lib/testdata/tokenizer/test3.test
@ -0,0 +1,367 @@
+{"tests": [
+
+{"description":"<",
+"input":"<",
+"output":["ParseError", ["Character", "<"]]},
+
+{"description":"<>",
+"input":"<>",
+"output":["ParseError", ["Character", "<>"]]},
+
+{"description":"<!",
+"input":"<!",
+"output":["ParseError", ["Comment", ""]]},
+
+{"description":"<!>",
+"input":"<!>",
+"output":["ParseError", ["Comment", ""]]},
+
+{"description":"<!--",
+"input":"<!--",
+"output":["ParseError", ["Comment", ""]]},
+
+{"description":"<!-->",
+"input":"<!-->",
+"output":["ParseError", ["Comment", ""]]},
+
+{"description":"<!---",
+"input":"<!---",
+"output":["ParseError", ["Comment", ""]]},
+
+{"description":"<!--->",
+"input":"<!--->",
+"output":["ParseError", ["Comment", ""]]},
+
+{"description":"<!---->",
+"input":"<!---->",
+"output":[["Comment", ""]]},
+
+{"description":"<!-----",
+"input":"<!-----",
+"output":["ParseError", "ParseError", ["Comment", "-"]]},
+
+{"description":"<!----.",
+"input":"<!----.",
+"output":["ParseError", "ParseError", ["Comment", "--."]]},
+
+{"description":"<!---?",
+"input":"<!---?",
+"output":["ParseError", ["Comment", "-?"]]},
+
+{"description":"<!--?-",
+"input":"<!--?-",
+"output":["ParseError", ["Comment", "?"]]},
+
+{"description":"<!--?--",
+"input":"<!--?--",
+"output":["ParseError", ["Comment", "?"]]},
+
+{"description":"<!--?-.",
+"input":"<!--?-.",
+"output":["ParseError", ["Comment", "?-."]]},
+
+{"description":"<!--?.",
+"input":"<!--?.",
+"output":["ParseError", ["Comment", "?."]]},
+
+{"description":"<?>",
+"input":"<?>",
+"output":["ParseError", ["Comment", "?"]]},
+
+{"description":"<??",
+"input":"<??",
+"output":["ParseError", ["Comment", "??"]]},
+
+{"description":"</",
+"input":"</",
+"output":["ParseError", ["Character", "</"]]},
+
+{"description":"</>",
+"input":"</>",
+"output":["ParseError"]},
+
+{"description":"</?",
+"input":"</?",
+"output":["ParseError", ["Comment", "?"]]},
+
+{"description":">",
+"input":">",
+"output":[["Character", ">"]]},
+
+{"description":"-",
+"input":"-",
+"output":[["Character", "-"]]},
+
+{"description":"?",
+"input":"?",
+"output":[["Character", "?"]]},
+
+{"description":"&",
+"input":"&",
+"output":[["Character", "&"]]},
+
+{"description":"&#",
+"input":"&#",
+"output":["ParseError", ["Character", "&#"]]},
+
+{"description":"&#9",
+"input":"&#9",
+"output":["ParseError", ["Character", "\t"]]},
+
+{"description":"<!doctype >",
+"input":"<!doctype >",
+"output":["ParseError", ["DOCTYPE", "", null, null, false]]},
+
+{"description":"<!doctype  ",
+"input":"<!doctype  ",
+"output":["ParseError", ["DOCTYPE", "", null, null, false]]},
+
+{"description":"<!doctype!>",
+"input":"<!doctype!>",
+"output":["ParseError", ["DOCTYPE", "!", null, null, true]]},
+
+{"description":"<!doctype! >",
+"input":"<!doctype! >",
+"output":["ParseError", ["DOCTYPE", "!", null, null, true]]},
+
+{"description":"<!doctype!  ",
+"input":"<!doctype!  ",
+"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
+
+{"description":"<!doctype! ?>",
+"input":"<!doctype! ?>",
+"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
+
+{"description":"<!doctype! ??",
+"input":"<!doctype! ??",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
+
+{"description":"<!doctype!?",
+"input":"<!doctype!?",
+"output":["ParseError", "ParseError", ["DOCTYPE", "!?", null, null, false]]},
+
+{"description":"<!doctype! public>",
+"input":"<!doctype! public>",
+"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
+
+{"description":"<!doctype! public ",
+"input":"<!doctype! public ",
+"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
+
+{"description":"<!doctype! public?",
+"input":"<!doctype! public?",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
+
+{"description":"<!doctype! public''",
+"input":"<!doctype! public''",
+"output":["ParseError", "ParseError", ["DOCTYPE", "!", "", null, false]]},
+
+{"description":"<!doctype! public'(",
+"input":"<!doctype! public'(",
+"output":["ParseError", "ParseError", ["DOCTYPE", "!", "(", null, false]]},
+
+{"description":"<!doctype! public\"\">",
+"input":"<!doctype! public\"\">",
+"output":["ParseError", ["DOCTYPE", "!", "", null, true]]},
+
+{"description":"<!doctype! public\"\" ",
+"input":"<!doctype! public\"\" ",
+"output":["ParseError", "ParseError", ["DOCTYPE", "!", "", null, false]]},
+
+{"description":"<!doctype! public\"\"?",
+"input":"<!doctype! public\"\"?",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "!", "", null, false]]},
+
+{"description":"<!doctype! public\"\"'",
+"input":"<!doctype! public\"\"'",
+"output":["ParseError", "ParseError", ["DOCTYPE", "!", "", "", false]]},
+
+{"description":"<!doctype! public\"\"\"",
+"input":"<!doctype! public\"\"\"",
+"output":["ParseError", "ParseError", ["DOCTYPE", "!", "", "", false]]},
+
+{"description":"<!doctype! public\"#",
+"input":"<!doctype! public\"#",
+"output":["ParseError", "ParseError", ["DOCTYPE", "!", "#", null, false]]},
+
+{"description":"<!doctype! system>",
+"input":"<!doctype! system>",
+"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
+
+{"description":"<!doctype! system ",
+"input":"<!doctype! system ",
+"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
+
+{"description":"<!doctype! system?",
+"input":"<!doctype! system?",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
+
+{"description":"<!doctype! system''",
+"input":"<!doctype! system''",
+"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, "", false]]},
+
+{"description":"<!doctype! system'(",
+"input":"<!doctype! system'(",
+"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, "(", false]]},
+
+{"description":"<!doctype! system\"\">",
+"input":"<!doctype! system\"\">",
+"output":["ParseError", ["DOCTYPE", "!", null, "", true]]},
+
+{"description":"<!doctype! system\"\" ",
+"input":"<!doctype! system\"\" ",
+"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, "", false]]},
+
+{"description":"<!doctype! system\"\"?",
+"input":"<!doctype! system\"\"?",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "!", null, "", false]]},
+
+{"description":"<!doctype! system\"#",
+"input":"<!doctype! system\"#",
+"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, "#", false]]},
+
+{"description":"</z",
+"input":"</z",
+"output":["ParseError", ["EndTag", "z"]]},
+
+{"description":"<z>",
+"input":"<z>",
+"output":[["StartTag", "z", {}]]},
+
+{"description":"<z ",
+"input":"<z ",
+"output":["ParseError", ["StartTag", "z", {}]]},
+
+{"description":"<z/>",
+"input":"<z/>",
+"output":["ParseError", ["StartTag", "z", {}]]},
+
+{"description":"<z/ ",
+"input":"<z/ ",
+"output":["ParseError", "ParseError", ["StartTag", "z", {}]]},
+
+{"description":"<z//",
+"input":"<z//",
+"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {}]]},
+
+{"description":"<z",
+"input":"<z",
+"output":["ParseError", ["StartTag", "z", {}]]},
+
+{"description":"</z",
+"input":"</z",
+"output":["ParseError", ["EndTag", "z"]]},
+
+{"description":"<z0",
+"input":"<z0",
+"output":["ParseError", ["StartTag", "z0", {}]]},
+
+{"description":"<z/0=>",
+"input":"<z/0=>",
+"output":["ParseError", ["StartTag", "z", {"0": ""}]]},
+
+{"description":"<z/0= ",
+"input":"<z/0= ",
+"output":["ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
+
+{"description":"<z/0=?>",
+"input":"<z/0=?>",
+"output":["ParseError", ["StartTag", "z", {"0": "?"}]]},
+
+{"description":"<z/0=? ",
+"input":"<z/0=? ",
+"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "?"}]]},
+
+{"description":"<z/0=??",
+"input":"<z/0=??",
+"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "??"}]]},
+
+{"description":"<z/0=''",
+"input":"<z/0=''",
+"output":["ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
+
+{"description":"<z/0='&",
+"input":"<z/0='&",
+"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "&"}]]},
+
+{"description":"<z/0='%",
+"input":"<z/0='%",
+"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "%"}]]},
+
+{"description":"<z/0=\"'",
+"input":"<z/0=\"'",
+"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "'"}]]},
+
+{"description":"<z/0=\"\"",
+"input":"<z/0=\"\"",
+"output":["ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
+
+{"description":"<z/0=\"&",
+"input":"<z/0=\"&",
+"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "&"}]]},
+
+{"description":"<z/0=&",
+"input":"<z/0=&",
+"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "&"}]]},
+
+{"description":"<z/0>",
+"input":"<z/0>",
+"output":["ParseError", ["StartTag", "z", {"0": ""}]]},
+
+{"description":"<z/0 =",
+"input":"<z/0 =",
+"output":["ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
+
+{"description":"<z/0 >",
+"input":"<z/0 >",
+"output":["ParseError", ["StartTag", "z", {"0": ""}]]},
+
+{"description":"<z/0  ",
+"input":"<z/0  ",
+"output":["ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
+
+{"description":"<z/0 /",
+"input":"<z/0 /",
+"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
+
+{"description":"<z/0/",
+"input":"<z/0/",
+"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
+
+{"description":"<z/00",
+"input":"<z/00",
+"output":["ParseError", "ParseError", ["StartTag", "z", {"00": ""}]]},
+
+{"description":"<z/0 0",
+"input":"<z/0 0",
+"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
+
+{"description":"<z/0='&#9",
+"input":"<z/0='&#9",
+"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": "\t"}]]},
+
+{"description":"<z/0=\"&#9",
+"input":"<z/0=\"&#9",
+"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": "\t"}]]},
+
+{"description":"<z/0=&#9",
+"input":"<z/0=&#9",
+"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": "\t"}]]},
+
+{"description":"<z/0z",
+"input":"<z/0z",
+"output":["ParseError", "ParseError", ["StartTag", "z", {"0z": ""}]]},
+
+{"description":"<z/0 z",
+"input":"<z/0 z",
+"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "", "z": ""}]]},
+
+{"description":"<zz",
+"input":"<zz",
+"output":["ParseError", ["StartTag", "zz", {}]]},
+
+{"description":"<z/z",
+"input":"<z/z",
+"output":["ParseError", "ParseError", ["StartTag", "z", {"z": ""}]]}
+
+]}
--- a/vendor/plugins/HTML5lib/testdata/tokenizer/test4.test
+++ b/vendor/plugins/HTML5lib/testdata/tokenizer/test4.test
@ -0,0 +1,198 @@
+{"tests": [
+
+{"description":"< in attribute name",
+"input":"<z/0  <",
+"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "", "<": ""}]]},
+
+{"description":"< in attribute value",
+"input":"<z x=<",
+"output":["ParseError", ["StartTag", "z", {"x": "<"}]]},
+
+{"description":"CR EOF after doctype name",
+"input":"<!doctype html \r",
+"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
+
+{"description":"CR EOF in tag name",
+"input":"<z\r",
+"output":["ParseError", ["StartTag", "z", {}]]},
+
+{"description":"Zero hex numeric entity",
+"input":"&#x0",
+"output":["ParseError", "ParseError", ["Character", "\uFFFD"]]},
+
+{"description":"Zero decimal numeric entity",
+"input":"&#0",
+"output":["ParseError", "ParseError", ["Character", "\uFFFD"]]},
+
+{"description":"Zero-prefixed hex numeric entity",
+"input":"&#x000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000041;",
+"output":[["Character", "A"]]},
+
+{"description":"Zero-prefixed decimal numeric entity",
+"input":"&#000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000065;",
+"output":[["Character", "A"]]},
+
+{"description":"Empty hex numeric entities",
+"input":"&#x &#X ",
+"output":["ParseError", ["Character", "&#x "], "ParseError", ["Character", "&#X "]]},
+
+{"description":"Empty decimal numeric entities",
+"input":"&# &#; ",
+"output":["ParseError", ["Character", "&# "], "ParseError", ["Character", "&#; "]]},
+
+{"description":"Non-BMP numeric entity",
+"input":"&#x10000;",
+"output":[["Character", "\uD800\uDC00"]]},
+
+{"description":"Maximum non-BMP numeric entity",
+"input":"&#X10FFFF;",
+"output":[["Character", "\uDBFF\uDFFF"]]},
+
+{"description":"Above maximum numeric entity",
+"input":"&#x110000;",
+"output":["ParseError", ["Character", "\uFFFD"]]},
+
+{"description":"32-bit hex numeric entity",
+"input":"&#x80000041;",
+"output":["ParseError", ["Character", "\uFFFD"]]},
+
+{"description":"33-bit hex numeric entity",
+"input":"&#x100000041;",
+"output":["ParseError", ["Character", "\uFFFD"]]},
+
+{"description":"33-bit decimal numeric entity",
+"input":"&#4294967361;",
+"output":["ParseError", ["Character", "\uFFFD"]]},
+
+{"description":"65-bit hex numeric entity",
+"input":"&#x10000000000000041;",
+"output":["ParseError", ["Character", "\uFFFD"]]},
+
+{"description":"65-bit decimal numeric entity",
+"input":"&#18446744073709551681;",
+"output":["ParseError", ["Character", "\uFFFD"]]},
+
+{"description":"Surrogate code point edge cases",
+"input":"&#xD7FF;&#xD800;&#xD801;&#xDFFE;&#xDFFF;&#xE000;",
+"output":[["Character", "\uD7FF"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD\uE000"]]},
+
+{"description":"Uppercase start tag name",
+"input":"<X>",
+"output":[["StartTag", "x", {}]]},
+
+{"description":"Uppercase end tag name",
+"input":"</X>",
+"output":[["EndTag", "x"]]},
+
+{"description":"Uppercase attribute name",
+"input":"<x X>",
+"output":[["StartTag", "x", { "x":"" }]]},
+
+{"description":"Tag/attribute name case edge values",
+"input":"<x@AZ[`az{ @AZ[`az{>",
+"output":[["StartTag", "x@az[`az{", { "@az[`az{":"" }]]},
+
+{"description":"Duplicate different-case attributes",
+"input":"<x x=1 x=2 X=3>",
+"output":["ParseError", "ParseError", ["StartTag", "x", { "x":"1" }]]},
+
+{"description":"Uppercase close tag attributes",
+"input":"</x X>",
+"output":["ParseError", ["EndTag", "x"]]},
+
+{"description":"Duplicate close tag attributes",
+"input":"</x x x>",
+"output":["ParseError", "ParseError", ["EndTag", "x"]]},
+
+{"description":"Permitted slash",
+"input":"<br/>",
+"output":[["StartTag", "br", {}]]},
+
+{"description":"Non-permitted slash",
+"input":"<xr/>",
+"output":["ParseError", ["StartTag", "xr", {}]]},
+
+{"description":"Permitted slash but in close tag",
+"input":"</br/>",
+"output":["ParseError", ["EndTag", "br"]]},
+
+{"description":"Doctype public case-sensitivity (1)",
+"input":"<!DoCtYpE HtMl PuBlIc \"AbC\" \"XyZ\">",
+"output":[["DOCTYPE", "HtMl", "AbC", "XyZ", true]]},
+
+{"description":"Doctype public case-sensitivity (2)",
+"input":"<!dOcTyPe hTmL pUbLiC \"aBc\" \"xYz\">",
+"output":[["DOCTYPE", "hTmL", "aBc", "xYz", true]]},
+
+{"description":"Doctype system case-sensitivity (1)",
+"input":"<!DoCtYpE HtMl SyStEm \"XyZ\">",
+"output":[["DOCTYPE", "HtMl", null, "XyZ", true]]},
+
+{"description":"Doctype system case-sensitivity (2)",
+"input":"<!dOcTyPe hTmL sYsTeM \"xYz\">",
+"output":[["DOCTYPE", "hTmL", null, "xYz", true]]},
+
+{"description":"U+0000 in lookahead region after non-matching character",
+"input":"<!doc>\u0000",
+"output":["ParseError", ["Comment", "doc"], "ParseError", ["Character", "\uFFFD"]],
+"ignoreErrorOrder":true},
+
+{"description":"U+0000 in lookahead region",
+"input":"<!doc\u0000",
+"output":["ParseError", "ParseError", ["Comment", "doc\uFFFD"]],
+"ignoreErrorOrder":true},
+
+{"description":"CR followed by U+0000",
+"input":"\r\u0000",
+"output":["ParseError", ["Character", "\n\uFFFD"]],
+"ignoreErrorOrder":true},
+
+{"description":"CR followed by non-LF",
+"input":"\r?",
+"output":[["Character", "\n?"]]},
+
+{"description":"CR at EOF",
+"input":"\r",
+"output":[["Character", "\n"]]},
+
+{"description":"LF at EOF",
+"input":"\n",
+"output":[["Character", "\n"]]},
+
+{"description":"CR LF",
+"input":"\r\n",
+"output":[["Character", "\n"]]},
+
+{"description":"CR CR",
+"input":"\r\r",
+"output":[["Character", "\n\n"]]},
+
+{"description":"LF LF",
+"input":"\n\n",
+"output":[["Character", "\n\n"]]},
+
+{"description":"LF CR",
+"input":"\n\r",
+"output":[["Character", "\n\n"]]},
+
+{"description":"text CR CR CR text",
+"input":"text\r\r\rtext",
+"output":[["Character", "text\n\n\ntext"]]},
+
+{"description":"Doctype publik",
+"input":"<!DOCTYPE html PUBLIK \"AbC\" \"XyZ\">",
+"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
+
+{"description":"Doctype publi",
+"input":"<!DOCTYPE html PUBLI",
+"output":["ParseError", "ParseError", ["DOCTYPE", "html", null, null, false]]},
+
+{"description":"Doctype sistem",
+"input":"<!DOCTYPE html SISTEM \"AbC\">",
+"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
+
+{"description":"Doctype sys",
+"input":"<!DOCTYPE html SYS",
+"output":["ParseError", "ParseError", ["DOCTYPE", "html", null, null, false]]}
+
+]}
--- a/vendor/plugins/HTML5lib/testdata/tree-construction/tests1.dat
+++ b/vendor/plugins/HTML5lib/testdata/tree-construction/tests1.dat
@ -113,7 +113,6 @@ Line1<br>Line2<br>Line3<br>Line4
 <html><head></body></html>
 #errors
 6: missing document type declaration
-19: unexpected body element end tag in head
 #document
 | <html>
 |   <head>
@ -159,7 +158,6 @@ Line1<br>Line2<br>Line3<br>Line4
 </head>
 #errors
 7: missing document type declaration
-7: unexpected head element end tag
 #document
 | <html>
 |   <head>
@ -169,7 +167,6 @@ Line1<br>Line2<br>Line3<br>Line4
 </body>
 #errors
 7: missing document type declaration
-7: unexpected body element end tag
 #document
 | <html>
 |   <head>
@ -285,6 +282,7 @@ Line1<br>Line2<br>Line3<br>Line4
 |     <div>
 |       <b>
 |         <marquee>
+|           <p>
 |           "X"

 #data
@ -330,6 +328,7 @@ Unexpected end of file
 |   <body>
 |     <p>
 |     <hr>
+|     <p>

 #data
 <select><b><option><select><option></b></select>X
@ -435,6 +434,7 @@ Unexpected end of file
 #data
 <!DOCTYPE HTML><li>hello<li>world<ul>how<li>do</ul>you</body><!--do-->
 #errors
+Unexpected end of file. Expected </li>. XXX
 #document
 | <!DOCTYPE HTML>
 | <html>
@ -636,7 +636,6 @@ Unexpected end of file
 #data
 <!DOCTYPE HTML><script> <!-- </script> --> </script> EOF
 #errors
-52: unexpected script element end tag
 #document
 | <!DOCTYPE HTML>
 | <html>
@ -730,6 +729,7 @@ Unexpected end of file
 #errors
 6: missing document type declaration
 29: mismatched font element end tag (misnested tags)
+AAA </font> tag strikes again
 35: mismatched body element end tag (premature end of file?)
 #document
 | <html>
@ -1120,6 +1120,7 @@ Unexpected end of file
 15: missing document type declaration
 39: unexpected node in table context
 39: a element start tag implying a element end tag
+AAA violation: </a>
 39: unexpected node in table context
 39: mismatched a element end tag (misnested tags across <table> tag)
 43: unexpected node in table context
@ -1175,6 +1176,8 @@ Unexpected end of file
 7: missing document type declaration
 22: unexpected node in table context
 27: unexpected node in table context
+XXX more table voodoo
+XXX more table voodoo
 54: unexpected td element end tag implied other end tags
 63: unexpected node in table context
 72: mismatched body element end tag (premature end of file?)
@ -1299,11 +1302,9 @@ unexpected EOF
 #errors
 6: missing document type declaration
 12: unexpected body element start tag
-18: base element start tag out of place
-24: link element start tag out of place
-30: meta element start tag out of place
 37: title element start tag out of place
 54: unexpected body element start tag
+Missing end tag </p>. XXX
 #document
 | <html>
 |   <head>
@ -1344,7 +1345,6 @@ unexpected EOF
 3: missing document type declaration
 13: unexpected node in table context
 13: a element start tag implying a element end tag
-13: unexpected node in table context
 13: mismatched a element end tag (misnested tags across <table> tag)
 21: mismatched table element end tag
 27: a element start tag implying a element end tag
@ -1369,13 +1369,14 @@ unexpected EOF
 <head></p><meta><p>
 #errors
 6: missing document type declaration
-10: unexpected p element end tag in head
+10: unexpected p element end tag
 #document
 | <html>
 |   <head>
-|     <meta>
 |   <body>
 |     <p>
+|     <meta>
+|     <p>

 #data
 <head></html><meta><p>
@ -1485,6 +1486,7 @@ unexpected EOF
 |     <div>
 |       <b>
 |         <marquee>
+|           <p>

 #data
 <script></script></div><title></title><p><p>
@ -1511,6 +1513,7 @@ unexpected EOF
 |   <body>
 |     <p>
 |     <hr>
+|     <p>

 #data
 <select><b><option><select><option></b></select>
@ -1571,6 +1574,8 @@ unexpected EOF
 <ul><li></li><div><li></div><li><li><div><li><address><li><b><em></b><li></ul>
 #errors
 4: missing document type declaration
+Missing end tag for <div> (nr2)
+Missing end tag for <address>
 69: mismatched b element end tag (misnested tags)
 #document
 | <html>
@ -1615,7 +1620,6 @@ unexpected EOF
 56: unexpected frameset element start tag in body
 63: unexpected frame element start tag in body
 74: unexpected frameset element end tag
-87: unescaped '</' in CDATA or RCDATA block
 106: unexpected end of file while parsing CDATA section for element noframes
 #document
 | <html>
@ -1630,6 +1634,7 @@ unexpected EOF
 4: missing document type declaration
 15: required tr element start tag implied by unexpected td element start tag
 27: unexpected td element end tag implied other end tags
+Unexpected </h1> tag. Expected other.
 Unexpected EOF
 #document
 | <html>
@ -1737,9 +1742,9 @@ Unexpected EOF
 108: unexpected h4 element end tag
 113: unexpected h5 element end tag
 118: unexpected h6 element end tag
-125: unexpected body element end tag
+125: unexpected end tag token br in after body phase
 130: unexpected br element end tag
-134: unexpected a element end tag
+134: unexpected a element end tag (AAA)
 140: unexpected img element end tag
 148: unexpected title element end tag
 155: unexpected span element end tag
@ -1807,6 +1812,7 @@ Unexpected EOF
 |   <head>
 |   <body>
 |     <br>
+|     <p>

 #data
 <table><tr></strong></b></em></i></u></strike></s></blink></tt></pre></big></small></font></select></h1></h2></h3></h4></h5></h6></body></br></a></img></title></span></style></script></table></th></td></tr></frame></area></link></param></hr></input></col></base></meta></basefont></bgsound></embed></spacer></p></dd></dt></caption></colgroup></tbody></tfoot></thead></address></blockquote></center></dir></div></dl></fieldset></listing></menu></ol></ul></li></nobr></wbr></form></button></marquee></object></html></frameset></head></iframe></image></isindex></noembed></noframes></noscript></optgroup></option></plaintext></textarea>
@ -1920,6 +1926,9 @@ Unexpected EOF
 610: unexpected option element end tag
 622: unexpected plaintext element end tag
 633: mismatched special end tag textarea
+XXX
+XXX
+XXX
 #document
 | <html>
 |   <head>
@ -1928,3 +1937,14 @@ Unexpected EOF
 |     <table>
 |       <tbody>
 |         <tr>
+|     <p>
+
+#data
+<frameset>
+#errors
+10: Start tag seen without seeing a doctype first.
+11: End of file seen and there were open elements.
+#document
+| <html>
+|   <head>
+|   <frameset>
--- a/vendor/plugins/HTML5lib/testdata/tree-construction/tests2.dat
+++ b/vendor/plugins/HTML5lib/testdata/tree-construction/tests2.dat
@ -12,7 +12,6 @@
 <textarea>test</div>test
 #errors
 10: missing document type declaration.
-17: unescaped '</' in CDATA or RCDATA block.
 25: unexpected end of file while parsing CDATA section for element textarea.
 #document
 | <html>
@ -87,6 +86,8 @@ Expected end tag </frameset>
 #data
 <!DOCTYPE HTML><font><p><b>test</font>
 #errors
+AAA violation. </font>
+AAA violation. </font>
 #document
 | <!DOCTYPE HTML>
 | <html>
@ -101,6 +102,7 @@ Expected end tag </frameset>
 #data
 <!DOCTYPE HTML><dt><div><dd>
 #errors
+Missing end tag for <div>.
 #document
 | <!DOCTYPE HTML>
 | <html>
@ -114,7 +116,6 @@ Expected end tag </frameset>
 <script></x
 #errors
 no document type
-</ in script
 Unexpected end of file. Expected </script> end tag.
 #document
 | <html>
@ -129,6 +130,7 @@ Unexpected end of file. Expected </script> end tag.
 no document type
 <plaintext> directly inside table
 Characters inside table.
+Characters inside table. (XXX?)
 Unexpected end of file.
 #document
 | <html>
@ -175,10 +177,10 @@ Unexpected start tag "body"
 | <html>
 |   <head>
 |   <body>
-|     t4="4"
+|     t1="1"
 |     t2="2"
 |     t3="3"
-|     t1="1"
+|     t4="4"

 #data
 </b test
@ -195,7 +197,6 @@ Unexpected end tag.
 #data
 <!DOCTYPE HTML></b test<b &=&amp>X
 #errors
-Unexpected < in attribute
 End tag contains attributes.
 Unexpected end tag.
 Named entity didn't end with ;
@ -224,7 +225,6 @@ Unexpected EOF in (end) tag name
 &
 #errors
 No doctype.
-Unfinished entity.
 #document
 | <html>
 |   <head>
@ -349,11 +349,11 @@ Unexpected end EOF. Missing closing tags.
 |       <b>
 |         <i>
 |           <u>
-|     " "
-|     <p>
-|       <b>
-|         <i>
-|           <u>
+|     <b>
+|       <i>
+|         <u>
+|           " "
+|           <p>
 |             "X"

 #data
@ -538,10 +538,10 @@ No doctype
 |       <hr>
 |       <p>
 |         <label>
-|           "This is a searchable index. Insert your search keywords here:"
+|           "This is a searchable index. Insert your search keywords here: "
 |           <input>
-|             test="x"
 |             name="isindex"
+|             test="x"
 |       <hr>

 #data
@ -571,19 +571,18 @@ Unexpected EOF.
 |       <b>
 |         <i>
 |           <u>
-|     "
+|     <b>
+|       <i>
+|         <u>
+|           "
 "
-|     <p>
-|       <b>
-|         <i>
-|           <u>
+|           <p>
 |             "X"

 #data
 <!DOCTYPE HTML><body><title>test</body></title>
 #errors
 Unexpected start tag that belongs in the head.
-Expected closing tag after </.
 #document
 | <!DOCTYPE HTML>
 | <html>
@ -596,10 +595,7 @@ Expected closing tag after </.
 <!DOCTYPE HTML><body><title>X</title><meta name=z><link rel=foo><style>
 x { content:"</style" } </style>
 #errors
-Unexpected start tag that belongs in head.
-Unexpected start tag that belongs in head.
-Unexpected start tag that belongs in head.
-Expected closing tag after </.
+Unexpected start tag that belongs in head. <title>
 #document
 | <!DOCTYPE HTML>
 | <html>
@ -632,8 +628,6 @@ x { content:"</style" } "
 #errors
 No doctype.
 #document
-| " 
- "
 | <html>
 |   <head>
 |   <body>
@ -643,7 +637,6 @@ No doctype.
 #errors
 #document
 | <!DOCTYPE HTML>
-| "  "
 | <html>
 |   <head>
 |   <body>
@ -749,8 +742,8 @@ Solidus (/) incorrectly placed.
 |   <body>
 |     "X"
 |     <p>
-|       y=""
 |       x=""
+|       y=""
 |       z=""

 #data
@ -777,3 +770,4 @@ Unexpected </p> end tag.
 |       <tbody>
 |         <tr>
 |           <td>
+|             <p>
--- a/vendor/plugins/HTML5lib/testdata/tree-construction/tests3.dat
+++ b/vendor/plugins/HTML5lib/testdata/tree-construction/tests3.dat
@ -61,7 +61,6 @@ No DOCTYPE

 #data
 <!DOCTYPE htML><html><head></head><body><pre>
-
 foo</pre></body></html>
 #errors
 #document
@ -72,10 +71,22 @@ foo</pre></body></html>
 |     <pre>
 |       "foo"

-
 #data
 <!DOCTYPE htML><html><head></head><body><pre>

+foo</pre></body></html>
+#errors
+#document
+| <!DOCTYPE htML>
+| <html>
+|   <head>
+|   <body>
+|     <pre>
+|       "
+foo"
+
+#data
+<!DOCTYPE htML><html><head></head><body><pre>
 foo
 </pre></body></html>
 #errors
@ -120,6 +131,7 @@ y"
 <!DOCTYPE htML><html><head></head><body><pre>x<div>
 y</pre></body></html>
 #errors
+End tag <pre> seen too early. Expected other end tag.
 #document
 | <!DOCTYPE htML>
 | <html>
@ -129,11 +141,12 @@ y</pre></body></html>
 |       "x"
 |       <div>
 |         "
-| y"
+y"

 #data
 <!DOCTYPE htML><HTML><META><HEAD></HEAD></HTML>
 #errors
+Unexpected start tag HEAD in HEAD. Ignored.
 #document
 | <!DOCTYPE htML>
 | <html>
@ -144,6 +157,7 @@ y</pre></body></html>
 #data
 <!DOCTYPE htML><HTML><HEAD><head></HEAD></HTML>
 #errors
+Unexpected start tag HEAD in HEAD. Ignored.
 #document
 | <!DOCTYPE htML>
 | <html>
@ -153,6 +167,8 @@ y</pre></body></html>
 #data
 <textarea>foo<span>bar</span><i>baz
 #errors
+Unexpected start tag. Expected DOCTYPE.
+Unexpected end of file.
 #document
 | <html>
 |   <head>
@ -163,6 +179,8 @@ y</pre></body></html>
 #data
 <title>foo<span>bar</em><i>baz
 #errors
+Unexpected start tag. Expected DOCTYPE.
+Unexpected end of file.
 #document
 | <html>
 |   <head>
@ -183,7 +201,6 @@ y</pre></body></html>

 #data
 <!DOCTYPE htML><textarea>
-
 foo</textarea>
 #errors
 #document
@ -194,6 +211,20 @@ foo</textarea>
 |     <textarea>
 |       "foo"

+#data
+<!DOCTYPE htML><textarea>
+
+foo</textarea>
+#errors
+#document
+| <!DOCTYPE htML>
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "
+foo"
+
 #data
 <!DOCTYPE htML><html><head></head><body><ul><li><div><p><li></ul></body></html>
 #errors
@ -212,6 +243,8 @@ Missing end tag (div)
 #data
 <!doctype html><nobr><nobr><nobr>
 #errors
+Unexpected <nobr> tag.
+Unexpected <nobr> tag.
 Unexpected end of file.
 #document
 | <!DOCTYPE html>
@ -225,6 +258,7 @@ Unexpected end of file.
 #data
 <!doctype html><nobr><nobr></nobr><nobr>
 #errors
+Unexpected <nobr> tag.
 Unexpected end of file.
 #document
 | <!DOCTYPE html>
--- a/vendor/plugins/HTML5lib/testdata/tree-construction/tests4.dat
+++ b/vendor/plugins/HTML5lib/testdata/tree-construction/tests4.dat
@ -1,37 +1,50 @@
 #data
 direct div content
 #errors
-#document-fragment div
+#document-fragment
+div
+#document
 | "direct div content"

 #data
 direct textarea content
 #errors
-#document-fragment textarea
+#document-fragment
+textarea
+#document
 | "direct textarea content"

 #data
 textarea content with <em>pseudo</em> <foo>markup
 #errors
-#document-fragment textarea
+#document-fragment
+textarea
+#document
 | "textarea content with <em>pseudo</em> <foo>markup"

 #data
 this is &#x0043;DATA inside a <style> element
 #errors
-#document-fragment style
+#document-fragment
+style
+#document
 | "this is &#x0043;DATA inside a <style> element"

 #data
 </plaintext>
 #errors
-#document-fragment plaintext
+#document-fragment
+plaintext
+#document
 | "</plaintext>"

 #data
 setting html's innerHTML
 #errors
-#document-fragment html
+XXX innerHTML EOF
+#document-fragment
+html
+#document
 | <head>
 | <body>
 |   "setting html's innerHTML"
@ -39,6 +52,9 @@ setting html's innerHTML
 #data
 <title>setting head's innerHTML</title>
 #errors
-#document-fragment head
+Unexpected title element that belongs in head.
+#document-fragment
+head
+#document
 | <title>
 |   "setting head's innerHTML"
--- a/vendor/plugins/HTML5lib/testdata/tree-construction/tests5.dat
+++ b/vendor/plugins/HTML5lib/testdata/tree-construction/tests5.dat
@ -110,7 +110,6 @@ No DOCTYPE
 <style> <!</-- </style>x
 #errors
 No DOCTYPE
-Unexpected end of file
 #document
 | <html>
 |   <head>
@ -118,3 +117,59 @@ Unexpected end of file
 |       " <!</-- "
 |   <body>
 |     "x"
+
+#data
+<xmp> <!-- > --> </xmp>
+#errors
+No DOCTYPE
+#document
+| <html>
+|   <head>
+|   <body>
+|     <xmp>
+|       " <!-- > --> "
+
+#data
+<title>&amp;</title>
+#errors
+No DOCTYPE
+#document
+| <html>
+|   <head>
+|     <title>
+|       "&"
+|   <body>
+
+#data
+<title><!--&amp;--></title>
+#errors
+No DOCTYPE
+#document
+| <html>
+|   <head>
+|     <title>
+|       "<!--&amp;-->"
+|   <body>
+
+#data
+<title><!--</title>
+#errors
+No DOCTYPE
+Unexpected EOF
+#document
+| <html>
+|   <head>
+|     <title>
+|       "<!--</title>"
+|   <body>
+
+#data
+<noscript><!--</noscript>--></noscript>
+#errors
+No DOCTYPE
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       "<!--</noscript>-->"
+|   <body>
--- a/vendor/plugins/HTML5lib/testdata/tree-construction/tests6.dat
+++ b/vendor/plugins/HTML5lib/testdata/tree-construction/tests6.dat
@ -1,6 +1,7 @@
 #data
 <!doctype html></head> <head>
 #errors
+Unexpected start tag head. Ignored.
 #document
 | <!DOCTYPE html>
 | <html>
@ -11,6 +12,9 @@
 #data
 <!doctype html></html> <head>
 #errors
+Unexpected start tag head.
+Unexpected start tag head in after body phase.
+Unexpected start tag head. Ignored.
 #document
 | <!DOCTYPE html>
 | <html>
@ -21,9 +25,69 @@
 #data
 <!doctype html></body><meta>
 #errors
+Unexpected meta element in after body phase.
 #document
 | <!DOCTYPE html>
 | <html>
 |   <head>
 |   <body>
 |     <meta>
+
+#data
+<!doctype HTml><form><div></form><div>
+#errors
+Form end tag ignored.
+Unexpected end of file.
+#document
+| <!DOCTYPE HTml>
+| <html>
+|   <head>
+|   <body>
+|     <form>
+|       <div>
+|         <div>
+
+#data
+<!doctype HTml><title>&amp;</title>
+#errors
+#document
+| <!DOCTYPE HTml>
+| <html>
+|   <head>
+|     <title>
+|       "&"
+|   <body>
+
+#data
+<!doctype HTml><title><!--&amp;--></title>
+#errors
+#document
+| <!DOCTYPE HTml>
+| <html>
+|   <head>
+|     <title>
+|       "<!--&amp;-->"
+|   <body>
+
+#data
+<!doctype>
+#errors
+No space after "doctype"
+Unexpected ">"
+Incorrect doctype
+#document
+| <!DOCTYPE >
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!---x
+#errors
+End of file in comment
+End of file before doctype
+#document
+| <!-- -x -->
+| <html>
+|   <head>
+|   <body>
--- a/vendor/plugins/HTML5lib/testdata/validator/attributes.test
+++ b/vendor/plugins/HTML5lib/testdata/validator/attributes.test
--- a/vendor/plugins/HTML5lib/testdata/validator/classattribute.test
+++ b/vendor/plugins/HTML5lib/testdata/validator/classattribute.test
@ -0,0 +1,159 @@
+{"tests": [
+
+{"description": "valid single class attribute value",
+"input": "<span class=a>",
+"fail-if": "invalid-attribute-value"},
+
+{"description": "valid single class attribute value with leading space",
+"input": "<span class=' a'>",
+"fail-if": "invalid-attribute-value"},
+
+{"description": "valid single class attribute value with trailing space",
+"input": "<span class='a '>",
+"fail-if": "invalid-attribute-value"},
+
+{"description": "valid single class attribute value with leading and trailing space",
+"input": "<span class=' a '>",
+"fail-if": "invalid-attribute-value"},
+
+{"description": "valid single class attribute value with leading tab",
+"input": "<span class='	a'>",
+"fail-if": "invalid-attribute-value"},
+
+{"description": "valid single class attribute value with trailing tab",
+"input": "<span class='a	'>",
+"fail-if": "invalid-attribute-value"},
+
+{"description": "valid single class attribute value with leading and trailing tab",
+"input": "<span class='	a	'>",
+"fail-if": "invalid-attribute-value"},
+
+{"description": "valid single class attribute value with leading LF",
+"input": "<span class='
+a'>",
+"fail-if": "invalid-attribute-value"},
+
+{"description": "valid single class attribute value with trailing LF",
+"input": "<span class='a
+'>",
+"fail-if": "invalid-attribute-value"},
+
+{"description": "valid single class attribute value with leading and trailing LF",
+"input": "<span class='
+a
+'>",
+"fail-if": "invalid-attribute-value"},
+
+{"description": "valid single class attribute value with leading LT",
+"input": "<span class='a'>",
+"fail-if": "invalid-attribute-value"},
+
+{"description": "valid single class attribute value with trailing LT",
+"input": "<span class='a'>",
+"fail-if": "invalid-attribute-value"},
+
+{"description": "valid single class attribute value with leading and trailing LT",
+"input": "<span class='a'>",
+"fail-if": "invalid-attribute-value"},
+
+{"description": "valid single class attribute value with leading FF",
+"input": "<span class='a'>",
+"fail-if": "invalid-attribute-value"},
+
+{"description": "valid single class attribute value with trailing FF",
+"input": "<span class='a'>",
+"fail-if": "invalid-attribute-value"},
+
+{"description": "valid single class attribute value with leading and trailing FF",
+"input": "<span class='a'>",
+"fail-if": "invalid-attribute-value"},
+
+{"description": "valid single class attribute value with leading CR",
+"input": "<span class='
a'>",
+"fail-if": "invalid-attribute-value"},
+
+{"description": "valid single class attribute value with trailing CR",
+"input": "<span class='a
'>",
+"fail-if": "invalid-attribute-value"},
+
+{"description": "valid single class attribute value with leading and trailing CR",
+"input": "<span class='
a
'>",
+"fail-if": "invalid-attribute-value"},
+
+{"description": "valid double class attribute value separated by space",
+"input": "<span class='a b'>",
+"fail-if": "invalid-attribute-value"},
+
+{"description": "valid double class attribute value separated by tab",
+"input": "<span class='a	b'>",
+"fail-if": "invalid-attribute-value"},
+
+{"description": "valid double class attribute value separated by LF",
+"input": "<span class='a
+b'>",
+"fail-if": "invalid-attribute-value"},
+
+{"description": "valid double class attribute value separated by LT",
+"input": "<span class='ab'>",
+"fail-if": "invalid-attribute-value"},
+
+{"description": "valid double class attribute value separated by FF",
+"input": "<span class='ab'>",
+"fail-if": "invalid-attribute-value"},
+
+{"description": "valid double class attribute value separated by CR",
+"input": "<span class='a
b'>",
+"fail-if": "invalid-attribute-value"},
+
+{"description": "invalid duplicated class attribute value separated by space",
+"input": "<span class='a a'>",
+"fail-unless": "invalid-attribute-value"},
+
+{"description": "invalid duplicated class attribute value separated by tab",
+"input": "<span class='a	a'>",
+"fail-unless": "invalid-attribute-value"},
+
+{"description": "invalid duplicated class attribute value separated by LF",
+"input": "<span class='a
+a'>",
+"fail-unless": "invalid-attribute-value"},
+
+{"description": "invalid duplicated class attribute value separated by LT",
+"input": "<span class='aa'>",
+"fail-unless": "invalid-attribute-value"},
+
+{"description": "invalid duplicated class attribute value separated by FF",
+"input": "<span class='aa'>",
+"fail-unless": "invalid-attribute-value"},
+
+{"description": "invalid duplicated class attribute value separated by CR",
+"input": "<span class='a
a'>",
+"fail-unless": "duplicate-value-in-token-list"},
+
+{"description": "invalid duplicated class attribute value separated by space",
+"input": "<span class='a a'>",
+"fail-unless": "duplicate-value-in-token-list"},
+
+{"description": "invalid duplicated class attribute value separated by tab",
+"input": "<span class='a	a'>",
+"fail-unless": "duplicate-value-in-token-list"},
+
+{"description": "invalid duplicated class attribute value separated by LF",
+"input": "<span class='a
+a'>",
+"fail-unless": "duplicate-value-in-token-list"},
+
+{"description": "invalid duplicated class attribute value separated by LT",
+"input": "<span class='aa'>",
+"fail-unless": "duplicate-value-in-token-list"},
+
+{"description": "invalid duplicated class attribute value separated by FF",
+"input": "<span class='aa'>",
+"fail-unless": "duplicate-value-in-token-list"},
+
+{"description": "invalid duplicated class attribute value separated by CR",
+"input": "<span class='a
a'>",
+"fail-unless": "duplicate-value-in-token-list"}
+
+]}
+
--- a/vendor/plugins/HTML5lib/testdata/validator/contenteditableattribute.test
+++ b/vendor/plugins/HTML5lib/testdata/validator/contenteditableattribute.test
@ -0,0 +1,59 @@
+{"tests": [
+
+{"description": "valid contenteditable attribute value 'true'",
+"input": "<span contenteditable=true>",
+"fail-if": "invalid-attribute-value"},
+
+{"description": "valid contenteditable attribute value 'TRUE'",
+"input": "<span contenteditable=TRUE>",
+"fail-if": "invalid-attribute-value"},
+
+{"description": "valid contenteditable attribute value 'TrUe'",
+"input": "<span contenteditable=TrUe>",
+"fail-if": "invalid-attribute-value"},
+
+{"description": "valid contenteditable attribute value 'false'",
+"input": "<span contenteditable=false>",
+"fail-if": "invalid-attribute-value"},
+
+{"description": "valid contenteditable attribute value 'FALSE'",
+"input": "<span contenteditable=FALSE>",
+"fail-if": "invalid-attribute-value"},
+
+{"description": "valid contenteditable attribute value 'FalSe'",
+"input": "<span contenteditable=FalSe>",
+"fail-if": "invalid-attribute-value"},
+
+{"description": "valid contenteditable attribute value ''",
+"input": "<span contenteditable=''>",
+"fail-if": "invalid-attribute-value"},
+
+{"description": "valid contenteditable attribute value (not specified)",
+"input": "<span contenteditable>",
+"fail-if": "invalid-attribute-value"},
+
+{"description": "invalid contenteditable attribute value 'foo'",
+"input": "<span contenteditable=foo>",
+"fail-unless": "invalid-attribute-value"},
+
+{"description": "invalid contenteditable attribute value '0'",
+"input": "<span contenteditable=0>",
+"fail-unless": "invalid-attribute-value"},
+
+{"description": "invalid contenteditable attribute value '1'",
+"input": "<span contenteditable=1>",
+"fail-unless": "invalid-attribute-value"},
+
+{"description": "invalid contenteditable attribute value 'yes'",
+"input": "<span contenteditable=yes>",
+"fail-unless": "invalid-attribute-value"},
+
+{"description": "invalid contenteditable attribute value 'no'",
+"input": "<span contenteditable=no>",
+"fail-unless": "invalid-attribute-value"},
+
+{"description": "invalid contenteditable attribute value 'inherit'",
+"input": "<span contenteditable=inherit>",
+"fail-unless": "invalid-attribute-value"}
+
+]}
--- a/vendor/plugins/HTML5lib/testdata/validator/contextmenuattribute.test
+++ b/vendor/plugins/HTML5lib/testdata/validator/contextmenuattribute.test
@ -0,0 +1,118 @@
+{"tests": [
+
+{"description": "contextmenu points to valid ID earlier",
+"input": "<menu id=a><span contextmenu=a>",
+"fail-if": "id-does-not-exist"},
+
+{"description": "contextmenu points to valid ID later",
+"input": "<span contextmenu=a><menu id=a>",
+"fail-if": "id-does-not-exist"},
+
+{"description": "contextmenu points to non-existent ID",
+"input": "<span contextmenu=a>",
+"fail-unless": "id-does-not-exist"},
+
+{"description": "contextmenu points to ID on non-menu element",
+"input": "<span id=a><span contextmenu=a>",
+"fail-unless": "contextmenu-must-point-to-menu"},
+
+{"description": "uppercase contextmenu points to ID on non-menu element",
+"input": "<span id=a><span CONTEXTMENU=a>",
+"fail-unless": "contextmenu-must-point-to-menu"},
+
+{"description": "valid ID 'a'",
+"input": "<span contextmenu=a>",
+"fail-if": "invalid-attribute-value"},
+
+{"description": "valid ID '1'",
+"input": "<span contextmenu=1>",
+"fail-if": "invalid-attribute-value"},
+
+{"description": "wacky but valid ID",
+"input": "<span contextmenu='<html><head><title>a</title></head><body><p>b</p></body></html>'>",
+"fail-if": "invalid-attribute-value"},
+
+{"description": "invalid blank ID",
+"input": "<span id>",
+"fail-unless": "attribute-value-can-not-be-blank"},
+
+{"description": "invalid blank ID with quotes",
+"input": "<span contextmenu=''>",
+"fail-unless": "attribute-value-can-not-be-blank"},
+
+{"description": "invalid ID because of leading space",
+"input": "<span contextmenu=' a'>",
+"fail-unless": "space-in-id"},
+
+{"description": "invalid ID because of trailing space",
+"input": "<span contextmenu='a '>",
+"fail-unless": "space-in-id"},
+
+{"description": "invalid ID because of space in value",
+"input": "<span contextmenu='a b'>",
+"fail-unless": "space-in-id"},
+
+{"description": "invalid ID because of leading tab",
+"input": "<span contextmenu='	a'>",
+"fail-unless": "space-in-id"},
+
+{"description": "invalid ID because of trailing tab",
+"input": "<span contextmenu='a	'>",
+"fail-unless": "space-in-id"},
+
+{"description": "invalid ID because of tab in value",
+"input": "<span contextmenu='a	b'>",
+"fail-unless": "space-in-id"},
+
+{"description": "invalid ID because of leading LF",
+"input": "<span contextmenu='
+a'>",
+"fail-unless": "space-in-id"},
+
+{"description": "invalid ID because of trailing LF",
+"input": "<span contextmenu='a
+'>",
+"fail-unless": "space-in-id"},
+
+{"description": "invalid ID because of LF in value",
+"input": "<span contextmenu='a
+b'>",
+"fail-unless": "space-in-id"},
+
+{"description": "invalid ID because of leading LT",
+"input": "<span contextmenu='a'>",
+"fail-unless": "space-in-id"},
+
+{"description": "invalid ID because of trailing LT",
+"input": "<span contextmenu='a'>",
+"fail-unless": "space-in-id"},
+
+{"description": "invalid ID because of LT in value",
+"input": "<span contextmenu='ab'>",
+"fail-unless": "space-in-id"},
+
+{"description": "invalid ID because of leading FF",
+"input": "<span contextmenu='a'>",
+"fail-unless": "space-in-id"},
+
+{"description": "invalid ID because of trailing FF",
+"input": "<span contextmenu='a'>",
+"fail-unless": "space-in-id"},
+
+{"description": "invalid ID because of FF in value",
+"input": "<span contextmenu='ab'>",
+"fail-unless": "space-in-id"},
+
+{"description": "invalid ID because of leading CR",
+"input": "<span contextmenu='
a'>",
+"fail-unless": "space-in-id"},
+
+{"description": "invalid ID because of trailing CR",
+"input": "<span contextmenu='a
'>",
+"fail-unless": "space-in-id"},
+
+{"description": "invalid ID because of CR in value",
+"input": "<span contextmenu='a
b'>",
+"fail-unless": "space-in-id"}
+
+]}
--- a/vendor/plugins/HTML5lib/testdata/validator/idattribute.test
+++ b/vendor/plugins/HTML5lib/testdata/validator/idattribute.test
@ -0,0 +1,118 @@
+{"tests": [
+
+{"description": "valid ID 'a'",
+"input": "<span id=a>",
+"fail-if": "invalid-attribute-value"},
+
+{"description": "valid ID '1'",
+"input": "<span id=1>",
+"fail-if": "invalid-attribute-value"},
+
+{"description": "wacky but valid ID",
+"input": "<span id='<html><head><title>a</title></head><body><p>b</p></body></html>'>",
+"fail-if": "invalid-attribute-value"},
+
+{"description": "invalid blank ID",
+"input": "<span id>",
+"fail-unless": "attribute-value-can-not-be-blank"},
+
+{"description": "invalid blank ID with quotes",
+"input": "<span id=''>",
+"fail-unless": "attribute-value-can-not-be-blank"},
+
+{"description": "invalid ID because of leading space",
+"input": "<span id=' a'>",
+"fail-unless": "space-in-id"},
+
+{"description": "invalid ID because of trailing space",
+"input": "<span id='a '>",
+"fail-unless": "space-in-id"},
+
+{"description": "invalid ID because of space in value",
+"input": "<span id='a b'>",
+"fail-unless": "space-in-id"},
+
+{"description": "invalid ID because of leading tab",
+"input": "<span id='	a'>",
+"fail-unless": "space-in-id"},
+
+{"description": "invalid ID because of trailing tab",
+"input": "<span id='a	'>",
+"fail-unless": "space-in-id"},
+
+{"description": "invalid ID because of tab in value",
+"input": "<span id='a	b'>",
+"fail-unless": "space-in-id"},
+
+{"description": "invalid ID because of leading LF",
+"input": "<span id='
+a'>",
+"fail-unless": "space-in-id"},
+
+{"description": "invalid ID because of trailing LF",
+"input": "<span id='a
+'>",
+"fail-unless": "space-in-id"},
+
+{"description": "invalid ID because of LF in value",
+"input": "<span id='a
+b'>",
+"fail-unless": "space-in-id"},
+
+{"description": "invalid ID because of leading LT",
+"input": "<span id='a'>",
+"fail-unless": "space-in-id"},
+
+{"description": "invalid ID because of trailing LT",
+"input": "<span id='a'>",
+"fail-unless": "space-in-id"},
+
+{"description": "invalid ID because of LT in value",
+"input": "<span id='ab'>",
+"fail-unless": "space-in-id"},
+
+{"description": "invalid ID because of leading FF",
+"input": "<span id='a'>",
+"fail-unless": "space-in-id"},
+
+{"description": "invalid ID because of trailing FF",
+"input": "<span id='a'>",
+"fail-unless": "space-in-id"},
+
+{"description": "invalid ID because of FF in value",
+"input": "<span id='ab'>",
+"fail-unless": "space-in-id"},
+
+{"description": "invalid ID because of leading CR",
+"input": "<span id='
a'>",
+"fail-unless": "space-in-id"},
+
+{"description": "invalid ID because of trailing CR",
+"input": "<span id='a
'>",
+"fail-unless": "space-in-id"},
+
+{"description": "invalid ID because of CR in value",
+"input": "<span id='a
b'>",
+"fail-unless": "space-in-id"},
+
+{"description": "duplicate ID values",
+"input": "<span id=a><span id=a>",
+"fail-unless": "duplicate-id"},
+
+{"description": "duplicate ID values with spaces (weird but true)",
+"input": "<span id='a '><span id='a '>",
+"fail-unless": "duplicate-id"},
+
+{"description": "not duplicate ID values because spaces don't match",
+"input": "<span id=a><span id='a '>",
+"fail-if": "duplicate-id"},
+
+{"description": "not duplicate ID values because spaces don't match",
+"input": "<span id=' a'><span id='a '>",
+"fail-if": "duplicate-id"},
+
+{"description": "not duplicate ID values because case doesn't match",
+"input": "<span id=a><span id=A>",
+"fail-if": "duplicate-id"}
+
+]}
--- a/vendor/plugins/HTML5lib/testdata/validator/inputattributes.test
+++ b/vendor/plugins/HTML5lib/testdata/validator/inputattributes.test
--- a/vendor/plugins/HTML5lib/testdata/validator/starttags.test
+++ b/vendor/plugins/HTML5lib/testdata/validator/starttags.test
@ -0,0 +1,375 @@
+{"tests": [
+
+{"description": "unknown start tag <foo>",
+"input": "<foo>",
+"fail-unless": "unknown-start-tag"},
+
+{"description": "allowed start tag <code>",
+"input": "<code>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <kbd>",
+"input": "<kbd>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <aside>",
+"input": "<aside>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <datagrid>",
+"input": "<datagrid>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <font>",
+"input": "<font>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <noscript>",
+"input": "<noscript>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <style>",
+"input": "<style>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <img>",
+"input": "<img>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <title>",
+"input": "<title>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <menu>",
+"input": "<menu>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <tr>",
+"input": "<tr>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <param>",
+"input": "<param>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <li>",
+"input": "<li>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <source>",
+"input": "<source>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <tfoot>",
+"input": "<tfoot>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <th>",
+"input": "<th>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <td>",
+"input": "<td>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <dl>",
+"input": "<dl>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <blockquote>",
+"input": "<blockquote>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <dd>",
+"input": "<dd>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <abbr>",
+"input": "<abbr>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <dt>",
+"input": "<dt>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <noembed>",
+"input": "<noembed>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <p>",
+"input": "<p>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <small>",
+"input": "<small>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <meter>",
+"input": "<meter>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <em>",
+"input": "<em>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <meta>",
+"input": "<meta>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <video>",
+"input": "<video>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <div>",
+"input": "<div>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <canvas>",
+"input": "<canvas>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <sub>",
+"input": "<sub>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <section>",
+"input": "<section>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <sup>",
+"input": "<sup>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <progress>",
+"input": "<progress>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <body>",
+"input": "<body>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <base>",
+"input": "<base>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <br>",
+"input": "<br>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <address>",
+"input": "<address>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <article>",
+"input": "<article>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <strong>",
+"input": "<strong>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <legend>",
+"input": "<legend>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <event-source>",
+"input": "<event-source>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <ol>",
+"input": "<ol>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <script>",
+"input": "<script>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <caption>",
+"input": "<caption>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <dialog>",
+"input": "<dialog>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <col>",
+"input": "<col>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <h2>",
+"input": "<h2>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <h3>",
+"input": "<h3>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <h1>",
+"input": "<h1>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <h6>",
+"input": "<h6>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <h4>",
+"input": "<h4>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <h5>",
+"input": "<h5>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <header>",
+"input": "<header>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <table>",
+"input": "<table>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <span>",
+"input": "<span>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <area>",
+"input": "<area>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <dfn>",
+"input": "<dfn>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <var>",
+"input": "<var>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <cite>",
+"input": "<cite>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <thead>",
+"input": "<thead>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <head>",
+"input": "<head>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <hr>",
+"input": "<hr>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <link>",
+"input": "<link>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <datatemplate>",
+"input": "<datatemplate>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <b>",
+"input": "<b>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <colgroup>",
+"input": "<colgroup>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <ul>",
+"input": "<ul>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <del>",
+"input": "<del>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <iframe>",
+"input": "<iframe>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <pre>",
+"input": "<pre>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <figure>",
+"input": "<figure>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <ins>",
+"input": "<ins>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <tbody>",
+"input": "<tbody>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <html>",
+"input": "<html>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <nav>",
+"input": "<nav>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <details>",
+"input": "<details>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <samp>",
+"input": "<samp>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <map>",
+"input": "<map>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <nest>",
+"input": "<nest>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <object>",
+"input": "<object>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <a>",
+"input": "<a>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <footer>",
+"input": "<footer>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <i>",
+"input": "<i>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <m>",
+"input": "<m>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <rule>",
+"input": "<rule>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <q>",
+"input": "<q>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <command>",
+"input": "<command>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <time>",
+"input": "<time>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <audio>",
+"input": "<audio>",
+"fail-if": "unknown-start-tag"},
+
+{"description": "allowed start tag <bdo>",
+"input": "<bdo>",
+"fail-if": "unknown-start-tag"}
+
+]}
--- a/vendor/plugins/HTML5lib/tests/preamble.rb
+++ b/vendor/plugins/HTML5lib/tests/preamble.rb
@ -1,81 +1,70 @@
-require 'test/unit'
-
-HTML5LIB_BASE = File.dirname(File.dirname(File.dirname(File.expand_path(__FILE__)))) 
-
-if File.exists?(File.join(HTML5LIB_BASE, 'testdata'))
-  TESTDATA_DIR = File.join(HTML5LIB_BASE, 'testdata')
-else
-  TESTDATA_DIR = File.join(File.dirname(File.dirname(File.expand_path(__FILE__))), 'testdata')
-end
-
-$:.unshift File.join(File.dirname(File.dirname(__FILE__)),'lib')
-
-$:.unshift File.dirname(__FILE__)
-
-def html5lib_test_files(subdirectory)
-  Dir[File.join(TESTDATA_DIR, subdirectory, '*.*')]
-end
-
-begin
-  require 'rubygems'
-  require 'json'
-rescue LoadError
-  class JSON
-    def self.parse json
-      json.gsub!(/"\s*:/, '"=>')
-      json.gsub!(/\\u[0-9a-fA-F]{4}/) {|x| [x[2..-1].to_i(16)].pack('U')}
-      null = nil
-      eval json
-    end
-  end
-end
-
-module HTML5lib
-  module TestSupport
-    def self.startswith?(a, b)
-      b[0... a.length] == a
-    end
-
-    def self.parseTestcase(data)
-      innerHTML = nil
-      input = []
-      output = []
-      errors = []
-      currentList = input
-      data.split(/\n/).each do |line|
-        if !line.empty? and !startswith?("#errors", line) and
-          !startswith?("#document", line) and
-          !startswith?("#data", line) and
-          !startswith?("#document-fragment", line)
-
-          if currentList == output and startswith?("|", line)
-            currentList.push(line[2..-1])
-          else
-            currentList.push(line)
-          end
-        elsif line == "#errors"
-          currentList = errors
-        elsif line == "#document" or startswith?("#document-fragment", line)
-          if startswith?("#document-fragment", line)
-            innerHTML = line[19..-1]
-            raise AssertionError unless innerHTML
-          end
-          currentList = output
-        end
-      end
-      return innerHTML, input.join("\n"), output.join("\n"), errors
-    end
-
-    # convert the output of str(document) to the format used in the testcases
-    def convertTreeDump(treedump)
-      treedump.split(/\n/)[1..-1].map { |line| (line.length > 2 and line[0] == ?|) ? line[3..-1] : line }.join("\n")
-    end
-
-    def sortattrs(output)
-      output.gsub(/^(\s+)\w+=.*(\n\1\w+=.*)+/) do |match|
-         match.split("\n").sort.join("\n")
-      end
-    end
-
-  end
-end
+require 'test/unit'
+
+HTML5_BASE = File.dirname(File.dirname(File.dirname(File.expand_path(__FILE__)))) 
+
+if File.exists?(File.join(HTML5_BASE, 'testdata'))
+  TESTDATA_DIR = File.join(HTML5_BASE, 'testdata')
+else
+  TESTDATA_DIR = File.join(File.dirname(File.dirname(File.expand_path(__FILE__))), 'testdata')
+end
+
+$:.unshift File.join(File.dirname(File.dirname(__FILE__)),'lib')
+
+$:.unshift File.dirname(__FILE__)
+
+def html5_test_files(subdirectory)
+  Dir[File.join(TESTDATA_DIR, subdirectory, '*.*')]
+end
+
+require 'rubygems'
+require 'json'
+
+module HTML5
+  module TestSupport
+    # convert the output of str(document) to the format used in the testcases
+    def convertTreeDump(treedump)
+      treedump.split(/\n/)[1..-1].map { |line| (line.length > 2 and line[0] == ?|) ? line[3..-1] : line }.join("\n")
+    end
+
+    def sortattrs(output)
+      output.gsub(/^(\s+)\w+=.*(\n\1\w+=.*)+/) do |match|
+         match.split("\n").sort.join("\n")
+      end
+    end
+
+    class TestData
+      include Enumerable
+
+      def initialize(filename, sections)
+        @f = open(filename)
+        @sections = sections
+      end
+    
+      def each
+        data = {}
+        key=nil
+        @f.each_line do |line|
+          if line[0] == ?# and @sections.include?(line[1..-2])
+            heading = line[1..-2]
+            if data.any? and heading == @sections[0]
+              data[key].chomp!  #Remove trailing newline
+              yield normaliseOutput(data)
+              data = {}
+            end
+            key = heading
+            data[key]=""
+          elsif key
+            data[key] += line
+          end
+        end
+        yield normaliseOutput(data) if data
+      end
+        
+      def normaliseOutput(data)
+        #Remove trailing newlines
+        data.keys.each { |key| data[key].chomp! }
+        @sections.map {|heading| data[heading]}
+      end
+    end
+  end
+end
--- a/vendor/plugins/HTML5lib/tests/test_encoding.rb
+++ b/vendor/plugins/HTML5lib/tests/test_encoding.rb
@ -1,8 +1,10 @@
 require File.join(File.dirname(__FILE__), 'preamble')

-require 'html5lib/inputstream'
+require 'html5/inputstream'

 class Html5EncodingTestCase < Test::Unit::TestCase
+  include HTML5
+  include TestSupport

  begin
    require 'rubygems'
@ -10,23 +12,21 @@ class Html5EncodingTestCase < Test::Unit::TestCase

    def test_chardet
      file = File.open(File.join(TESTDATA_DIR, 'encoding', 'chardet', 'test_big5.txt'), 'r')
-      stream = HTML5lib::HTMLInputStream.new(file, :chardet => true)
+      stream = HTML5::HTMLInputStream.new(file, :chardet => true)
      assert_equal 'big5', stream.char_encoding.downcase
    rescue LoadError
      puts "chardet not found, skipping chardet tests"
    end
  end

-  html5lib_test_files('encoding').each do |test_file|        
+  html5_test_files('encoding').each do |test_file|        
    test_name = File.basename(test_file).sub('.dat', '').tr('-', '')

-    File.read(test_file).split("#data\n").each_with_index do |data, index|
-      next if data.empty?
-      input, encoding = data.split(/\n#encoding\s+/, 2)
-      encoding = encoding.split[0]
+    TestData.new(test_file, %w(data encoding)).
+      each_with_index do |(input, encoding), index|

      define_method 'test_%s_%d' % [ test_name, index + 1 ] do
-        stream = HTML5lib::HTMLInputStream.new(input, :chardet => false)
+        stream = HTML5::HTMLInputStream.new(input, :chardet => false)
        assert_equal encoding.downcase, stream.char_encoding.downcase, input
      end
    end
--- a/Show more
+++ b/Show more