Ruby 1.9 Compatibility

Completely removed the html5lib sanitizer. Fixed the string handling to work in both Ruby 1.8.x and 1.9.2. Inexplicably, two functional tests still fail, but the rest seems to work quite well.
2009-11-30 16:28:18 -06:00 · 2009-11-30 16:28:18 -06:00 · a6429f8c22
commit a6429f8c22
parent 79c8572053
142 changed files with 519 additions and 843 deletions
--- a/attic/vendor/plugins/HTML5lib/lib/html5/cli.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/cli.rb
@ -0,0 +1,248 @@
+$:.unshift File.dirname(__FILE__), 'lib'
+require 'html5'
+require 'ostruct'
+require 'optparse'
+
+module HTML5::CLI
+
+  # Parse the command-line switches found in argv.
+  #
+  # argv:: array of command-line words. Recognised switches are removed from
+  #        it in place (OptionParser#parse!), so only the positional
+  #        arguments are left behind.
+  #
+  # Returns an OpenStruct holding the run settings (profile, time, output,
+  # treebuilder, error, encoding, parsemethod and, once -f was given,
+  # container) together with a serializer hash of serializer options.
+  # -h prints the usage text and exits.
+  def self.parse_opts argv
+    options = OpenStruct.new(
+      :profile     => false,
+      :time        => false,
+      :output      => :html,
+      :treebuilder => 'simpletree',
+      :error       => false,
+      :encoding    => false,
+      :parsemethod => :parse,
+      :serializer  => {
+        :encoding            => 'utf-8',
+        :omit_optional_tags  => false,
+        :inject_meta_charset => false
+      }
+    )
+
+    parser = OptionParser.new
+
+    # Registers an on/off switch whose value is stored in
+    # options.serializer under the given key.
+    serializer_flag = lambda do |key, switch, description|
+      parser.on(switch, description) {|enabled| options.serializer[key] = enabled}
+    end
+
+    parser.separator ""
+    parser.separator "Parse Options:"
+    parser.on("-b", "--treebuilder NAME") {|name| options.treebuilder = name}
+    parser.on("-f", "--fragment CONTAINER", "Parse as a fragment") do |container|
+      options.parsemethod = :parse_fragment
+      options.container = container if container
+    end
+
+    parser.separator ""
+    parser.separator "Filter Options:"
+    serializer_flag.call(:inject_meta_charset, "--[no-]inject-meta-charset", "inject <meta charset>")
+    serializer_flag.call(:strip_whitespace, "--[no-]strip-whitespace", "strip unnecessary whitespace")
+    serializer_flag.call(:sanitize, "--[no-]sanitize", "escape unsafe tags")
+
+    parser.separator ""
+    parser.separator "Output Options:"
+    parser.on("--tree", "output as debug tree") {|_flag| options.output = :tree}
+    parser.on("-x", "--xml", "output as xml") do |_flag|
+      # XML output is only produced through the rexml tree builder.
+      options.output = :xml
+      options.treebuilder = "rexml"
+    end
+    parser.on("--[no-]html", "Output as html") {|html| options.output = (html ? :html : nil)}
+    parser.on("--hilite", "Output as formatted highlighted code.") {|_flag| options.output = :hilite}
+    parser.on("-e", "--error", "Print a list of parse errors") {|error| options.error = error}
+
+    parser.separator ""
+    parser.separator "Serialization Options:"
+    serializer_flag.call(:omit_optional_tags, "--[no-]omit-optional-tags", "Omit optional tags")
+    serializer_flag.call(:quote_attr_values, "--[no-]quote-attr-values", "Quote attribute values")
+    serializer_flag.call(:use_best_quote_char, "--[no-]use-best-quote-char", "Use best quote character")
+    parser.on("--quote-char C", "Use specified quote character") {|c| options.serializer[:quote_char] = c}
+    serializer_flag.call(:minimize_boolean_attributes, "--[no-]minimize-boolean-attributes", "Minimize boolean attributes")
+    serializer_flag.call(:use_trailing_solidus, "--[no-]use-trailing-solidus", "Use trailing solidus")
+    serializer_flag.call(:escape_lt_in_attrs, "--[no-]escape-lt-in-attrs", "Escape less than signs in attribute values")
+    serializer_flag.call(:escape_rcdata, "--[no-]escape-rcdata", "Escape rcdata element values")
+
+    parser.separator ""
+    parser.separator "Other Options:"
+    parser.on("-p", "--[no-]profile", "Profile the run") {|profile| options.profile = profile}
+    parser.on("-t", "--[no-]time", "Time the run") {|time| options.time = time}
+    parser.on("-c", "--[no-]encoding", "Print character encoding used") {|encoding| options.encoding = encoding}
+
+    parser.on_tail("-h", "--help", "Show this message") do
+      puts parser
+      exit
+    end
+
+    parser.parse!(argv)
+    options
+  end
+
+  # Turn the command-line argument f into something the parser can read.
+  #
+  # f:: nil, "-", an http:// or https:// URL, or a local file name
+  #
+  # Returns:
+  # * the open-uri handle for an http(s) URL
+  # * $stdin for "-"
+  # * an open File for anything else
+  # * f itself, unchanged, when opening fails (best-effort fallback kept
+  #   from the original implementation; the caller then receives the raw
+  #   argument string)
+  #
+  # When f is nil a usage hint is written to stderr and the process exits
+  # with status 1.
+  def self.open_input f
+    unless f
+      $stderr.write("No filename provided. Use -h for help\n")
+      exit(1)
+    end
+
+    begin
+      if f =~ %r{\Ahttps?://}
+        require 'open-uri'
+        URI.parse(f).open
+      elsif f == '-'
+        $stdin
+      else
+        # File.open rather than Kernel#open: the latter would run an
+        # argument starting with "|" as a shell command.
+        File.open(f)
+      end
+    rescue StandardError
+      # Deliberate fallback: hand the argument back untouched.
+      f
+    end
+  end
+
+  # Parse the input named by the last element of args and report on it.
+  #
+  # opts:: settings object as produced by parse_opts
+  # args:: positional command-line arguments; only the last one is used
+  #
+  # Depending on opts the parse is profiled (profile printed to stderr, no
+  # document output), timed (document printed, then the timings), or simply
+  # run and printed through print_output.
+  def self.parse(opts, args)
+    source   = open_input args.last
+    encoding = nil
+
+    require 'html5/treebuilders'
+    tree = HTML5::TreeBuilders[opts.treebuilder]
+
+    parser =
+      if opts.output == :xml
+        require 'html5/liberalxmlparser'
+        HTML5::XMLParser.new(:tree=>tree)
+      else
+        require 'html5/html5parser'
+        HTML5::HTMLParser.new(:tree=>tree)
+      end
+
+    # Fragment parsing takes the container element name as an extra argument.
+    call_args =
+      if opts.parsemethod == :parse
+        [source, encoding]
+      else
+        [source, (opts.container || 'div'), encoding]
+      end
+
+    if opts.profile
+      require 'profiler'
+      Profiler__::start_profile
+      parser.send(opts.parsemethod, *call_args)
+      Profiler__::stop_profile
+      Profiler__::print_profile($stderr)
+    elsif opts.time
+      require 'time' # TODO: switch to benchmark
+      started  = Time.new
+      document = parser.send(opts.parsemethod, *call_args)
+      parsed   = Time.new
+      print_output(parser, document, opts)
+      printed  = Time.new
+      puts "\n\nRun took: #{parsed-started}s (plus #{printed-parsed}s to print the output)"
+    else
+      print_output(parser, parser.send(opts.parsemethod, *call_args), opts)
+    end
+  end
+
+  # Write the parse result to stdout in the format selected by opts.output
+  # (:xml, :html, :hilite or :tree; any other value prints no document).
+  #
+  # parser::   the parser that produced document; consulted for the detected
+  #            encoding, the tree builder and the recorded parse errors
+  # document:: the parse result
+  # opts::     settings object as produced by parse_opts
+  #
+  # With opts.encoding set, the character encoding is printed first; with
+  # opts.error set, a list of parse errors is appended.
+  def self.print_output(parser, document, opts)
+    puts "Encoding: #{parser.tokenizer.stream.char_encoding}" if opts.encoding
+
+    format = opts.output
+    if format == :xml
+      print document
+    elsif format == :html
+      require 'html5/treewalkers'
+      tokens = HTML5::TreeWalkers[opts.treebuilder].new(document)
+      require 'html5/serializer'
+      puts HTML5::HTMLSerializer.serialize(tokens, opts.serializer)
+    elsif format == :hilite
+      print document.hilite
+    elsif format == :tree
+      # A fragment parse may hand back a single node instead of a list.
+      fragments = document.respond_to?(:each) ? document : [document]
+      fragments.each {|fragment| puts parser.tree.testSerializer(fragment)}
+    end
+
+    return unless opts.error
+
+    lines = []
+    parser.errors.each do |pos, errorcode, datavars|
+      template = HTML5::E[errorcode] || 'Unknown error "%(errorcode)"'
+      lines << "Line #{pos[0]} Col #{pos[1]} " + PythonicTemplate.new(template).to_s(datavars)
+    end
+    $stdout.write("\nParse errors:\n" + lines.join("\n")+"\n")
+  end
+
+  # Renders Python-style "%(name)" placeholders inside a message template.
+  class PythonicTemplate
+    # format:: template text in which "%(name)" marks a placeholder
+    #
+    # The template is turned into the source text of a double-quoted Ruby
+    # string whose placeholders interpolate @_name instance variables.
+    # The conversion works on a copy: callers pass in entries of a shared
+    # message table, and editing those in place would corrupt them for the
+    # next use (and would raise on frozen strings).
+    def initialize format
+      escaped = format.gsub('"', '\\"')
+      @format = '"' + escaped.gsub(/%\((\w+)\)/, '#{@_\1}') + '"'
+    end
+
+    # vars:: optional Hash of placeholder name (String or Symbol) => value
+    #
+    # Returns the template with its placeholders filled in. A placeholder
+    # that was never given a value renders as an empty string. Values are
+    # interpolated with to_s, so they need not be Strings.
+    def to_s(vars=nil)
+      vars.each {|var,value| instance_variable_set("@_#{var}", value)} if vars
+      # NOTE(review): the template itself is still evaluated as Ruby source,
+      # so it must only ever come from trusted, program-defined messages.
+      eval @format
+    end
+  end
+
+  # Command-line entry point: reads the switches from ARGV (which removes
+  # them from ARGV) and then parses the input named by what is left.
+  def self.run
+    parse(parse_opts(ARGV), ARGV)
+  end
+end
--- a/attic/vendor/plugins/HTML5lib/lib/html5/constants.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/constants.rb
--- a/attic/vendor/plugins/HTML5lib/lib/html5/filters/base.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/filters/base.rb
@ -0,0 +1,10 @@
+require 'delegate'
+require 'enumerator'
+
+module HTML5
+  module Filters
+    # Common superclass for the token-stream filters. It wraps a source
+    # object through SimpleDelegator (reachable via __getobj__) and mixes in
+    # Enumerable, so a subclass only has to provide #each.
+    class Base < SimpleDelegator
+      include Enumerable
+    end
+  end
+end
--- a/attic/vendor/plugins/HTML5lib/lib/html5/filters/inject_meta_charset.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/filters/inject_meta_charset.rb
@ -0,0 +1,82 @@
+require 'html5/filters/base'
+
+module HTML5
+  module Filters
+    # Token-stream filter that makes the document's head declare a given
+    # character encoding: an existing <meta charset> (or a <meta http-equiv
+    # content-type> when no charset has been set yet) is rewritten, and
+    # otherwise a <meta charset> element is inserted right after the head
+    # start tag.
+    class InjectMetaCharset < Base
+      # source::   token stream to wrap; must respond to #each
+      # encoding:: encoding name to declare; when nil no meta element is
+      #            inserted
+      def initialize(source, encoding)
+        super(source)
+        @encoding = encoding
+      end
+
+      # Yields every token of the wrapped stream, with the meta charset
+      # adjusted or inserted. Tokens between the head start and end tags are
+      # buffered so the new meta element can be placed directly after the
+      # head start tag.
+      def each
+        state = :pre_head
+        meta_found = @encoding.nil?
+        pending = []
+
+        __getobj__.each do |token|
+          case token[:type]
+          when :StartTag
+            state = :in_head if token[:name].downcase == "head"
+
+          when :EmptyTag
+            if token[:name].downcase == "meta"
+              # replace charset with actual encoding
+              has_http_equiv_content_type = false
+              content_index = -1
+              token[:data].each_with_index do |(name, value), i|
+                if name.downcase == 'charset'
+                  token[:data][i] = ['charset', @encoding]
+                  meta_found = true
+                elsif name == 'http-equiv' && value.downcase == 'content-type'
+                  has_http_equiv_content_type = true
+                elsif name == 'content'
+                  content_index = i
+                end
+              end
+
+              # no charset so far: fall back to rewriting the content-type
+              if !meta_found && has_http_equiv_content_type && content_index >= 0
+                token[:data][content_index][1] = 'text/html; charset=%s' % @encoding
+                meta_found = true
+              end
+
+            elsif token[:name].downcase == "head" && !meta_found
+              # insert meta into empty head
+              yield :type => :StartTag, :name => "head", :data => token[:data]
+              yield :type => :EmptyTag, :name => "meta", :data => [["charset", @encoding]]
+              yield :type => :EndTag,   :name => "head"
+              meta_found = true
+              next
+            end
+
+          when :EndTag
+            if token[:name].downcase == "head" && pending.any?
+              # insert meta into head (if necessary) and flush pending queue
+              yield pending.shift
+              yield :type => :EmptyTag, :name => "meta", :data => [["charset", @encoding]] if !meta_found
+              yield pending.shift while pending.any?
+              meta_found = true
+              state = :post_head
+            end
+          end
+
+          if state == :in_head
+            pending << token
+          else
+            yield token
+          end
+        end
+
+        # The stream ended without a head end tag: emit what was buffered
+        # (head start tag first, then the meta if still missing) instead of
+        # silently dropping it.
+        unless pending.empty?
+          yield pending.shift
+          yield :type => :EmptyTag, :name => "meta", :data => [["charset", @encoding]] if !meta_found
+          yield pending.shift until pending.empty?
+        end
+      end
+    end
+  end
+end
--- a/attic/vendor/plugins/HTML5lib/lib/html5/filters/iso639codes.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/filters/iso639codes.rb
@ -0,0 +1,752 @@
+# borrowed from feedvalidator, original copyright license is
+#
+# Copyright (c) 2002-2006, Sam Ruby, Mark Pilgrim, Joseph Walton, and Phil Ringnalda
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# Lookup table from lowercase language code to an English language name:
+# two-letter codes first, then 'x'/'xx' for user-defined languages, then
+# three-letter codes. is_valid_lang_code only checks whether a key exists.
+# NOTE(review): several keys are listed more than once (e.g. 'ady', 'ast',
+# 'nya'); Ruby keeps the last value of a repeated key, so only the name
+# shown for that code is affected.
+# NOTE(review): 'gws' => 'Alemanic' looks like a transposition of 'gsw',
+# which is listed further down as 'Swiss German; Alemanic' -- confirm
+# against the ISO 639-2 code list.
+ISO_LANG = {
+   'aa' => 'Afar',
+   'ab' => 'Abkhazian',
+   'ae' => 'Avestan',
+   'af' => 'Afrikaans',
+   'ak' => 'Akan',
+   'am' => 'Amharic',
+   'an' => 'Aragonese',
+   'ar' => 'Arabic',
+   'as' => 'Assamese',
+   'av' => 'Avaric',
+   'ay' => 'Aymara',
+   'az' => 'Azerbaijani',
+   'ba' => 'Bashkir',
+   'be' => 'Byelorussian',
+   'bg' => 'Bulgarian',
+   'bh' => 'Bihari',
+   'bi' => 'Bislama',
+   'bm' => 'Bambara',
+   'bn' => 'Bengali;Bangla',
+   'bo' => 'Tibetan',
+   'br' => 'Breton',
+   'bs' => 'Bosnian',
+   'ca' => 'Catalan',
+   'ce' => 'Chechen',
+   'ch' => 'Chamorro',
+   'co' => 'Corsican',
+   'cr' => 'Cree',
+   'cs' => 'Czech',
+   'cu' => 'Church Slavic',
+   'cv' => 'Chuvash',
+   'cy' => 'Welsh',
+   'da' => 'Danish',
+   'de' => 'German',
+   'dv' => 'Divehi',
+   'dz' => 'Dzongkha',
+   'ee' => 'Ewe',
+   'el' => 'Greek',
+   'en' => 'English',
+   'eo' => 'Esperanto',
+   'es' => 'Spanish',
+   'et' => 'Estonian',
+   'eu' => 'Basque',
+   'fa' => 'Persian (Farsi)',
+   'ff' => 'Fulah',
+   'fi' => 'Finnish',
+   'fj' => 'Fiji',
+   'fo' => 'Faroese',
+   'fr' => 'French',
+   'fy' => 'Frisian, Western',
+   'ga' => 'Irish',
+   'gd' => 'Scots Gaelic',
+   'gl' => 'Galician',
+   'gn' => 'Guarani',
+   'gu' => 'Gujarati',
+   'gv' => 'Manx',
+   'ha' => 'Hausa',
+   'he' => 'Hebrew',
+   'hi' => 'Hindi',
+   'ho' => 'Hiri Motu',
+   'hr' => 'Croatian',
+   'ht' => 'Haitian',
+   'hu' => 'Hungarian',
+   'hy' => 'Armenian',
+   'hz' => 'Herero',
+   'ia' => 'Interlingua',
+   'id' => 'Indonesian',
+   'ie' => 'Interlingue',
+   'ig' => 'Igbo',
+   'ii' => 'Sichuan Yi',
+   'ik' => 'Inupiak',
+   'io' => 'Ido',
+   'is' => 'Icelandic',
+   'it' => 'Italian',
+   'iu' => 'Inuktitut',
+   'ja' => 'Japanese',
+   'jv' => 'Javanese',
+   'ka' => 'Georgian',
+   'kg' => 'Kongo',
+   'ki' => 'Kikuyu; Gikuyu',
+   'kj' => 'Kuanyama; Kwanyama',
+   'kk' => 'Kazakh',
+   'kl' => 'Greenlandic',
+   'km' => 'Cambodian',
+   'kn' => 'Kannada',
+   'ko' => 'Korean',
+   'kr' => 'Kanuri',
+   'ks' => 'Kashmiri',
+   'ku' => 'Kurdish',
+   'kv' => 'Komi',
+   'kw' => 'Cornish',
+   'ky' => 'Kirghiz',
+   'la' => 'Latin',
+   'lb' => 'Letzeburgesch; Luxembourgish',
+   'lg' => 'Ganda',
+   'li' => 'Limburgan; Limburger, Limburgish',
+   'ln' => 'Lingala',
+   'lo' => 'Lao',
+   'lt' => 'Lithuanian',
+   'lu' => 'Luba-Katanga',
+   'lv' => 'Latvian',
+   'mg' => 'Malagasy',
+   'mh' => 'Marshallese',
+   'mi' => 'Maori',
+   'mk' => 'Macedonian',
+   'ml' => 'Malayalam',
+   'mn' => 'Mongolian',
+   'mo' => 'Moldavian',
+   'mr' => 'Marathi',
+   'ms' => 'Malay',
+   'mt' => 'Maltese',
+   'my' => 'Burmese',
+   'na' => 'Nauru',
+   'nb' => 'Norwegian Bokmal',
+   'nd' => 'Ndebele, North',
+   'ne' => 'Nepali',
+   'ng' => 'Ndonga',
+   'nl' => 'Dutch',
+   'nn' => 'Norwegian Nynorsk',
+   'no' => 'Norwegian',
+   'nr' => 'Ndebele, South',
+   'nv' => 'Navaho; Navajo',
+   'ny' => 'Chewa; Chichewa; Nyanha',
+   'oc' => 'Occitan',
+   'oj' => 'Ojibwa',
+   'om' => 'Afan (Oromo)',
+   'or' => 'Oriya',
+   'os' => 'Ossetian; Ossetic',
+   'pa' => 'Punjabi',
+   'pi' => 'Pali',
+   'pl' => 'Polish',
+   'ps' => 'Pushto',
+   'pt' => 'Portuguese',
+   'qu' => 'Quechua',
+   'rm' => 'Rhaeto-Romance',
+   'rn' => 'Kurundi',
+   'ro' => 'Romanian',
+   'ru' => 'Russian',
+   'rw' => 'Kinyarwanda',
+   'sa' => 'Sanskrit',
+   'sc' => 'Sardinian',
+   'sd' => 'Sindhi',
+   'se' => 'Northern Sami',
+   'sg' => 'Sangho',
+   'sh' => 'Serbo-Croatian',
+   'si' => 'Singhalese',
+   'sk' => 'Slovak',
+   'sl' => 'Slovenian',
+   'sm' => 'Samoan',
+   'sn' => 'Shona',
+   'so' => 'Somali',
+   'sq' => 'Albanian',
+   'sr' => 'Serbian',
+   'ss' => 'Swati',
+   'st' => 'Sotho, Southern',
+   'su' => 'Sundanese',
+   'sv' => 'Swedish',
+   'sw' => 'Swahili',
+   'ta' => 'Tamil',
+   'te' => 'Telugu',
+   'tg' => 'Tajik',
+   'th' => 'Thai',
+   'ti' => 'Tigrinya',
+   'tk' => 'Turkmen',
+   'tl' => 'Tagalog',
+   'tn' => 'Tswana',
+   'to' => 'Tonga',
+   'tr' => 'Turkish',
+   'ts' => 'Tsonga',
+   'tt' => 'Tatar',
+   'tw' => 'Twi',
+   'ty' => 'Tahitian',
+   'ug' => 'Uigur',
+   'uk' => 'Ukrainian',
+   'ur' => 'Urdu',
+   'uz' => 'Uzbek',
+   've' => 'Venda',
+   'vi' => 'Vietnamese',
+   'vo' => 'Volapuk',
+   'wa' => 'Walloon',
+   'wo' => 'Wolof',
+   'xh' => 'Xhosa',
+   'yi' => 'Yiddish',
+   'yo' => 'Yoruba',
+   'za' => 'Zhuang',
+   'zh' => 'Chinese',
+   'zu' => 'Zulu',
+   'x'  => 'a user-defined language',
+   'xx' => 'a user-defined language',
+   
+   'abk' => 'Abkhazian',
+   'ace' => 'Achinese',
+   'ach' => 'Acoli',
+   'ada' => 'Adangme',
+   'ady' => 'Adygei',
+   'ady' => 'Adyghe',
+   'aar' => 'Afar',
+   'afh' => 'Afrihili',
+   'afr' => 'Afrikaans',
+   'afa' => 'Afro-Asiatic (Other)',
+   'ain' => 'Ainu',
+   'aka' => 'Akan',
+   'akk' => 'Akkadian',
+   'alb' => 'Albanian',
+   'sqi' => 'Albanian',
+   'gws' => 'Alemanic',
+   'ale' => 'Aleut',
+   'alg' => 'Algonquian languages',
+   'tut' => 'Altaic (Other)',
+   'amh' => 'Amharic',
+   'anp' => 'Angika',
+   'apa' => 'Apache languages',
+   'ara' => 'Arabic',
+   'arg' => 'Aragonese',
+   'arc' => 'Aramaic',
+   'arp' => 'Arapaho',
+   'arn' => 'Araucanian',
+   'arw' => 'Arawak',
+   'arm' => 'Armenian',
+   'hye' => 'Armenian',
+   'rup' => 'Aromanian',
+   'art' => 'Artificial (Other)',
+   'asm' => 'Assamese',
+   'ast' => 'Asturian',
+   'ath' => 'Athapascan languages',
+   'aus' => 'Australian languages',
+   'map' => 'Austronesian (Other)',
+   'ava' => 'Avaric',
+   'ave' => 'Avestan',
+   'awa' => 'Awadhi',
+   'aym' => 'Aymara',
+   'aze' => 'Azerbaijani',
+   'ast' => 'Bable',
+   'ban' => 'Balinese',
+   'bat' => 'Baltic (Other)',
+   'bal' => 'Baluchi',
+   'bam' => 'Bambara',
+   'bai' => 'Bamileke languages',
+   'bad' => 'Banda',
+   'bnt' => 'Bantu (Other)',
+   'bas' => 'Basa',
+   'bak' => 'Bashkir',
+   'baq' => 'Basque',
+   'eus' => 'Basque',
+   'btk' => 'Batak (Indonesia)',
+   'bej' => 'Beja',
+   'bel' => 'Belarusian',
+   'bem' => 'Bemba',
+   'ben' => 'Bengali',
+   'ber' => 'Berber (Other)',
+   'bho' => 'Bhojpuri',
+   'bih' => 'Bihari',
+   'bik' => 'Bikol',
+   'byn' => 'Bilin',
+   'bin' => 'Bini',
+   'bis' => 'Bislama',
+   'byn' => 'Blin',
+   'nob' => 'Bokmal, Norwegian',
+   'bos' => 'Bosnian',
+   'bra' => 'Braj',
+   'bre' => 'Breton',
+   'bug' => 'Buginese',
+   'bul' => 'Bulgarian',
+   'bua' => 'Buriat',
+   'bur' => 'Burmese',
+   'mya' => 'Burmese',
+   'cad' => 'Caddo',
+   'car' => 'Carib',
+   'spa' => 'Castilian',
+   'cat' => 'Catalan',
+   'cau' => 'Caucasian (Other)',
+   'ceb' => 'Cebuano',
+   'cel' => 'Celtic (Other)',
+   'cai' => 'Central American Indian (Other)',
+   'chg' => 'Chagatai',
+   'cmc' => 'Chamic languages',
+   'cha' => 'Chamorro',
+   'che' => 'Chechen',
+   'chr' => 'Cherokee',
+   'nya' => 'Chewa',
+   'chy' => 'Cheyenne',
+   'chb' => 'Chibcha',
+   'nya' => 'Chichewa',
+   'chi' => 'Chinese',
+   'zho' => 'Chinese',
+   'chn' => 'Chinook jargon',
+   'chp' => 'Chipewyan',
+   'cho' => 'Choctaw',
+   'zha' => 'Chuang',
+   'chu' => 'Church Slavic; Church Slavonic; Old Church Slavonic; Old Church Slavic; Old Bulgarian',
+   'chk' => 'Chuukese',
+   'chv' => 'Chuvash',
+   'nwc' => 'Classical Nepal Bhasa; Classical Newari; Old Newari',
+   'cop' => 'Coptic',
+   'cor' => 'Cornish',
+   'cos' => 'Corsican',
+   'cre' => 'Cree',
+   'mus' => 'Creek',
+   'crp' => 'Creoles and pidgins(Other)',
+   'cpe' => 'Creoles and pidgins, English-based (Other)',
+   'cpf' => 'Creoles and pidgins, French-based (Other)',
+   'cpp' => 'Creoles and pidgins, Portuguese-based (Other)',
+   'crh' => 'Crimean Tatar; Crimean Turkish',
+   'scr' => 'Croatian',
+   'hrv' => 'Croatian',
+   'cus' => 'Cushitic (Other)',
+   'cze' => 'Czech',
+   'ces' => 'Czech',
+   'dak' => 'Dakota',
+   'dan' => 'Danish',
+   'dar' => 'Dargwa',
+   'day' => 'Dayak',
+   'del' => 'Delaware',
+   'din' => 'Dinka',
+   'div' => 'Divehi',
+   'doi' => 'Dogri',
+   'dgr' => 'Dogrib',
+   'dra' => 'Dravidian (Other)',
+   'dua' => 'Duala',
+   'dut' => 'Dutch',
+   'nld' => 'Dutch',
+   'dum' => 'Dutch, Middle (ca. 1050-1350)',
+   'dyu' => 'Dyula',
+   'dzo' => 'Dzongkha',
+   'efi' => 'Efik',
+   'egy' => 'Egyptian (Ancient)',
+   'eka' => 'Ekajuk',
+   'elx' => 'Elamite',
+   'eng' => 'English',
+   'enm' => 'English, Middle (1100-1500)',
+   'ang' => 'English, Old (ca.450-1100)',
+   'myv' => 'Erzya',
+   'epo' => 'Esperanto',
+   'est' => 'Estonian',
+   'ewe' => 'Ewe',
+   'ewo' => 'Ewondo',
+   'fan' => 'Fang',
+   'fat' => 'Fanti',
+   'fao' => 'Faroese',
+   'fij' => 'Fijian',
+   'fil' => 'Filipino; Pilipino',
+   'fin' => 'Finnish',
+   'fiu' => 'Finno-Ugrian (Other)',
+   'fon' => 'Fon',
+   'fre' => 'French',
+   'fra' => 'French',
+   'frm' => 'French, Middle (ca.1400-1600)',
+   'fro' => 'French, Old (842-ca.1400)',
+   'frs' => 'Frisian, Eastern',
+   'fry' => 'Frisian, Western',
+   'fur' => 'Friulian',
+   'ful' => 'Fulah',
+   'gaa' => 'Ga',
+   'gla' => 'Gaelic',
+   'glg' => 'Gallegan',
+   'lug' => 'Ganda',
+   'gay' => 'Gayo',
+   'gba' => 'Gbaya',
+   'gez' => 'Geez',
+   'geo' => 'Georgian',
+   'kat' => 'Georgian',
+   'ger' => 'German',
+   'deu' => 'German',
+   'nds' => 'German, Low',
+   'gmh' => 'German, Middle High (ca.1050-1500)',
+   'goh' => 'German, Old High (ca.750-1050)',
+   'gem' => 'Germanic (Other)',
+   'kik' => 'Gikuyu',
+   'gil' => 'Gilbertese',
+   'gon' => 'Gondi',
+   'gor' => 'Gorontalo',
+   'got' => 'Gothic',
+   'grb' => 'Grebo',
+   'grc' => 'Greek, Ancient (to 1453)',
+   'gre' => 'Greek, Modern (1453-)',
+   'ell' => 'Greek, Modern (1453-)',
+   'kal' => 'Greenlandic; Kalaallisut',
+   'grn' => 'Guarani',
+   'guj' => 'Gujarati',
+   'gwi' => 'Gwich\'in',
+   'hai' => 'Haida',
+   'hat' => 'Haitian',
+   'hau' => 'Hausa',
+   'haw' => 'Hawaiian',
+   'heb' => 'Hebrew',
+   'her' => 'Herero',
+   'hil' => 'Hiligaynon',
+   'him' => 'Himachali',
+   'hin' => 'Hindi',
+   'hmo' => 'Hiri Motu',
+   'hit' => 'Hittite',
+   'hmn' => 'Hmong',
+   'hun' => 'Hungarian',
+   'hup' => 'Hupa',
+   'iba' => 'Iban',
+   'ice' => 'Icelandic',
+   'isl' => 'Icelandic',
+   'ido' => 'Ido',
+   'ibo' => 'Igbo',
+   'ijo' => 'Ijo',
+   'ilo' => 'Iloko',
+   'smn' => 'Inari Sami',
+   'inc' => 'Indic (Other)',
+   'ine' => 'Indo-European (Other)',
+   'ind' => 'Indonesian',
+   'inh' => 'Ingush',
+   'ina' => 'Interlingua (International Auxiliary Language Association)',
+   'ile' => 'Interlingue',
+   'iku' => 'Inuktitut',
+   'ipk' => 'Inupiaq',
+   'ira' => 'Iranian (Other)',
+   'gle' => 'Irish',
+   'mga' => 'Irish, Middle (900-1200)',
+   'sga' => 'Irish, Old (to 900)',
+   'iro' => 'Iroquoian languages',
+   'ita' => 'Italian',
+   'jpn' => 'Japanese',
+   'jav' => 'Javanese',
+   'jrb' => 'Judeo-Arabic',
+   'jpr' => 'Judeo-Persian',
+   'kbd' => 'Kabardian',
+   'kab' => 'Kabyle',
+   'kac' => 'Kachin',
+   'kal' => 'Kalaallisut',
+   'xal' => 'Kalmyk',
+   'kam' => 'Kamba',
+   'kan' => 'Kannada',
+   'kau' => 'Kanuri',
+   'krc' => 'Karachay-Balkar',
+   'kaa' => 'Kara-Kalpak',
+   'krl' => 'Karelian',
+   'kar' => 'Karen',
+   'kas' => 'Kashmiri',
+   'csb' => 'Kashubian',
+   'kaw' => 'Kawi',
+   'kaz' => 'Kazakh',
+   'kha' => 'Khasi',
+   'khm' => 'Khmer',
+   'khi' => 'Khoisan (Other)',
+   'kho' => 'Khotanese',
+   'kik' => 'Kikuyu',
+   'kmb' => 'Kimbundu',
+   'kin' => 'Kinyarwanda',
+   'kir' => 'Kirghiz',
+   'tlh' => 'Klingon; tlhIngan-Hol',
+   'kom' => 'Komi',
+   'kon' => 'Kongo',
+   'kok' => 'Konkani',
+   'kor' => 'Korean',
+   'kos' => 'Kosraean',
+   'kpe' => 'Kpelle',
+   'kro' => 'Kru',
+   'kua' => 'Kuanyama',
+   'kum' => 'Kumyk',
+   'kur' => 'Kurdish',
+   'kru' => 'Kurukh',
+   'kut' => 'Kutenai',
+   'kua' => 'Kwanyama',
+   'lad' => 'Ladino',
+   'lah' => 'Lahnda',
+   'lam' => 'Lamba',
+   'lao' => 'Lao',
+   'lat' => 'Latin',
+   'lav' => 'Latvian',
+   'ltz' => 'Letzeburgesch',
+   'lez' => 'Lezghian',
+   'lim' => 'Limburgan',
+   'lin' => 'Lingala',
+   'lit' => 'Lithuanian',
+   'jbo' => 'Lojban',
+   'nds' => 'Low German',
+   'dsb' => 'Lower Sorbian',
+   'loz' => 'Lozi',
+   'lub' => 'Luba-Katanga',
+   'lua' => 'Luba-Lulua',
+   'lui' => 'Luiseno',
+   'smj' => 'Lule Sami',
+   'lun' => 'Lunda',
+   'luo' => 'Luo (Kenya and Tanzania)',
+   'lus' => 'Lushai',
+   'ltz' => 'Luxembourgish',
+   'mac' => 'Macedonian',
+   'mkd' => 'Macedonian',
+   'mad' => 'Madurese',
+   'mag' => 'Magahi',
+   'mai' => 'Maithili',
+   'mak' => 'Makasar',
+   'mlg' => 'Malagasy',
+   'may' => 'Malay',
+   'msa' => 'Malay',
+   'mal' => 'Malayalam',
+   'mlt' => 'Maltese',
+   'mnc' => 'Manchu',
+   'mdr' => 'Mandar',
+   'man' => 'Mandingo',
+   'mni' => 'Manipuri',
+   'mno' => 'Manobo languages',
+   'glv' => 'Manx',
+   'mao' => 'Maori',
+   'mri' => 'Maori',
+   'mar' => 'Marathi',
+   'chm' => 'Mari',
+   'mah' => 'Marshallese',
+   'mwr' => 'Marwari',
+   'mas' => 'Masai',
+   'myn' => 'Mayan languages',
+   'men' => 'Mende',
+   'mic' => 'Micmac',
+   'min' => 'Minangkabau',
+   'mwl' => 'Mirandese',
+   'mis' => 'Miscellaneous languages',
+   'moh' => 'Mohawk',
+   'mdf' => 'Moksha',
+   'mol' => 'Moldavian',
+   'mkh' => 'Mon-Khmer (Other)',
+   'lol' => 'Mongo',
+   'mon' => 'Mongolian',
+   'mos' => 'Mossi',
+   'mul' => 'Multiple languages',
+   'mun' => 'Munda languages',
+   'nah' => 'Nahuatl',
+   'nau' => 'Nauru',
+   'nav' => 'Navaho; Navajo',
+   'nde' => 'Ndebele, North',
+   'nbl' => 'Ndebele, South',
+   'ndo' => 'Ndonga',
+   'nap' => 'Neapolitan',
+   'nep' => 'Nepali',
+   'new' => 'Newari',
+   'nia' => 'Nias',
+   'nic' => 'Niger-Kordofanian (Other)',
+   'ssa' => 'Nilo-Saharan (Other)',
+   'niu' => 'Niuean',
+   'nog' => 'Nogai',
+   'non' => 'Norse, Old',
+   'nai' => 'North American Indian (Other)',
+   'frr' => 'Northern Frisian',
+   'sme' => 'Northern Sami',
+   'nso' => 'Northern Sotho; Pedi; Sepedi',
+   'nde' => 'North Ndebele',
+   'nor' => 'Norwegian',
+   'nob' => 'Norwegian Bokmal',
+   'nno' => 'Norwegian Nynorsk',
+   'nub' => 'Nubian languages',
+   'nym' => 'Nyamwezi',
+   'nya' => 'Nyanja',
+   'nyn' => 'Nyankole',
+   'nno' => 'Nynorsk, Norwegian',
+   'nyo' => 'Nyoro',
+   'nzi' => 'Nzima',
+   'oci' => 'Occitan (post 1500)',
+   'oji' => 'Ojibwa',
+   'ori' => 'Oriya',
+   'orm' => 'Oromo',
+   'osa' => 'Osage',
+   'oss' => 'Ossetian; Ossetic',
+   'oto' => 'Otomian languages',
+   'pal' => 'Pahlavi',
+   'pau' => 'Palauan',
+   'pli' => 'Pali',
+   'pam' => 'Pampanga',
+   'pag' => 'Pangasinan',
+   'pan' => 'Panjabi',
+   'pap' => 'Papiamento',
+   'paa' => 'Papuan (Other)',
+   'per' => 'Persian',
+   'fas' => 'Persian',
+   'peo' => 'Persian, Old (ca.600-400)',
+   'phi' => 'Philippine (Other)',
+   'phn' => 'Phoenician',
+   'pon' => 'Pohnpeian',
+   'pol' => 'Polish',
+   'por' => 'Portuguese',
+   'pra' => 'Prakrit languages',
+   'oci' => 'Provencal',
+   'pro' => 'Provencal, Old (to 1500)',
+   'pan' => 'Punjabi',
+   'pus' => 'Pushto',
+   'que' => 'Quechua',
+   'roh' => 'Raeto-Romance',
+   'raj' => 'Rajasthani',
+   'rap' => 'Rapanui',
+   'rar' => 'Rarotongan',
+   'qaa' => 'Reserved for local use',
+   'qtz' => 'Reserved for local use',
+   'roa' => 'Romance (Other)',
+   'rum' => 'Romanian',
+   'ron' => 'Romanian',
+   'rom' => 'Romany',
+   'run' => 'Rundi',
+   'rus' => 'Russian',
+   'sal' => 'Salishan languages',
+   'sam' => 'Samaritan Aramaic',
+   'smi' => 'Sami languages (Other)',
+   'smo' => 'Samoan',
+   'sad' => 'Sandawe',
+   'sag' => 'Sango',
+   'san' => 'Sanskrit',
+   'sat' => 'Santali',
+   'srd' => 'Sardinian',
+   'sas' => 'Sasak',
+   'nds' => 'Saxon, Low',
+   'sco' => 'Scots',
+   'gla' => 'Scottish Gaelic',
+   'sel' => 'Selkup',
+   'sem' => 'Semitic (Other)',
+   'nso' => 'Sepedi; Northern Sotho; Pedi',
+   'scc' => 'Serbian',
+   'srp' => 'Serbian',
+   'srr' => 'Serer',
+   'shn' => 'Shan',
+   'sna' => 'Shona',
+   'iii' => 'Sichuan Yi',
+   'scn' => 'Sicilian',
+   'sid' => 'Sidamo',
+   'sgn' => 'Sign languages',
+   'bla' => 'Siksika',
+   'snd' => 'Sindhi',
+   'sin' => 'Sinhalese',
+   'sit' => 'Sino-Tibetan (Other)',
+   'sio' => 'Siouan languages',
+   'sms' => 'Skolt Sami',
+   'den' => 'Slave (Athapascan)',
+   'sla' => 'Slavic (Other)',
+   'slo' => 'Slovak',
+   'slk' => 'Slovak',
+   'slv' => 'Slovenian',
+   'sog' => 'Sogdian',
+   'som' => 'Somali',
+   'son' => 'Songhai',
+   'snk' => 'Soninke',
+   'wen' => 'Sorbian languages',
+   'nso' => 'Sotho, Northern',
+   'sot' => 'Sotho, Southern',
+   'sai' => 'South American Indian (Other)',
+   'alt' => 'Southern Altai',
+   'sma' => 'Southern Sami',
+   'nbl' => 'South Ndebele',
+   'spa' => 'Spanish',
+   'srn' => 'Sranan Tongo',
+   'suk' => 'Sukuma',
+   'sux' => 'Sumerian',
+   'sun' => 'Sundanese',
+   'sus' => 'Susu',
+   'swa' => 'Swahili',
+   'ssw' => 'Swati',
+   'swe' => 'Swedish',
+   'gsw' => 'Swiss German; Alemanic',
+   'syr' => 'Syriac',
+   'tgl' => 'Tagalog',
+   'tah' => 'Tahitian',
+   'tai' => 'Tai (Other)',
+   'tgk' => 'Tajik',
+   'tmh' => 'Tamashek',
+   'tam' => 'Tamil',
+   'tat' => 'Tatar',
+   'tel' => 'Telugu',
+   'ter' => 'Tereno',
+   'tet' => 'Tetum',
+   'tha' => 'Thai',
+   'tib' => 'Tibetan',
+   'bod' => 'Tibetan',
+   'tig' => 'Tigre',
+   'tir' => 'Tigrinya',
+   'tem' => 'Timne',
+   'tiv' => 'Tiv',
+   'tlh' => 'tlhIngan-Hol; Klingon',
+   'tli' => 'Tlingit',
+   'tpi' => 'Tok Pisin',
+   'tkl' => 'Tokelau',
+   'tog' => 'Tonga (Nyasa)',
+   'ton' => 'Tonga (Tonga Islands)',
+   'tsi' => 'Tsimshian',
+   'tso' => 'Tsonga',
+   'tsn' => 'Tswana',
+   'tum' => 'Tumbuka',
+   'tup' => 'Tupi languages',
+   'tur' => 'Turkish',
+   'ota' => 'Turkish, Ottoman (1500-1928)',
+   'tuk' => 'Turkmen',
+   'tvl' => 'Tuvalu',
+   'tyv' => 'Tuvinian',
+   'twi' => 'Twi',
+   'udm' => 'Udmurt',
+   'uga' => 'Ugaritic',
+   'uig' => 'Uighur',
+   'ukr' => 'Ukrainian',
+   'umb' => 'Umbundu',
+   'und' => 'Undetermined',
+   'hsb' => 'Upper Sorbian',
+   'urd' => 'Urdu',
+   'uzb' => 'Uzbek',
+   'vai' => 'Vai',
+   'cat' => 'Valencian',
+   'ven' => 'Venda',
+   'vie' => 'Vietnamese',
+   'vol' => 'Volapuk',
+   'vot' => 'Votic',
+   'wak' => 'Wakashan languages',
+   'wal' => 'Walamo',
+   'wln' => 'Walloon',
+   'war' => 'Waray',
+   'was' => 'Washo',
+   'wel' => 'Welsh',
+   'cym' => 'Welsh',
+   'fry' => 'Wester Frisian',
+   'wol' => 'Wolof',
+   'xho' => 'Xhosa',
+   'sah' => 'Yakut',
+   'yao' => 'Yao',
+   'yap' => 'Yapese',
+   'yid' => 'Yiddish',
+   'yor' => 'Yoruba',
+   'ypk' => 'Yupik languages',
+   'znd' => 'Zande',
+   'zap' => 'Zapotec',
+   'zen' => 'Zenaga',
+   'zha' => 'Zhuang',
+   'zul' => 'Zulu',
+   'zun' => 'Zuni'
+}
+
+# Tell whether value names a language listed in ISO_LANG.
+#
+# Only the part before the first "-" is looked up, case-insensitively, so
+# "en", "EN" and "en-GB" are all accepted.
+#
+# value:: language tag to check
+#
+# Returns true or false; nil and empty values yield false.
+def is_valid_lang_code(value)
+  lang = value.to_s.split('-', 2).first
+  !lang.nil? && ISO_LANG.key?(lang.downcase)
+end
--- a/attic/vendor/plugins/HTML5lib/lib/html5/filters/optionaltags.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/filters/optionaltags.rb
@ -0,0 +1,198 @@
+require 'html5/constants'
+require 'html5/filters/base'
+
+module HTML5
+  module Filters
+
+    class OptionalTagFilter < Base
+      def slider
+        previous1 = previous2 = nil
+        __getobj__.each do |token|
+          yield previous2, previous1, token if previous1 != nil
+          previous2 = previous1
+          previous1 = token
+        end
+        yield previous2, previous1, nil
+      end
+
+      def each
+        slider do |previous, token, nexttok|
+          type = token[:type]
+          if type == :StartTag
+            yield token unless token[:data].empty? and is_optional_start(token[:name], previous, nexttok)
+          elsif type == :EndTag
+            yield token unless is_optional_end(token[:name], nexttok)
+          else
+            yield token
+          end
+        end
+      end
+
+      def is_optional_start(tagname, previous, nexttok)
+        type = nexttok ? nexttok[:type] : nil
+        if tagname == 'html'
+          # An html element's start tag may be omitted if the first thing
+          # inside the html element is not a space character or a comment.
+          return ![:Comment, :SpaceCharacters].include?(type)
+        elsif tagname == 'head'
+          # A head element's start tag may be omitted if the first thing
+          # inside the head element is an element.
+          return type == :StartTag
+        elsif tagname == 'body'
+          # A body element's start tag may be omitted if the first thing
+          # inside the body element is not a space character or a comment,
+          # except if the first thing inside the body element is a script
+          # or style element and the node immediately preceding the body
+          # element is a head element whose end tag has been omitted.
+          if [:Comment, :SpaceCharacters].include?(type)
+            return false
+          elsif type == :StartTag
+            # XXX: we do not look at the preceding event, so we never omit
+            # the body element's start tag if it's followed by a script or
+            # a style element.
+            return !%w[script style].include?(nexttok[:name])
+          else
+            return true
+          end
+        elsif tagname == 'colgroup'
+          # A colgroup element's start tag may be omitted if the first thing
+          # inside the colgroup element is a col element, and if the element
+          # is not immediately preceeded by another colgroup element whose
+          # end tag has been omitted.
+          if type == :StartTag
+            # XXX: we do not look at the preceding event, so instead we never
+            # omit the colgroup element's end tag when it is immediately
+            # followed by another colgroup element. See is_optional_end.
+            return nexttok[:name] == "col"
+          else
+            return false
+          end
+        elsif tagname == 'tbody'
+          # A tbody element's start tag may be omitted if the first thing
+          # inside the tbody element is a tr element, and if the element is
+          # not immediately preceeded by a tbody, thead, or tfoot element
+          # whose end tag has been omitted.
+          if type == :StartTag
+            # omit the thead and tfoot elements' end tag when they are
+            # immediately followed by a tbody element. See is_optional_end.
+            if previous and previous[:type] == :EndTag && %w(tbody thead tfoot).include?(previous[:name])
+              return false
+            end
+
+            return nexttok[:name] == 'tr'
+          else
+            return false
+          end
+        end
+        return false
+      end
+
+      def is_optional_end(tagname, nexttok)
+        type = nexttok ? nexttok[:type] : nil
+        if %w[html head body].include?(tagname)
+          # An html element's end tag may be omitted if the html element
+          # is not immediately followed by a space character or a comment.
+          return ![:Comment, :SpaceCharacters].include?(type)
+        elsif %w[li optgroup option tr].include?(tagname)
+          # A li element's end tag may be omitted if the li element is
+          # immediately followed by another li element or if there is
+          # no more content in the parent element.
+          # An optgroup element's end tag may be omitted if the optgroup
+          # element is immediately followed by another optgroup element,
+          # or if there is no more content in the parent element.
+          # An option element's end tag may be omitted if the option
+          # element is immediately followed by another option element,
+          # or if there is no more content in the parent element.
+          # A tr element's end tag may be omitted if the tr element is
+          # immediately followed by another tr element, or if there is
+          # no more content in the parent element.
+          if type == :StartTag
+            return nexttok[:name] == tagname
+          else
+            return type == :EndTag || type == nil
+          end
+        elsif %w(dt dd).include?(tagname)
+          # A dt element's end tag may be omitted if the dt element is
+          # immediately followed by another dt element or a dd element.
+          # A dd element's end tag may be omitted if the dd element is
+          # immediately followed by another dd element or a dt element,
+          # or if there is no more content in the parent element.
+          if type == :StartTag
+            return %w(dt dd).include?(nexttok[:name])
+          elsif tagname == 'dd'
+            return type == :EndTag || type == nil
+          else
+            return false
+          end
+        elsif tagname == 'p'
+          # A p element's end tag may be omitted if the p element is
+          # immediately followed by an address, blockquote, dl, fieldset,
+          # form, h1, h2, h3, h4, h5, h6, hr, menu, ol, p, pre, table,
+          # or ul  element, or if there is no more content in the parent
+          # element.
+          if type == :StartTag
+            return %w(address blockquote dl fieldset form h1 h2 h3 h4 h5
+                      h6 hr menu ol p pre table ul).include?(nexttok[:name])
+          else
+            return type == :EndTag || type == nil
+          end
+        elsif tagname == 'colgroup'
+          # A colgroup element's end tag may be omitted if the colgroup
+          # element is not immediately followed by a space character or
+          # a comment.
+          if [:Comment, :SpaceCharacters].include?(type)
+            return false
+          elsif type == :StartTag
+            # XXX: we also look for an immediately following colgroup
+            # element. See is_optional_start.
+            return nexttok[:name] != 'colgroup'
+          else
+            return true
+          end
+        elsif %w(thead tbody).include? tagname
+          # A thead element's end tag may be omitted if the thead element
+          # is immediately followed by a tbody or tfoot element.
+          # A tbody element's end tag may be omitted if the tbody element
+          # is immediately followed by a tbody or tfoot element, or if
+          # there is no more content in the parent element.
+          # A tfoot element's end tag may be omitted if the tfoot element
+          # is immediately followed by a tbody element, or if there is no
+          # more content in the parent element.
+          # XXX: we never omit the end tag when the following element is
+          # a tbody. See is_optional_start.
+          if type == :StartTag
+            return %w(tbody tfoot).include?(nexttok[:name])
+          elsif tagname == 'tbody'
+            return (type == :EndTag or type == nil)
+          else
+            return false
+          end
+        elsif tagname == 'tfoot'
+          # A tfoot element's end tag may be omitted if the tfoot element
+          # is immediately followed by a tbody element, or if there is no
+          # more content in the parent element.
+          # XXX: we never omit the end tag when the following element is
+          # a tbody. See is_optional_start.
+          if type == :StartTag
+            return nexttok[:name] == 'tbody'
+          else
+            return type == :EndTag || type == nil
+          end
+        elsif %w(td th).include? tagname
+          # A td element's end tag may be omitted if the td element is
+          # immediately followed by a td or th element, or if there is
+          # no more content in the parent element.
+          # A th element's end tag may be omitted if the th element is
+          # immediately followed by a td or th element, or if there is
+          # no more content in the parent element.
+          if type == :StartTag
+            return %w(td th).include?(nexttok[:name])
+          else
+            return type == :EndTag || type == nil
+          end
+        end
+        return false
+      end
+    end
+  end
+end
--- a/attic/vendor/plugins/HTML5lib/lib/html5/filters/rfc2046.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/filters/rfc2046.rb
@ -0,0 +1,30 @@
+# adapted from feedvalidator, original copyright license is
+#
+# Copyright (c) 2002-2006, Sam Ruby, Mark Pilgrim, Joseph Walton, and Phil Ringnalda
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+
+# mime_re = Regexp.new('[^\s()<>,;:\\"/[\]?=]+/[^\s()<>,;:\\"/[\]?=]+(\s*;\s*[^\s()<>,;:\\"/[\]?=]+=("(\\"|[^"])*"|[^\s()<>,;:\\"/[\]?=]+))*$')
+
+# Placeholder MIME type check: accepts every value.
+#
+# The pattern that would do the real check is commented out (both the
+# mime_re definition above and the match below), so value is never
+# inspected and the result is always true.
+def is_valid_mime_type(value)
+  # !!mime_re.match(value)
+  true
+end
+
--- a/attic/vendor/plugins/HTML5lib/lib/html5/filters/rfc3987.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/filters/rfc3987.rb
@ -0,0 +1,89 @@
+# adapted from feedvalidator, original copyright license is
+#
+# Copyright (c) 2002-2006, Sam Ruby, Mark Pilgrim, Joseph Walton, and Phil Ringnalda
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+iana_schemes = [ # http://www.iana.org/assignments/uri-schemes.html
+  "ftp", "http", "gopher", "mailto", "news", "nntp", "telnet", "wais",
+  "file", "prospero", "z39.50s", "z39.50r", "cid", "mid", "vemmi",
+  "service", "imap", "nfs", "acap", "rtsp", "tip", "pop", "data", "dav",
+  "opaquelocktoken", "sip", "sips", "tel", "fax", "modem", "ldap",
+  "https", "soap.beep", "soap.beeps", "xmlrpc.beep", "xmlrpc.beeps",
+  "urn", "go", "h323", "ipp", "tftp", "mupdate", "pres", "im", "mtqp",
+  "iris.beep", "dict", "snmp", "crid", "tag", "dns", "info"
+]
+ALLOWED_SCHEMES = iana_schemes + ['javascript']
+
+RFC2396      = Regexp.new("^([a-zA-Z][0-9a-zA-Z+\\-\\.]*:)?/{0,2}[0-9a-zA-Z;/?:@&=+$\\.\\-_!~*'()%,#]*$", Regexp::MULTILINE)
+rfc2396_full = Regexp.new("[a-zA-Z][0-9a-zA-Z+\\-\\.]*:(//)?[0-9a-zA-Z;/?:@&=+$\\.\\-_!~*'()%,#]+$")
+URN = Regexp.new("^[Uu][Rr][Nn]:[a-zA-Z0-9][a-zA-Z0-9-]{1,31}:([a-zA-Z0-9()+,\.:=@;$_!*'\-]|%[0-9A-Fa-f]{2})+$")
+TAG = Regexp.new("^tag:([a-z0-9\\-\._]+?@)?[a-z0-9\.\-]+?,\d{4}(-\d{2}(-\d{2})?)?:[0-9a-zA-Z;/\?:@&=+$\.\-_!~*'\(\)%,]*(#[0-9a-zA-Z;/\?:@&=+$\.\-_!~*'\(\)%,]*)?$")
+
+def is_valid_uri(value, uri_pattern = RFC2396)
+  scheme = value.split(':').first
+  scheme.downcase! if scheme
+  if scheme == 'tag'
+    if !TAG.match(value)
+      return false, "invalid-tag-uri"
+    end
+  elsif scheme == "urn"
+    if !URN.match(value)
+      return false, "invalid-urn"
+    end
+  elsif uri_pattern.match(value).to_a.reject{|i| i == ''}.compact.length == 0 || uri_pattern.match(value)[0] != value
+    urichars = Regexp.new("^[0-9a-zA-Z;/?:@&=+$\\.\\-_!~*'()%,#]$", Regexp::MULTILINE)
+    if value.length > 0
+      value.each_byte do |b|
+        if b < 128 and !urichars.match([b].pack('c*'))
+          return false, "invalid-uri-char"
+        end
+      end
+    else
+      begin
+        if uri_pattern.match(value.encode('idna'))
+          return false, "uri-not-iri"
+        end
+      rescue
+      end
+      return false, "invalid-uri"
+    end
+  elsif ['http','ftp'].include?(scheme)
+    if !value.match(%r{^\w+://[^/].*})
+      return false, "invalid-http-or-ftp-uri"
+    end
+  elsif value.index(':') && scheme.match(/^[a-z]+$/) && !ALLOWED_SCHEMES.include?(scheme)
+    return false, "invalid-scheme"
+  end
+  return true, ""
+end
+
+def is_valid_iri(value)
+  begin
+    if value.length > 0
+      value = value.encode('idna')
+    end
+  rescue
+  end
+  is_valid_uri(value)
+end
+
+def is_valid_fully_qualified_uri(value)
+  is_valid_uri(value, rfc2396_full)
+end
--- a/attic/vendor/plugins/HTML5lib/lib/html5/filters/sanitizer.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/filters/sanitizer.rb
@ -0,0 +1,15 @@
+require 'html5/filters/base'
+require 'html5/sanitizer'
+
+module HTML5
+  module Filters
+    class HTMLSanitizeFilter < Base
+      include HTMLSanitizeModule
+      def each
+        __getobj__.each do |token|
+          yield(sanitize_token(token))
+        end
+      end
+    end
+  end
+end
--- a/attic/vendor/plugins/HTML5lib/lib/html5/filters/validator.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/filters/validator.rb
@ -0,0 +1,830 @@
+# HTML 5 conformance checker
+# 
+# Warning: this module is experimental, incomplete, and subject to removal at any time.
+# 
+# Usage:
+# >>> from html5lib.html5parser import HTMLParser
+# >>> from html5lib.filters.validator import HTMLConformanceChecker
+# >>> p = HTMLParser(tokenizer=HTMLConformanceChecker)
+# >>> p.parse('<!doctype html>\n<html foo=bar></html>')
+# <<class 'html5lib.treebuilders.simpletree.Document'> nil>
+# >>> p.errors
+# [((2, 14), 'unknown-attribute', {'attributeName' => u'foo', 'tagName' => u'html'})]
+
+require 'html5/constants'
+require 'html5/filters/base'
+require 'html5/filters/iso639codes'
+require 'html5/filters/rfc3987'
+require 'html5/filters/rfc2046'
+
+def _(str); str; end
+
+class String
+  # lifted from rails
+  def underscore()
+     self.gsub(/::/, '/').
+       gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2').
+       gsub(/([a-z\d])([A-Z])/,'\1_\2').
+       tr("-", "_").
+       downcase
+   end
+end
+
+HTML5::E.update({
+  "unknown-start-tag" =>
+    _("Unknown start tag <%(tagName)>."),
+  "unknown-attribute" =>
+    _("Unknown '%(attributeName)' attribute on <%(tagName)>."),
+  "missing-required-attribute" =>
+    _("The '%(attributeName)' attribute is required on <%(tagName)>."),
+  "unknown-input-type" =>
+    _("Illegal value for attribute on <input type='%(inputType)'>."),
+  "attribute-not-allowed-on-this-input-type" =>
+    _("The '%(attributeName)' attribute is not allowed on <input type=%(inputType)>."),
+  "deprecated-attribute" =>
+    _("This attribute is deprecated: '%(attributeName)' attribute on <%(tagName)>."),
+  "duplicate-value-in-token-list" =>
+    _("Duplicate value in token list: '%(attributeValue)' in '%(attributeName)' attribute on <%(tagName)>."),
+  "invalid-attribute-value" =>
+    _("Invalid attribute value: '%(attributeName)' attribute on <%(tagName)>."),
+  "space-in-id" =>
+    _("Whitespace is not allowed here: '%(attributeName)' attribute on <%(tagName)>."),
+  "duplicate-id" =>
+    _("This ID was already defined earlier: 'id' attribute on <%(tagName)>."),
+  "attribute-value-can-not-be-blank" =>
+    _("This value can not be blank: '%(attributeName)' attribute on <%(tagName)>."),
+  "id-does-not-exist" =>
+    _("This value refers to a non-existent ID: '%(attributeName)' attribute on <%(tagName)>."),
+  "invalid-enumerated-value" =>
+    _("Value must be one of %(enumeratedValues): '%(attributeName)' attribute on <%tagName)>."),
+  "invalid-boolean-value" =>
+    _("Value must be one of %(enumeratedValues): '%(attributeName)' attribute on <%tagName)>."),
+  "contextmenu-must-point-to-menu" =>
+    _("The contextmenu attribute must point to an ID defined on a <menu> element."),
+  "invalid-lang-code" =>
+    _("Invalid language code: '%(attributeName)' attibute on <%(tagName)>."),
+  "invalid-integer-value" =>
+    _("Value must be an integer: '%(attributeName)' attribute on <%tagName)>."),
+  "invalid-root-namespace" =>
+    _("Root namespace must be 'http://www.w3.org/1999/xhtml', or omitted."),
+  "invalid-browsing-context" =>
+    _("Value must be one of ('_self', '_parent', '_top'), or a name that does not start with '_' => '%(attributeName)' attribute on <%(tagName)>."),
+  "invalid-tag-uri" =>
+    _("Invalid URI: '%(attributeName)' attribute on <%(tagName)>."),
+  "invalid-urn" =>
+    _("Invalid URN: '%(attributeName)' attribute on <%(tagName)>."),
+  "invalid-uri-char" =>
+    _("Illegal character in URI: '%(attributeName)' attribute on <%(tagName)>."),
+  "uri-not-iri" =>
+    _("Expected a URI but found an IRI: '%(attributeName)' attribute on <%(tagName)>."),
+  "invalid-uri" =>
+    _("Invalid URI: '%(attributeName)' attribute on <%(tagName)>."),
+  "invalid-http-or-ftp-uri" =>
+    _("Invalid URI: '%(attributeName)' attribute on <%(tagName)>."),
+  "invalid-scheme" =>
+    _("Unregistered URI scheme: '%(attributeName)' attribute on <%(tagName)>."),
+  "invalid-rel" =>
+    _("Invalid link relation: '%(attributeName)' attribute on <%(tagName)>."),
+  "invalid-mime-type" =>
+    _("Invalid MIME type: '%(attributeName)' attribute on <%(tagName)>."),
+})
+
+
+class HTMLConformanceChecker < HTML5::Filters::Base
+
+  # Attribute names accepted on every element.
+  # check_start_tag_unknown_attributes unions this list with the element's
+  # own entry in @@allowed_attribute_map.
+  @@global_attributes = %w[class contenteditable contextmenu dir
+    draggable id irrelevant lang ref tabindex template
+    title onabort onbeforeunload onblur onchange onclick
+    oncontextmenu ondblclick ondrag ondragend ondragenter
+    ondragleave ondragover ondragstart ondrop onerror
+    onfocus onkeydown onkeypress onkeyup onload onmessage
+    onmousedown onmousemove onmouseout onmouseover onmouseup
+    onmousewheel onresize onscroll onselect onsubmit onunload]
+  # XXX lang in HTML only, xml:lang in XHTML only
+  # XXX validate ref, template
+
+  # Element name => attributes allowed on that element in addition to
+  # @@global_attributes. The key set also serves as the list of known
+  # elements: check_unknown_start_tag reports any start tag whose name is
+  # not a key here.
+  @@allowed_attribute_map = {
+    'html'         => %w[xmlns],
+    'head'         => [],
+    'title'        => [],
+    'base'         => %w[href target],
+    'link'         => %w[href rel media hreflang type],
+    'meta'         => %w[name http-equiv content charset], # XXX charset in HTML only
+    'style'        => %w[media type scoped],
+    'body'         => [],
+    'section'      => [],
+    'nav'          => [],
+    'article'      => [],
+    'blockquote'   => %w[cite],
+    'aside'        => [],
+    'h1'           => [],
+    'h2'           => [],
+    'h3'           => [],
+    'h4'           => [],
+    'h5'           => [],
+    'h6'           => [],
+    'header'       => [],
+    'footer'       => [],
+    'address'      => [],
+    'p'            => [],
+    'hr'           => [],
+    'br'           => [],
+    'dialog'       => [],
+    'pre'          => [],
+    'ol'           => %w[start],
+    'ul'           => [],
+    'li'           => %w[value], # XXX depends on parent
+    'dl'           => [],
+    'dt'           => [],
+    'dd'           => [],
+    'a'            => %w[href target ping rel media hreflang type],
+    'q'            => %w[cite],
+    'cite'         => [],
+    'em'           => [],
+    'strong'       => [],
+    'small'        => [],
+    'm'            => [],
+    'dfn'          => [],
+    'abbr'         => [],
+    'time'         => %w[datetime],
+    'meter'        => %w[value min low high max optimum],
+    'progress'     => %w[value max],
+    'code'         => [],
+    'var'          => [],
+    'samp'         => [],
+    'kbd'          => [],
+    'sup'          => [],
+    'sub'          => [],
+    'span'         => [],
+    'i'            => [],
+    'b'            => [],
+    'bdo'          => [],
+    'ins'          => %w[cite datetime],
+    'del'          => %w[cite datetime],
+    'figure'       => [],
+    'img'          => %w[alt src usemap ismap height width], # XXX ismap depends on parent
+    'iframe'       => %w[src],
+    # <embed> handled separately
+    'object'       => %w[data type usemap height width],
+    'param'        => %w[name value],
+    'video'        => %w[src autoplay start loopstart loopend end loopcount controls],
+    'audio'        => %w[src autoplay start loopstart loopend end loopcount controls],
+    'source'       => %w[src type media],
+    'canvas'       => %w[height width],
+    'map'          => [],
+    'area'         => %w[alt coords shape href target ping rel media hreflang type],
+    'table'        => [],
+    'caption'      => [],
+    'colgroup'     => %w[span], # XXX only if element contains no <col> elements
+    'col'          => %w[span],
+    'tbody'        => [],
+    'thead'        => [],
+    'tfoot'        => [],
+    'tr'           => [],
+    'td'           => %w[colspan rowspan],
+    'th'           => %w[colspan rowspan scope],
+    # all possible <input> attributes are listed here but <input> is really handled separately
+    'input'        => %w[accept accesskey action alt autocomplete autofocus checked
+                         disabled enctype form inputmode list maxlength method min 
+                         max name pattern step readonly replace required size src
+                         tabindex target template value
+    ],
+    'form'         => %w[action method enctype accept name onsubmit onreset accept-charset
+                         data replace
+    ],
+    'button'       => %w[action enctype method replace template name value type disabled form autofocus], # XXX may need matrix of acceptable attributes based on value of type attribute (like input)
+    'select'       => %w[name size multiple disabled data accesskey form autofocus],
+    'optgroup'     => %w[disabled label],
+    'option'       => %w[selected disabled label value],
+    'textarea'     => %w[maxlength name rows cols disabled readonly required form autofocus wrap accept],
+    'label'        => %w[for accesskey form],
+    'fieldset'     => %w[disabled form],
+    'output'       => %w[form name for onforminput onformchange],
+    'datalist'     => %w[data],
+     # XXX repetition model for repeating form controls
+    'script'       => %w[src defer async type],
+    'noscript'     => [],
+    'noembed'      => [],
+    'event-source' => %w[src],
+    'details'      => %w[open],
+    'datagrid'     => %w[multiple disabled],
+    'command'      => %w[type label icon hidden disabled checked radiogroup default],
+    'menu'         => %w[type label autosubmit],
+    'datatemplate' => [],
+    'rule'         => [],
+    'nest'         => [],
+    'legend'       => [],
+    'div'          => [],
+    'font'         => %w[style]
+  }
+
+  # Element name => attributes that must be present on its start tag.
+  # check_start_tag_required_attributes yields a
+  # "missing-required-attribute" error for each one that is absent.
+  @@required_attribute_map = {
+    'link'   => %w[href rel],
+    'bdo'    => %w[dir],
+    'img'    => %w[src],
+    'embed'  => %w[src],
+    'object' => [], # XXX one of 'data' or 'type' is required
+    'param'  => %w[name value],
+    'source' => %w[src],
+    'map'    => %w[id]
+  }
+
+  # Value of an <input> element's type attribute => attributes permitted for
+  # that type. Consulted by validate_start_tag_input, which treats a missing
+  # type attribute as "text" and reports types that are not keys here as
+  # "unknown-input-type".
+  @@input_type_allowed_attribute_map = {
+    'text'           => %w[accesskey autocomplete autofocus disabled form inputmode list maxlength name pattern readonly required size tabindex value],
+    'password'       => %w[accesskey autocomplete autofocus disabled form inputmode maxlength name pattern readonly required size tabindex value],
+    'checkbox'       => %w[accesskey autofocus checked disabled form name required tabindex value],
+    'radio'          => %w[accesskey autofocus checked disabled form name required tabindex value],
+    'button'         => %w[accesskey autofocus disabled form name tabindex value],
+    'submit'         => %w[accesskey action autofocus disabled enctype form method name replace tabindex target value],
+    'reset'          => %w[accesskey autofocus disabled form name tabindex value],
+    'add'            => %w[accesskey autofocus disabled form name tabindex template value],
+    'remove'         => %w[accesskey autofocus disabled form name tabindex value],
+    'move-up'        => %w[accesskey autofocus disabled form name tabindex value],
+    'move-down'      => %w[accesskey autofocus disabled form name tabindex value],
+    'file'           => %w[accept accesskey autofocus disabled form min max name required tabindex],
+    'hidden'         => %w[disabled form name value],
+    'image'          => %w[accesskey action alt autofocus disabled enctype form method name replace src tabindex target],
+    'datetime'       => %w[accesskey autocomplete autofocus disabled form list min max name step readonly required tabindex value],
+    'datetime-local' => %w[accesskey autocomplete autofocus disabled form list min max name step readonly required tabindex value],
+    'date'           => %w[accesskey autocomplete autofocus disabled form list min max name step readonly required tabindex value],
+    'month'          => %w[accesskey autocomplete autofocus disabled form list min max name step readonly required tabindex value],
+    'week'           => %w[accesskey autocomplete autofocus disabled form list min max name step readonly required tabindex value],
+    'time'           => %w[accesskey autocomplete autofocus disabled form list min max name step readonly required tabindex value],
+    'number'         => %w[accesskey autocomplete autofocus disabled form list min max name step readonly required tabindex value],
+    'range'          => %w[accesskey autocomplete autofocus disabled form list min max name step readonly required tabindex value],
+    'email'          => %w[accesskey autocomplete autofocus disabled form inputmode list maxlength name pattern readonly required tabindex value],
+    'url'            => %w[accesskey autocomplete autofocus disabled form inputmode list maxlength name pattern readonly required tabindex value],
+  }
+
+  # <input> type => attributes that validate_start_tag_input reports as
+  # "deprecated-attribute" when they appear on an input of that type.
+  @@input_type_deprecated_attribute_map = {
+    'text'     => ['size'],
+    'password' => ['size']
+  }
+
+  # Link relation keywords for <link> and <a>, going by the variable names.
+  # NOTE(review): neither list is referenced in this part of the file;
+  # presumably the rel attribute checks further down consult them.
+  @@link_rel_values = %w[alternate archive archives author contact feed first begin start help icon index top contents toc last end license copyright next pingback prefetch prev previous search stylesheet sidebar tag up]
+  @@a_rel_values    = %w[alternate archive archives author contact feed first begin start help index top contents toc last end license copyright next prev previous search sidebar tag up bookmark external nofollow]
+
+  def initialize(stream, *args)
+    super(HTML5::HTMLTokenizer.new(stream, *args))
+    @things_that_define_an_id    = []
+    @things_that_point_to_an_id  = []
+    @ids_we_have_known_and_loved = []
+  end
+  
+  def each
+    __getobj__.each do |token|
+      method = "validate_#{token.fetch(:type, '-').to_s.underscore}_#{token.fetch(:name, '-').to_s.underscore}"
+      if respond_to?(method)
+        send(method, token){|t| yield t }
+      else
+        method = "validate_#{token.fetch(:type, '-').to_s.underscore}"
+        if respond_to?(method)
+          send(method, token) do |t|
+            yield t
+          end
+        end
+      end
+      yield token
+    end
+    eof do |t|
+      yield t
+    end
+  end
+
+  ##########################################################################
+  # Start tag validation
+  ##########################################################################
+
+  def validate_start_tag(token)
+    check_unknown_start_tag(token){|t| yield t}
+    check_start_tag_required_attributes(token) do |t|
+      yield t
+    end
+    check_start_tag_unknown_attributes(token) do |t|
+      yield t
+    end
+    check_attribute_values(token) do |t|
+      yield t
+    end
+  end
+
+  def validate_start_tag_embed(token)
+    check_start_tag_required_attributes(token) do |t|
+      yield t
+    end
+    check_attribute_values(token) do |t|
+      yield t
+    end
+    # spec says "any attributes w/o namespace"
+    # so don't call check_start_tag_unknown_attributes
+  end
+
+  def validate_start_tag_input(token)
+    check_attribute_values(token) do |t|
+      yield t
+    end
+    attr_dict = Hash[*token[:data].collect{|(name, value)| [name.downcase, value]}.flatten]
+    input_type = attr_dict.fetch('type', "text")
+    if !@@input_type_allowed_attribute_map.keys().include?(input_type)
+      yield({:type => "ParseError",
+           :data => "unknown-input-type",
+           :datavars => {:attrValue => input_type}})
+    end
+    allowed_attributes = @@input_type_allowed_attribute_map.fetch(input_type, [])
+    attr_dict.each do |attr_name, attr_value|
+      if !@@allowed_attribute_map['input'].include?(attr_name)
+        yield({:type => "ParseError",
+             :data => "unknown-attribute",
+             :datavars => {"tagName" => "input",
+                  "attributeName" => attr_name}})
+      elsif !allowed_attributes.include?(attr_name)
+        yield({:type => "ParseError",
+             :data => "attribute-not-allowed-on-this-input-type",
+             :datavars => {"attributeName" => attr_name,
+                  "inputType" => input_type}})
+      end
+      if @@input_type_deprecated_attribute_map.fetch(input_type, []).include?(attr_name)
+        yield({:type => "ParseError",
+             :data => "deprecated-attribute",
+             :datavars => {"attributeName" => attr_name,
+                  "inputType" => input_type}})
+      end
+    end
+  end
+
+  ##########################################################################
+  # Start tag validation helpers
+  ##########################################################################
+
+  def check_unknown_start_tag(token)
+    # check for recognized tag name
+    name = (token[:name] || "").downcase
+    if !@@allowed_attribute_map.keys.include?(name)
+      yield({:type => "ParseError",
+             :data => "unknown-start-tag",
+             :datavars => {"tagName" => name}})
+    end
+  end
+
+  def check_start_tag_required_attributes(token)
+    # check for presence of required attributes
+    name = (token[:name] || "").downcase
+    if @@required_attribute_map.keys().include?(name)
+      attrs_present = (token[:data] || []).collect{|t| t[0]}
+      for attr_name in @@required_attribute_map[name]
+        if !attrs_present.include?(attr_name)
+          yield( {:type => "ParseError",
+               :data => "missing-required-attribute",
+               :datavars => {"tagName" => name,
+                    "attributeName" => attr_name}})
+        end
+      end
+    end
+  end
+
+  def check_start_tag_unknown_attributes(token)
+    # check for recognized attribute names
+    name = token[:name].downcase
+    allowed_attributes = @@global_attributes | @@allowed_attribute_map.fetch(name, [])
+    for attr_name, attr_value in token.fetch(:data, [])
+      if !allowed_attributes.include?(attr_name.downcase())
+        yield( {:type => "ParseError",
+             :data => "unknown-attribute",
+             :datavars => {"tagName" => name,
+                  "attributeName" => attr_name}})
+      end
+    end
+  end
+
+  ##########################################################################
+  # Attribute validation helpers
+  ##########################################################################
+
+#  def checkURI(token, tag_name, attr_name, attr_value)
+#    is_valid, error_code = rfc3987.is_valid_uri(attr_value)
+#    if not is_valid
+#      yield {:type => "ParseError",
+#           :data => error_code,
+#           :datavars => {"tagName" => tag_name,
+#                "attributeName" => attr_name}}
+#      yield {:type => "ParseError",
+#           :data => "invalid-attribute-value",
+#           :datavars => {"tagName" => tag_name,
+#                "attributeName" => attr_name}}
+
+  def check_iri(token, tag_name, attr_name, attr_value)
+    is_valid, error_code = is_valid_iri(attr_value)
+    if !is_valid
+      yield({:type => "ParseError",
+             :data => error_code,
+             :datavars => {"tagName" => tag_name,
+                "attributeName" => attr_name}})
+      yield({:type => "ParseError",
+             :data => "invalid-attribute-value",
+             :datavars => {"tagName" => tag_name,
+                "attributeName" => attr_name}})
+    end
+  end
+
+  def check_id(token, tag_name, attr_name, attr_value)
+    if !attr_value || attr_value.length == 0
+      yield({:type => "ParseError",
+              :data => "attribute-value-can-not-be-blank",
+              :datavars => {"tagName" => tag_name,
+                "attributeName" => attr_name}})
+    end
+    attr_value.each_byte do |b|
+      c = [b].pack('c*')
+      if HTML5::SPACE_CHARACTERS.include?(c)
+        yield( {:type => "ParseError",
+             :data => "space-in-id",
+             :datavars => {"tagName" => tag_name,
+                  "attributeName" => attr_name}})
+        yield( {:type => "ParseError",
+             :data => "invalid-attribute-value",
+             :datavars => {"tagName" => tag_name,
+                  "attributeName" => attr_name}})
+        break
+      end
+    end
+  end
+
+  def parse_token_list(value)
+    valueList = []
+    currentValue = ''
+    (value + ' ').each_byte do |b|
+      c = [b].pack('c*')
+      if HTML5::SPACE_CHARACTERS.include?(c)
+        if currentValue.length > 0
+          valueList << currentValue
+          currentValue = ''
+        end
+      else
+        currentValue += c
+      end
+    end
+    if currentValue.length > 0
+      valueList << currentValue
+    end
+    valueList
+  end
+
+  def check_token_list(tag_name, attr_name, attr_value)
+    # The "token" in the method name refers to tokens in an attribute value
+    # i.e. http://www.whatwg.org/specs/web-apps/current-work/#set-of
+    # but the "token" parameter refers to the token generated from
+    # HTMLTokenizer.  Sorry for the confusion.
+    value_list = parse_token_list(attr_value)
+    value_dict = {}
+    for current_value in value_list
+      if value_dict.has_key?(current_value)
+        yield({:type => "ParseError",
+             :data => "duplicate-value-in-token-list",
+             :datavars => {"tagName" => tag_name,
+                  "attributeName" => attr_name,
+                  "attributeValue" => current_value}})
+        break
+      end
+      value_dict[current_value] = 1
+    end
+  end
+
+  def check_enumerated_value(token, tag_name, attr_name, attr_value, enumerated_values)
+    if !attr_value || attr_value.length == 0
+      yield( {:type => "ParseError",
+           :data => "attribute-value-can-not-be-blank",
+           :datavars => {"tagName" => tag_name,
+                "attributeName" => attr_name}})
+      return
+    end
+    attr_value.downcase!
+    if !enumerated_values.include?(attr_value)
+      yield( {:type => "ParseError",
+           :data => "invalid-enumerated-value",
+           :datavars => {"tagName" => tag_name,
+                "attribute_name" => attr_name,
+                "enumeratedValues" => enumerated_values}})
+      yield( {:type => "ParseError",
+           :data => "invalid-attribute-value",
+           :datavars => {"tagName" => tag_name,
+                "attributeName" => attr_name}})
+    end
+  end
+
+  def check_boolean(token, tag_name, attr_name, attr_value)
+    enumerated_values = [attr_name, '']
+    if !enumerated_values.include?(attr_value)
+      yield( {:type => "ParseError",
+           :data => "invalid-boolean-value",
+           :datavars => {"tagName" => tag_name,
+                "attributeName" => attr_name,
+                "enumeratedValues" => enumerated_values}})
+      yield( {:type => "ParseError",
+           :data => "invalid-attribute-value",
+           :datavars => {"tagName" => tag_name,
+                "attributeName" => attr_name}})
+    end
+  end
+
+  # Validates an integer-valued attribute with a small state machine:
+  # leading space characters, an optional '-', then at least one digit.
+  # Anything after the digits is ignored (trailing junk is not reported).
+  #
+  # Yields "invalid-integer-value" when a non-digit appears where a digit is
+  # required, or "attribute-value-can-not-be-blank" when no digit was found.
+  # A nil value is treated like an empty string.
+  def check_integer(token, tag_name, attr_name, attr_value)
+    number_string = ''
+    state = 'begin' # ('begin', 'initial-number', 'in-number', 'trailing-junk')
+    error = {:type => "ParseError",
+             :data => "invalid-integer-value",
+             :datavars => {"tagName" => tag_name,
+                           "attributeName" => attr_name,
+                           "attributeValue" => attr_value}}
+    # /m makes "." match newlines too; without it a newline was skipped
+    # entirely, so a value such as "-\n5" slipped through unreported.
+    attr_value.to_s.scan(/./m) do |c|
+      case state
+      when 'begin'
+        if HTML5::SPACE_CHARACTERS.include?(c)
+          next
+        elsif c == '-'
+          state = 'initial-number'
+        elsif HTML5::DIGITS.include?(c)
+          number_string += c
+          state = 'in-number'
+        else
+          yield error
+          return
+        end
+      when 'initial-number'
+        # a sign must be followed directly by a digit
+        if !HTML5::DIGITS.include?(c)
+          yield error
+          return
+        end
+        number_string += c
+        state = 'in-number'
+      when 'in-number'
+        if HTML5::DIGITS.include?(c)
+          number_string += c
+        else
+          state = 'trailing-junk'
+        end
+      end
+    end
+    if number_string.length == 0
+      yield({:type => "ParseError",
+             :data => "attribute-value-can-not-be-blank",
+             :datavars => {"tagName" => tag_name,
+                           "attributeName" => attr_name}})
+    end
+  end
+
+  # Placeholder for floating-point validation.  The body is empty, so this
+  # check currently never yields an error.
+  def check_floating_point_number(token, tag_name, attr_name, attr_value)
+    # XXX not implemented yet
+  end
+
+  def check_browsing_context(token, tag_name, attr_name, attr_value)
+    return if not attr_value
+    return if attr_value[0] != ?_
+    attr_value.downcase!
+    return if ['_self', '_parent', '_top', '_blank'].include?(attr_value)
+    yield({:type => "ParseError",
+         :data => "invalid-browsing-context",
+         :datavars => {"tagName" => tag_name,
+              "attributeName" => attr_name}})
+  end
+
+  def check_lang_code(token, tag_name, attr_name, attr_value)
+    return if !attr_value || attr_value == '' # blank is OK
+    if not is_valid_lang_code(attr_value)
+      yield( {:type => "ParseError",
+           :data => "invalid-lang-code",
+           :datavars => {"tagName" => tag_name,
+                "attributeName" => attr_name,
+                "attributeValue" => attr_value}})
+    end
+  end
+  
+  def check_mime_type(token, tag_name, attr_name, attr_value)
+    # XXX needs tests
+    if not attr_value
+      yield( {:type => "ParseError",
+           :data => "attribute-value-can-not-be-blank",
+           :datavars => {"tagName" => tag_name,
+                "attributeName" => attr_name}})
+    end
+    if not is_valid_mime_type(attr_value)
+      yield( {:type => "ParseError",
+           :data => "invalid-mime-type",
+           :datavars => {"tagName" => tag_name,
+                "attributeName" => attr_name,
+                "attributeValue" => attr_value}})
+    end
+  end
+
+  # Placeholder for media-query validation.  The body is empty, so this
+  # check currently never yields an error.
+  def check_media_query(token, tag_name, attr_name, attr_value)
+    # XXX not implemented yet
+  end
+
+  def check_link_relation(token, tag_name, attr_name, attr_value)
+    check_token_list(tag_name, attr_name, attr_value) do |t|
+      yield t
+    end
+    value_list = parse_token_list(attr_value)
+    allowed_values = tag_name == 'link' ? @@link_rel_values : @@a_rel_values
+    for current_value in value_list
+      if !allowed_values.include?(current_value)
+        yield({:type => "ParseError",
+             :data => "invalid-rel",
+             :datavars => {"tagName" => tag_name,
+                  "attributeName" => attr_name}})
+      end
+    end
+  end
+
+  # Placeholder for date/time validation.  Only the initial parser state is
+  # set; the state machine below is an unfinished, commented-out sketch, so
+  # this check currently never yields an error.
+  def check_date_time(token, tag_name, attr_name, attr_value)
+    # XXX not implemented yet
+    state = 'begin' # ('begin', '...
+#    for c in attr_value
+#      if state == 'begin' =>
+#        if SPACE_CHARACTERS.include?(c)
+#          continue
+#        elsif digits.include?(c)
+#          state = ...
+  end
+
+  ##########################################################################
+  # Attribute validation
+  ##########################################################################
+
+  def check_attribute_values(token)
+    tag_name = token.fetch(:name, "")
+    for attr_name, attr_value in token.fetch(:data, [])
+      attr_name = attr_name.downcase
+      method = "validate_attribute_value_#{tag_name.to_s.underscore}_#{attr_name.to_s.underscore}"
+      if respond_to?(method)
+        send(method, token, tag_name, attr_name, attr_value) do |t|
+          yield t
+        end
+      else
+        method = "validate_attribute_value_#{attr_name.to_s.underscore}"
+        if respond_to?(method)
+          send(method, token, tag_name, attr_name, attr_value) do |t|
+            yield t
+          end
+        end
+      end
+    end
+  end
+
+  def validate_attribute_value_class(token, tag_name, attr_name, attr_value)
+    check_token_list(tag_name, attr_name, attr_value) do |t|
+      yield t
+      yield( {:type => "ParseError",
+           :data => "invalid-attribute-value",
+           :datavars => {"tagName" => tag_name,
+                "attributeName" => attr_name}})
+    end
+  end
+
+  def validate_attribute_value_contenteditable(token, tag_name, attr_name, attr_value)
+    check_enumerated_value(token, tag_name, attr_name, attr_value, ['true', 'false', '']) do |t|
+      yield t
+    end
+  end
+
+  def validate_attribute_value_dir(token, tag_name, attr_name, attr_value)
+    check_enumerated_value(token, tag_name, attr_name, attr_value, ['ltr', 'rtl']) do |t|
+      yield t
+    end
+  end
+
+  def validate_attribute_value_draggable(token, tag_name, attr_name, attr_value)
+    check_enumerated_value(token, tag_name, attr_name, attr_value, ['true', 'false']) do |t|
+      yield t
+    end
+  end
+
+  # These attributes need nothing beyond the generic checks, so their
+  # validators are plain aliases.
+  alias validate_attribute_value_irrelevant check_boolean
+  alias validate_attribute_value_lang       check_lang_code
+
+  def validate_attribute_value_contextmenu(token, tag_name, attr_name, attr_value)
+    check_id(token, tag_name, attr_name, attr_value) do |t|
+      yield t
+    end
+    @things_that_point_to_an_id << token
+  end
+
+  def validate_attribute_value_id(token, tag_name, attr_name, attr_value)
+    # This method has side effects.  It adds 'token' to the list of
+    # things that define an ID (@things_that_define_an_id) so that we can
+    # later check 1) whether an ID is duplicated, and 2) whether all the
+    # things that point to something else by ID (like <label for> or
+    # <span contextmenu>) point to an ID that actually exists somewhere.
+    check_id(token, tag_name, attr_name, attr_value) do |t|
+      yield t
+    end
+    return if not attr_value
+    if @ids_we_have_known_and_loved.include?(attr_value)
+      yield( {:type => "ParseError",
+           :data => "duplicate-id",
+           :datavars => {"tagName" => tag_name}})
+    end
+    @ids_we_have_known_and_loved << attr_value
+    @things_that_define_an_id << token
+  end
+
+  # tabindex is validated by the generic integer check.
+  alias validate_attribute_value_tabindex check_integer
+
+  # Placeholder for validating the ref attribute; the body is empty, so no
+  # error is ever yielded.
+  def validate_attribute_value_ref(token, tag_name, attr_name, attr_value)
+    # XXX not implemented yet
+  end
+
+  # Placeholder for validating the template attribute; the body is empty, so
+  # no error is ever yielded.
+  def validate_attribute_value_template(token, tag_name, attr_name, attr_value)
+    # XXX not implemented yet
+  end
+
+  def validate_attribute_value_html_xmlns(token, tag_name, attr_name, attr_value)
+    if attr_value != "http://www.w3.org/1999/xhtml"
+      yield( {:type => "ParseError",
+           :data => "invalid-root-namespace",
+           :datavars => {"tagName" => tag_name,
+                "attributeName" => attr_name}})
+    end
+  end
+
+  # Tag-specific validators, named validate_attribute_value_<tag>_<attr> so
+  # that check_attribute_values can find them; each one is an alias of the
+  # generic check that applies to the attribute.
+  alias validate_attribute_value_base_href       check_iri
+  alias validate_attribute_value_base_target     check_browsing_context
+  alias validate_attribute_value_link_href       check_iri
+  alias validate_attribute_value_link_rel        check_link_relation
+  alias validate_attribute_value_link_media      check_media_query
+  alias validate_attribute_value_link_hreflang   check_lang_code
+  alias validate_attribute_value_link_type       check_mime_type
+  # XXX <meta> attributes are not validated yet
+  alias validate_attribute_value_style_media     check_media_query
+  alias validate_attribute_value_style_type      check_mime_type
+  alias validate_attribute_value_style_scoped    check_boolean
+  alias validate_attribute_value_blockquote_cite check_iri
+  alias validate_attribute_value_ol_start        check_integer
+  alias validate_attribute_value_li_value        check_integer
+  # XXX need tests from here on
+  alias validate_attribute_value_a_href          check_iri
+  alias validate_attribute_value_a_target        check_browsing_context
+
+  def validate_attribute_value_a_ping(token, tag_name, attr_name, attr_value)
+    value_list = parse_token_list(attr_value)
+    for current_value in value_list
+      checkIRI(token, tag_name, attr_name, attr_value) do |t|
+        yield t
+      end
+    end
+  end
+
+  # More tag-specific validators that are plain aliases of a generic check.
+  # NOTE: check_media_query, check_date_time and
+  # check_floating_point_number are still empty stubs, so the aliases that
+  # point at them accept any value.
+  alias validate_attribute_value_a_rel           check_link_relation
+  alias validate_attribute_value_a_media         check_media_query
+  alias validate_attribute_value_a_hreflang      check_lang_code
+  alias validate_attribute_value_a_type          check_mime_type
+  alias validate_attribute_value_q_cite          check_iri
+  alias validate_attribute_value_time_datetime   check_date_time
+  alias validate_attribute_value_meter_value     check_floating_point_number
+  alias validate_attribute_value_meter_min       check_floating_point_number
+  alias validate_attribute_value_meter_low       check_floating_point_number
+  alias validate_attribute_value_meter_high      check_floating_point_number
+  alias validate_attribute_value_meter_max       check_floating_point_number
+  alias validate_attribute_value_meter_optimum   check_floating_point_number
+  alias validate_attribute_value_progress_value  check_floating_point_number
+  alias validate_attribute_value_progress_max    check_floating_point_number
+  alias validate_attribute_value_ins_cite        check_iri
+  alias validate_attribute_value_ins_datetime    check_date_time
+  alias validate_attribute_value_del_cite        check_iri
+  alias validate_attribute_value_del_datetime    check_date_time
+
+  ##########################################################################
+  # Whole document validation (IDs, etc.)
+  ##########################################################################
+
+  def eof
+    for token in @things_that_point_to_an_id
+      tag_name = token.fetch(:name, "").downcase
+      attrs_dict = token[:data] # by now html5parser has "normalized" the attrs list into a dict.
+                    # hooray for obscure side effects!
+      attr_value = attrs_dict.fetch("contextmenu", "")
+      if attr_value and (!@ids_we_have_known_and_loved.include?(attr_value))
+        yield( {:type => "ParseError",
+             :data => "id-does-not-exist",
+             :datavars => {"tagName" => tag_name,
+                  "attributeName" => "contextmenu",
+                  "attributeValue" => attr_value}})
+      else
+        for ref_token in @things_that_define_an_id
+          id = ref_token.fetch(:data, {}).fetch("id", "")
+          if not id
+            continue
+          end
+          if id == attr_value
+            if ref_token.fetch(:name, "").downcase != "men"
+              yield( {:type => "ParseError",
+                   :data => "contextmenu-must-point-to-menu"})
+            end
+            break
+          end
+        end
+      end
+    end
+  end
+end
--- a/attic/vendor/plugins/HTML5lib/lib/html5/filters/whitespace.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/filters/whitespace.rb
@ -0,0 +1,36 @@
+require 'html5/constants'
+require 'html5/filters/base'
+
+module HTML5
+  module Filters
+    class WhitespaceFilter < Base
+
+      SPACE_PRESERVE_ELEMENTS = %w[pre textarea] + RCDATA_ELEMENTS
+      SPACES = /[#{SPACE_CHARACTERS.join('')}]+/m
+
+      def each
+        preserve = 0
+        __getobj__.each do |token|
+          case token[:type]
+          when :StartTag
+            if preserve > 0 or SPACE_PRESERVE_ELEMENTS.include?(token[:name])
+              preserve += 1
+            end
+
+          when :EndTag
+            preserve -= 1 if preserve > 0
+
+          when :SpaceCharacters
+            token[:data] = " " if preserve == 0 && token[:data]
+
+          when :Characters
+            token[:data] = token[:data].sub(SPACES,' ') if preserve == 0
+          end
+
+          yield token
+        end
+      end
+    end
+  end
+end
+
--- a/attic/vendor/plugins/HTML5lib/lib/html5/html5parser.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/html5parser.rb
@ -0,0 +1,248 @@
+require 'html5/constants'
+require 'html5/tokenizer'
+require 'html5/treebuilders/rexml'
+
+Dir.glob(File.join(File.dirname(__FILE__), 'html5parser', '*_phase.rb')).each do |path|
+  require 'html5/html5parser/' + File.basename(path)
+end
+
+module HTML5
+
+  # Error in parsed document
+  class ParseError < Exception; end
+  class AssertionError < Exception; end
+
+  # HTML parser. Generates a tree structure from a stream of (possibly malformed) HTML
+  #
+  class HTMLParser
+
+    attr_accessor :phase, :first_start_tag, :inner_html, :last_phase, :insert_from_table
+
+    attr_reader :phases, :tokenizer, :tree, :errors
+
+    def self.parse(stream, options = {})
+      encoding = options.delete(:encoding)
+      new(options).parse(stream,encoding)
+    end
+
+    def self.parse_fragment(stream, options = {})
+      container = options.delete(:container) || 'div'
+      encoding = options.delete(:encoding)
+      new(options).parse_fragment(stream, container, encoding)
+    end
+
+    @@phases = %w( initial rootElement beforeHead inHead afterHead inBody inTable inCaption
+      inColumnGroup inTableBody inRow inCell inSelect afterBody inFrameset afterFrameset trailingEnd )
+
+    # :strict - raise an exception when a parse error is encountered
+    # :tree - a treebuilder class controlling the type of tree that will be
+    # returned. Built in treebuilders can be accessed through
+    # HTML5::TreeBuilders[treeType]
+    def initialize(options = {})
+      @strict = false
+      @errors = []
+     
+      @tokenizer =  HTMLTokenizer
+      @tree = TreeBuilders::REXML::TreeBuilder
+
+      options.each {|name, value| instance_variable_set("@#{name}", value) }
+      @lowercase_attr_name    = nil unless instance_variable_defined?("@lowercase_attr_name")
+      @lowercase_element_name = nil unless instance_variable_defined?("@lowercase_element_name")
+
+      @tree = @tree.new
+
+      @phases = @@phases.inject({}) do |phases, phase_name|
+        phase_class_name = phase_name.sub(/(.)/) { $1.upcase } + 'Phase'
+        phases[phase_name.to_sym] = HTML5.const_get(phase_class_name).new(self, @tree)
+        phases
+      end
+    end
+
+    def _parse(stream, inner_html, encoding, container = 'div')
+      @tree.reset
+      @first_start_tag = false
+      @errors = []
+
+      @tokenizer = @tokenizer.class unless Class === @tokenizer
+      @tokenizer = @tokenizer.new(stream, :encoding => encoding,
+        :parseMeta => !inner_html, :lowercase_attr_name => @lowercase_attr_name, :lowercase_element_name => @lowercase_element_name)
+
+      if inner_html
+        case @inner_html = container.downcase
+        when 'title', 'textarea'
+          @tokenizer.content_model_flag = :RCDATA
+        when 'style', 'script', 'xmp', 'iframe', 'noembed', 'noframes', 'noscript'
+          @tokenizer.content_model_flag = :CDATA
+        when 'plaintext'
+          @tokenizer.content_model_flag = :PLAINTEXT
+        else
+          # content_model_flag already is PCDATA
+          @tokenizer.content_model_flag = :PCDATA
+        end
+      
+        @phase = @phases[:rootElement]
+        @phase.insert_html_element
+        reset_insertion_mode
+      else
+        @inner_html = false
+        @phase = @phases[:initial]
+      end
+
+      # We only seem to have InBodyPhase testcases where the following is
+      # relevant ... need others too
+      @last_phase = nil
+
+      # XXX This is temporary for the moment so there isn't any other
+      # changes needed for the parser to work with the iterable tokenizer
+      @tokenizer.each do |token|
+        token = normalize_token(token)
+
+        method = 'process%s' % token[:type]
+
+        case token[:type]
+        when :Characters, :SpaceCharacters, :Comment
+          @phase.send method, token[:data]
+        when :StartTag
+          @phase.send method, token[:name], token[:data]
+        when :EndTag
+          @phase.send method, token[:name]
+        when :Doctype
+          @phase.send method, token[:name], token[:publicId],
+            token[:systemId], token[:correct]
+        else
+          parse_error(token[:data], token[:datavars])
+        end
+      end
+
+      # When the loop finishes it's EOF
+      @phase.process_eof
+    end
+
+    # Parse a HTML document into a well-formed tree
+    #
+    # stream - a filelike object or string containing the HTML to be parsed
+    #
+    # The optional encoding parameter must be a string that indicates
+    # the encoding.  If specified, that encoding will be used,
+    # regardless of any BOM or later declaration (such as in a meta
+    # element)
+    def parse(stream, encoding=nil)
+      _parse(stream, false, encoding)
+      @tree.get_document
+    end
+
+    # Parse a HTML fragment into a well-formed tree fragment
+
+    # container - name of the element we're setting the inner_html property
+    # if set to nil, default to 'div'
+    #
+    # stream - a filelike object or string containing the HTML to be parsed
+    #
+    # The optional encoding parameter must be a string that indicates
+    # the encoding.  If specified, that encoding will be used,
+    # regardless of any BOM or later declaration (such as in a meta
+    # element)
+    def parse_fragment(stream, container='div', encoding=nil)
+      _parse(stream, true, encoding, container)
+      @tree.get_fragment
+    end
+
+    def parse_error(code = 'XXX-undefined-error', data = {})
+      # XXX The idea is to make data mandatory.
+      @errors.push([@tokenizer.stream.position, code, data])
+      raise ParseError if @strict
+    end
+
+    # HTML5 specific normalizations to the token stream
+    def normalize_token(token)
+
+      if token[:type] == :EmptyTag
+        # When a solidus (/) is encountered within a tag name what happens
+        # depends on whether the current tag name matches that of a void
+        # element.  If it matches a void element atheists did the wrong
+        # thing and if it doesn't it's wrong for everyone.
+
+        unless VOID_ELEMENTS.include?(token[:name])
+          parse_error("incorrectly-placed-solidus")
+        end
+
+        token[:type] = :StartTag
+      end
+
+      if token[:type] == :StartTag
+        token[:name] = token[:name].downcase
+
+        # We need to remove the duplicate attributes and convert attributes
+        # to a dict so that [["x", "y"], ["x", "z"]] becomes {"x": "y"}
+
+        unless token[:data].empty?
+          data = token[:data].reverse.map {|attr, value| [attr.downcase, value] }
+          token[:data] = Hash[*data.flatten]
+        end
+
+      elsif token[:type] == :EndTag
+        parse_error("attributes-in-end-tag") unless token[:data].empty?
+        token[:name] = token[:name].downcase
+      end
+
+      token
+    end
+
+    @@new_modes = {
+      'select'   => :inSelect,
+      'td'       => :inCell,
+      'th'       => :inCell,
+      'tr'       => :inRow,
+      'tbody'    => :inTableBody,
+      'thead'    => :inTableBody,
+      'tfoot'    => :inTableBody,
+      'caption'  => :inCaption,
+      'colgroup' => :inColumnGroup,
+      'table'    => :inTable,
+      'head'     => :inBody,
+      'body'     => :inBody,
+      'frameset' => :inFrameset
+    }
+
+    def reset_insertion_mode
+      # The name of this method is mostly historical. (It's also used in the
+      # specification.)
+      last = false
+
+      @tree.open_elements.reverse.each do |node|
+        node_name = node.name
+
+        if node == @tree.open_elements.first
+          last = true
+          unless ['td', 'th'].include?(node_name)
+            # XXX
+            # assert @inner_html
+            node_name = @inner_html
+          end
+        end
+
+        # Check for conditions that should only happen in the inner_html
+        # case
+        if ['select', 'colgroup', 'head', 'frameset'].include?(node_name)
+          # XXX
+          # assert @inner_html
+        end
+
+        if @@new_modes.has_key?(node_name)
+          @phase = @phases[@@new_modes[node_name]]
+        elsif node_name == 'html'
+          @phase = @phases[@tree.head_pointer.nil?? :beforeHead : :afterHead]
+        elsif last
+          @phase = @phases[:inBody]
+        else
+          next
+        end
+
+        break
+      end
+    end
+
+    def _(string); string; end
+  end
+
+end
--- a/attic/vendor/plugins/HTML5lib/lib/html5/html5parser/after_body_phase.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/html5parser/after_body_phase.rb
@ -0,0 +1,46 @@
+require 'html5/html5parser/phase'
+
+module HTML5
+  class AfterBodyPhase < Phase
+
+    handle_end 'html'
+
+    def processComment(data)
+      # This is needed because data is to be appended to the <html> element
+      # here and not to whatever is currently open.
+      @tree.insert_comment(data, @tree.open_elements.first)
+    end
+
+    def processCharacters(data)
+      parse_error("unexpected-char-after-body")
+      @parser.phase = @parser.phases[:inBody]
+      @parser.phase.processCharacters(data)
+    end
+
+    def processStartTag(name, attributes)
+      parse_error("unexpected-start-tag-after-body", {"name" => name})
+      @parser.phase = @parser.phases[:inBody]
+      @parser.phase.processStartTag(name, attributes)
+    end
+
+    def endTagHtml(name)
+      if @parser.inner_html
+        parse_error "end-html-in-innerhtml"
+      else
+        # XXX: This may need to be done, not sure
+        # Don't set last_phase to the current phase but to the inBody phase
+        # instead. No need for extra parse errors if there's something after </html>.
+        # Try "<!doctype html>X</html>X" for instance.
+        @parser.last_phase = @parser.phase
+        @parser.phase      = @parser.phases[:trailingEnd]
+      end
+    end
+
+    def endTagOther(name)
+      parse_error("unexpected-end-tag-after-body", {"name" => name})
+      @parser.phase = @parser.phases[:inBody]
+      @parser.phase.processEndTag(name)
+    end
+
+  end
+end
--- a/attic/vendor/plugins/HTML5lib/lib/html5/html5parser/after_frameset_phase.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/html5parser/after_frameset_phase.rb
@ -0,0 +1,33 @@
+require 'html5/html5parser/phase'
+
+module HTML5
+  class AfterFramesetPhase < Phase
+
+    # http://www.whatwg.org/specs/web-apps/current-work/#after3
+
+    handle_start 'html', 'noframes'
+
+    handle_end 'html'
+
+    def processCharacters(data)
+      parse_error("unexpected-char-after-frameset")
+    end
+
+    def startTagNoframes(name, attributes)
+      @parser.phases[:inBody].processStartTag(name, attributes)
+    end
+
+    def startTagOther(name, attributes)
+      parse_error("unexpected-start-tag-after-frameset", {"name" => name})
+    end
+
+    def endTagHtml(name)
+      @parser.last_phase = @parser.phase
+      @parser.phase      = @parser.phases[:trailingEnd]
+    end
+
+    def endTagOther(name)
+      parse_error("unexpected-end-tag-after-frameset", {"name" => name})
+    end
+  end
+end
--- a/attic/vendor/plugins/HTML5lib/lib/html5/html5parser/after_head_phase.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/html5parser/after_head_phase.rb
@ -0,0 +1,50 @@
+require 'html5/html5parser/phase'
+
+module HTML5
+  class AfterHeadPhase < Phase
+
+    handle_start 'html', 'body', 'frameset', %w( base link meta script style title ) => 'FromHead'
+
+    def process_eof
+      anything_else
+      @parser.phase.process_eof
+    end
+
+    def processCharacters(data)
+      anything_else
+      @parser.phase.processCharacters(data)
+    end
+
+    def startTagBody(name, attributes)
+      @tree.insert_element(name, attributes)
+      @parser.phase = @parser.phases[:inBody]
+    end
+
+    def startTagFrameset(name, attributes)
+      @tree.insert_element(name, attributes)
+      @parser.phase = @parser.phases[:inFrameset]
+    end
+
+    def startTagFromHead(name, attributes)
+      parse_error("unexpected-start-tag-out-of-my-head", {"name" => name})
+      @parser.phase = @parser.phases[:inHead]
+      @parser.phase.processStartTag(name, attributes)
+    end
+
+    def startTagOther(name, attributes)
+      anything_else
+      @parser.phase.processStartTag(name, attributes)
+    end
+
+    def processEndTag(name)
+      anything_else
+      @parser.phase.processEndTag(name)
+    end
+
+    def anything_else
+      @tree.insert_element('body', {})
+      @parser.phase = @parser.phases[:inBody]
+    end
+
+  end
+end
--- a/attic/vendor/plugins/HTML5lib/lib/html5/html5parser/before_head_phase.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/html5parser/before_head_phase.rb
@ -0,0 +1,41 @@
+require 'html5/html5parser/phase'
+
+module HTML5
+  class BeforeHeadPhase < Phase
+
+    handle_start 'html', 'head'
+
+    handle_end %w( html head body br p ) => 'ImplyHead'
+
+    def process_eof
+      startTagHead('head', {})
+      @parser.phase.process_eof
+    end
+
+    def processCharacters(data)
+      startTagHead('head', {})
+      @parser.phase.processCharacters(data)
+    end
+
+    def startTagHead(name, attributes)
+      @tree.insert_element(name, attributes)
+      @tree.head_pointer = @tree.open_elements[-1]
+      @parser.phase = @parser.phases[:inHead]
+    end
+
+    def startTagOther(name, attributes)
+      startTagHead('head', {})
+      @parser.phase.processStartTag(name, attributes)
+    end
+
+    def endTagImplyHead(name)
+      startTagHead('head', {})
+      @parser.phase.processEndTag(name)
+    end
+
+    def endTagOther(name)
+      parse_error("end-tag-after-implied-root", {"name" => name})
+    end
+
+  end
+end
--- a/attic/vendor/plugins/HTML5lib/lib/html5/html5parser/in_body_phase.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/html5parser/in_body_phase.rb
@ -0,0 +1,609 @@
+require 'html5/html5parser/phase'
+
+module HTML5
+  class InBodyPhase < Phase
+
+    # http://www.whatwg.org/specs/web-apps/current-work/#in-body
+
+    handle_start 'html'
+    handle_start %w(base link meta script style) => 'ProcessInHead'
+    handle_start 'title'
+
+    handle_start 'body', 'form', 'plaintext', 'a', 'button', 'xmp', 'table', 'hr', 'image'
+
+    handle_start 'input', 'textarea', 'select', 'isindex', %w(marquee object)
+
+    handle_start %w(li dd dt) => 'ListItem'
+
+    handle_start %w(address blockquote center dir div dl fieldset listing menu ol p pre ul) => 'CloseP'
+
+    handle_start %w(b big em font i s small strike strong tt u) => 'Formatting'
+    handle_start 'nobr'
+
+    handle_start %w(area basefont bgsound br embed img param spacer wbr) => 'VoidFormatting'
+
+    handle_start %w(iframe noembed noframes noscript) => 'Cdata', HEADING_ELEMENTS => 'Heading'
+
+    handle_start %w(caption col colgroup frame frameset head option optgroup tbody td tfoot th thead tr) => 'Misplaced'
+
+    handle_start %w(event-source section nav article aside header footer datagrid command) => 'New'
+
+    handle_end 'p', 'body', 'html', 'form', %w(button marquee object), %w(dd dt li) => 'ListItem'
+
+    handle_end %w(address blockquote center div dl fieldset listing menu ol pre ul) => 'Block'
+
+    handle_end HEADING_ELEMENTS => 'Heading'
+
+    handle_end %w(a b big em font i nobr s small strike strong tt u) => 'Formatting'
+
+    handle_end %w(head frameset select optgroup option table caption colgroup col thead tfoot tbody tr td th) => 'Misplaced'
+
+    handle_end 'br'
+
+    handle_end %w(area basefont bgsound embed hr image img input isindex param spacer wbr frame) => 'None'
+
+    handle_end %w(noframes noscript noembed textarea xmp iframe ) => 'CdataTextAreaXmp'
+
+    handle_end %w(event-source section nav article aside header footer datagrid command) => 'New'
+
+    def initialize(parser, tree)
+      super(parser, tree)
+
+      # for special handling of whitespace in <pre>
+      class << self
+        alias processSpaceCharactersNonPre processSpaceCharacters
+      end
+    end
+
+    def processSpaceCharactersDropNewline(data)
+      # #Sometimes (start of <pre> blocks) we want to drop leading newlines
+
+      class << self
+        remove_method :processSpaceCharacters rescue nil
+        alias processSpaceCharacters processSpaceCharactersNonPre
+      end
+      
+      if (data.length > 0 and data[0] == ?\n && 
+        %w[pre textarea].include?(@tree.open_elements.last.name) && !@tree.open_elements.last.hasContent)
+        data = data[1..-1]
+      end
+
+      if data.length > 0
+        @tree.reconstructActiveFormattingElements
+        @tree.insertText(data)
+      end
+    end
+
+    def processSpaceCharacters(data)
+      @tree.reconstructActiveFormattingElements()
+      @tree.insertText(data)
+    end
+
+    def processCharacters(data)
+      # XXX The specification says to do this for every character at the
+      # moment, but apparently that doesn't match the real world so we don't
+      # do it for space characters.
+      @tree.reconstructActiveFormattingElements
+      @tree.insertText(data)
+    end
+
+    def startTagProcessInHead(name, attributes)
+      @parser.phases[:inHead].processStartTag(name, attributes)
+    end
+
+    def startTagTitle(name, attributes)
+      parse_error("unexpected-start-tag-out-of-my-head", {"name" => name})
+      @parser.phases[:inHead].processStartTag(name, attributes)
+    end
+
+    def startTagBody(name, attributes)
+      parse_error("unexpected-start-tag", {"name" => "body"})
+
+      if @tree.open_elements.length == 1 || @tree.open_elements[1].name != 'body'
+        assert @parser.inner_html
+      else
+        attributes.each do |attr, value|
+          unless @tree.open_elements[1].attributes.has_key?(attr)
+            @tree.open_elements[1].attributes[attr] = value
+          end
+        end
+      end
+    end
+
+    def startTagCloseP(name, attributes)
+      endTagP('p') if in_scope?('p')
+      @tree.insert_element(name, attributes)
+      if name == 'pre'
+        class << self
+          remove_method :processSpaceCharacters rescue nil
+          alias processSpaceCharacters processSpaceCharactersDropNewline
+        end
+      end
+    end
+
+    def startTagForm(name, attributes)
+      if @tree.formPointer
+        parse_error("unexpected-start-tag", {"name" => name})
+      else
+        endTagP('p') if in_scope?('p')
+        @tree.insert_element(name, attributes)
+        @tree.formPointer = @tree.open_elements.last
+      end
+    end
+
+    def startTagListItem(name, attributes)
+      endTagP('p') if in_scope?('p')
+      stopNames = {'li' => ['li'], 'dd' => ['dd', 'dt'], 'dt' => ['dd', 'dt']}
+      stopName = stopNames[name]
+
+      @tree.open_elements.reverse.each_with_index do |node, i|
+        if stopName.include?(node.name)
+          poppedNodes = (0..i).collect { @tree.open_elements.pop }
+          if i >= 1
+            parse_error(
+                i == 1 ? "missing-end-tag" : "missing-end-tags",
+                {"name" => poppedNodes[0..-1].collect{|n| n.name}.join(", ")})
+
+          end
+          break
+        end
+
+        # Phrasing elements are all non special, non scoping, non
+        # formatting elements
+        break if ((SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name) && !%w[address div].include?(node.name))
+      end
+
+      # Always insert an <li> element.
+      @tree.insert_element(name, attributes)
+    end
+
+    def startTagPlaintext(name, attributes)
+      endTagP('p') if in_scope?('p')
+      @tree.insert_element(name, attributes)
+      @parser.tokenizer.content_model_flag = :PLAINTEXT
+    end
+
+    def startTagHeading(name, attributes)
+      endTagP('p') if in_scope?('p')
+
+      # Uncomment the following for IE7 behavior:
+      # HEADING_ELEMENTS.each do |element|
+      #   if in_scope?(element)
+      #     parse_error("unexpected-start-tag", {"name" => name})
+      # 
+      #     remove_open_elements_until do |element|
+      #       HEADING_ELEMENTS.include?(element.name)
+      #     end
+      #
+      #     break
+      #   end
+      # end
+      @tree.insert_element(name, attributes)
+    end
+
+    def startTagA(name, attributes)
+      if afeAElement = @tree.elementInActiveFormattingElements('a')
+        parse_error("unexpected-start-tag-implies-end-tag", {"startName" => "a", "endName" => "a"})
+        endTagFormatting('a')
+        @tree.open_elements.delete(afeAElement) if @tree.open_elements.include?(afeAElement)
+        @tree.activeFormattingElements.delete(afeAElement) if @tree.activeFormattingElements.include?(afeAElement)
+      end
+      @tree.reconstructActiveFormattingElements
+      addFormattingElement(name, attributes)
+    end
+
+    def startTagFormatting(name, attributes)
+      @tree.reconstructActiveFormattingElements
+      addFormattingElement(name, attributes)
+    end
+
+    def startTagNobr(name, attributes)
+      @tree.reconstructActiveFormattingElements
+      if in_scope?('nobr')
+        parse_error("unexpected-start-tag-implies-end-tag", {"startName" => "nobr", "endName" => "nobr"})
+        processEndTag('nobr')
+        # XXX Need tests that trigger the following
+        @tree.reconstructActiveFormattingElements
+      end
+      addFormattingElement(name, attributes)
+    end
+
+    def startTagButton(name, attributes)
+      if in_scope?('button')
+        parse_error("unexpected-start-tag-implies-end-tag", {"startName" => "button", "endName" => "button"})
+        processEndTag('button')
+        @parser.phase.processStartTag(name, attributes)
+      else
+        @tree.reconstructActiveFormattingElements
+        @tree.insert_element(name, attributes)
+        @tree.activeFormattingElements.push(Marker)
+      end
+    end
+
+    def startTagMarqueeObject(name, attributes)
+      @tree.reconstructActiveFormattingElements
+      @tree.insert_element(name, attributes)
+      @tree.activeFormattingElements.push(Marker)
+    end
+
+    def startTagXmp(name, attributes)
+      @tree.reconstructActiveFormattingElements
+      @tree.insert_element(name, attributes)
+      @parser.tokenizer.content_model_flag = :CDATA
+    end
+
+    def startTagTable(name, attributes)
+      processEndTag('p') if in_scope?('p')
+      @tree.insert_element(name, attributes)
+      @parser.phase = @parser.phases[:inTable]
+    end
+
+    def startTagVoidFormatting(name, attributes)
+      @tree.reconstructActiveFormattingElements
+      @tree.insert_element(name, attributes)
+      @tree.open_elements.pop
+    end
+
+    def startTagHr(name, attributes)
+      endTagP('p') if in_scope?('p')
+      @tree.insert_element(name, attributes)
+      @tree.open_elements.pop
+    end
+
+    def startTagImage(name, attributes)
+      # No really...
+      parse_error("unexpected-start-tag-treated-as", {"originalName" => "image", "newName" => "img"})
+      processStartTag('img', attributes)
+    end
+
+    def startTagInput(name, attributes)
+      @tree.reconstructActiveFormattingElements
+      @tree.insert_element(name, attributes)
+      if @tree.formPointer
+        # XXX Not exactly sure what to do here
+        # @tree.open_elements[-1].form = @tree.formPointer
+      end
+      @tree.open_elements.pop
+    end
+
+    def startTagIsindex(name, attributes)
+      parse_error("deprecated-tag", {"name" => "isindex"})
+      return if @tree.formPointer
+      processStartTag('form', {})
+      processStartTag('hr', {})
+      processStartTag('p', {})
+      processStartTag('label', {})
+      # XXX Localization ...
+      processCharacters('This is a searchable index. Insert your search keywords here: ')
+      attributes['name'] = 'isindex'
+      attrs = attributes.to_a
+      processStartTag('input', attributes)
+      processEndTag('label')
+      processEndTag('p')
+      processStartTag('hr', {})
+      processEndTag('form')
+    end
+
+    def startTagTextarea(name, attributes)
+      # XXX Form element pointer checking here as well...
+      @tree.insert_element(name, attributes)
+      @parser.tokenizer.content_model_flag = :RCDATA
+      class << self
+        remove_method :processSpaceCharacters rescue nil
+        alias processSpaceCharacters processSpaceCharactersDropNewline
+      end
+    end
+
+    # iframe, noembed noframes, noscript(if scripting enabled)
+    def startTagCdata(name, attributes)
+      @tree.insert_element(name, attributes)
+      @parser.tokenizer.content_model_flag = :CDATA
+    end
+
+    def startTagSelect(name, attributes)
+      @tree.reconstructActiveFormattingElements
+      @tree.insert_element(name, attributes)
+      @parser.phase = @parser.phases[:inSelect]
+    end
+
+    def startTagMisplaced(name, attributes)
+      # Elements that should be children of other elements that have a
+      # different insertion mode; here they are ignored
+      # "caption", "col", "colgroup", "frame", "frameset", "head",
+      # "option", "optgroup", "tbody", "td", "tfoot", "th", "thead",
+      # "tr", "noscript"
+      parse_error("unexpected-start-tag-ignored", {"name" => name})
+    end
+
+    def startTagNew(name, attributes)
+      # New HTML5 elements, "event-source", "section", "nav",
+      # "article", "aside", "header", "footer", "datagrid", "command"
+      # $stderr.puts("Warning: Undefined behaviour for start tag #{name}")
+      startTagOther(name, attributes)
+      #raise NotImplementedError
+    end
+
+    def startTagOther(name, attributes)
+      @tree.reconstructActiveFormattingElements
+      @tree.insert_element(name, attributes)
+    end
+
+    def endTagP(name)
+      @tree.generateImpliedEndTags('p') if in_scope?('p')
+      parse_error("unexpected-end-tag", {"name" => "p"}) unless @tree.open_elements.last.name == 'p'
+      if in_scope?('p')
+        @tree.open_elements.pop while in_scope?('p')
+      else
+        startTagCloseP('p', {})
+        endTagP('p')
+      end
+    end
+
+    def endTagBody(name)
+      # XXX Need to take open <p> tags into account here. We shouldn't imply
+      # </p> but we should not throw a parse error either. Specification is
+      # likely to be updated.
+      unless @tree.open_elements[1] && @tree.open_elements[1].name == 'body'
+        # inner_html case
+        parse_error "unexpected-end-tag", {:name => 'body'}
+        return
+      end
+      unless @tree.open_elements.last.name == 'body'
+        parse_error("expected-one-end-tag-but-got-another",
+                {"expectedName" => "body",
+                 "gotName" => @tree.open_elements.last.name})
+      end
+      @parser.phase = @parser.phases[:afterBody]
+    end
+
+    def endTagHtml(name)
+      endTagBody(name)
+      @parser.phase.processEndTag(name) unless @parser.inner_html
+    end
+
+    def endTagBlock(name)
+      @tree.generateImpliedEndTags if in_scope?(name)
+
+      unless @tree.open_elements.last.name == name
+        parse_error("end-tag-too-early", {"name" => name})
+      end
+
+      if in_scope?(name)
+        remove_open_elements_until(name)
+      end
+    end
+
+    def endTagForm(name)
+      if in_scope?(name)
+        @tree.generateImpliedEndTags
+      end
+      if @tree.open_elements.last.name != name
+        parse_error("end-tag-too-early-ignored", {"name" => "form"})
+      else
+        @tree.open_elements.pop
+      end
+      @tree.formPointer = nil
+    end
+
+    def endTagListItem(name)
+      # AT Could merge this with the Block case
+      @tree.generateImpliedEndTags(name) if in_scope?(name)
+
+      unless @tree.open_elements.last.name == name
+        parse_error("end-tag-too-early", {"name" => name})
+      end
+
+      remove_open_elements_until(name) if in_scope?(name)
+    end  
+
+    def endTagHeading(name)
+      HEADING_ELEMENTS.each do |element|
+        if in_scope?(element)
+          @tree.generateImpliedEndTags
+          break
+        end
+      end
+
+      unless @tree.open_elements.last.name == name
+        parse_error("end-tag-too-early", {"name" => name})
+      end
+
+      HEADING_ELEMENTS.each do |element|
+        if in_scope?(element)
+          remove_open_elements_until {|element| HEADING_ELEMENTS.include?(element.name)}
+          break
+        end
+      end
+    end
+
+    # The much-feared adoption agency algorithm: end-tag handler for formatting elements
+    def endTagFormatting(name)  # also called directly by startTagA with 'a'
+      # http://www.whatwg.org/specs/web-apps/current-work/#adoptionAgency
+      # XXX Better parse_error messages appreciated.
+      while true
+        # Step 1 paragraph 1: no entry for name in the active formatting list, or it is on the stack but not in scope -- report and stop
+        afeElement = @tree.elementInActiveFormattingElements(name)
+        if !afeElement or (@tree.open_elements.include?(afeElement) && !in_scope?(afeElement.name))
+          parse_error("adoption-agency-1.1", {"name" => name})
+          return
+        # Step 1 paragraph 2: the entry is not on the open-element stack -- report, drop it from the list and stop
+        elsif not @tree.open_elements.include?(afeElement)
+          parse_error("adoption-agency-1.2", {"name" => name})
+          @tree.activeFormattingElements.delete(afeElement)
+          return
+        end
+
+        # Step 1 paragraph 3: the formatting element is not the current node -- report, but carry on
+        if afeElement != @tree.open_elements.last
+          parse_error("adoption-agency-1.3", {"name" => name})
+        end
+
+        # Step 2
+        # Start of the adoption agency algorithm proper: furthestBlock is the first special/scoping element from afeElement's stack position on
+        afeIndex = @tree.open_elements.index(afeElement)
+        furthestBlock = nil
+        @tree.open_elements[afeIndex..-1].each do |element|
+          if (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(element.name)
+            furthestBlock = element
+            break
+          end
+        end
+
+        # Step 3: no furthest block -- unwind the stack up to afeElement, drop the returned element from the active list and stop
+        if furthestBlock.nil?
+          element = remove_open_elements_until {|element| element == afeElement }
+          @tree.activeFormattingElements.delete(element)
+          return
+        end
+        commonAncestor = @tree.open_elements[afeIndex - 1]  # Step 4: the stack entry just before afeElement
+
+        # Step 5: detach the furthest block from its parent, if it has one
+        furthestBlock.parent.removeChild(furthestBlock) if furthestBlock.parent
+
+        # Step 6
+        # The bookmark is supposed to help us identify where to reinsert
+        # nodes in step 12. We have to ensure that we reinsert nodes after
+        # the node before the active formatting element. Note the bookmark
+        # can move in step 7.4
+        bookmark = @tree.activeFormattingElements.index(afeElement)
+
+        # Step 7: walk down the stack from the furthest block towards afeElement
+        lastNode = node = furthestBlock
+        while true
+          # AT replace this with a function and recursion?
+          # Node becomes the element before node in open elements
+          node = @tree.open_elements[@tree.open_elements.index(node) - 1]
+          until @tree.activeFormattingElements.include?(node)  # entries not in the active list are removed from the stack
+            tmpNode = node
+            node = @tree.open_elements[@tree.open_elements.index(node) - 1]
+            @tree.open_elements.delete(tmpNode)
+          end
+          # Step 7.3: reaching the formatting element ends the walk
+          break if node == afeElement
+          # Step 7.4: on the first pass, move the bookmark just past node in the active list
+          if lastNode == furthestBlock
+            # XXX should this be index(node) or index(node)+1
+            # Anne: I think +1 is ok. Given x = [2,3,4,5]
+            # x.index(3) gives 1 and then x[1 +1] gives 4...
+            bookmark = @tree.activeFormattingElements.index(node) + 1
+          end
+          # Step 7.5: a node that already has content is replaced by a clone in both lists
+          cite = node.parent  # NOTE(review): cite is never read in this method
+          if node.hasContent
+            clone = node.cloneNode
+            # Replace node with clone
+            @tree.activeFormattingElements[@tree.activeFormattingElements.index(node)] = clone
+            @tree.open_elements[@tree.open_elements.index(node)] = clone
+            node = clone
+          end
+          # Step 7.6
+          # Remove lastNode from its parent, if any, and append it to node
+          lastNode.parent.removeChild(lastNode) if lastNode.parent
+          node.appendChild(lastNode)
+          # Step 7.7
+          lastNode = node
+          # End of inner loop
+        end
+
+        # Step 8: move lastNode under the common ancestor
+        lastNode.parent.removeChild(lastNode) if lastNode.parent
+        commonAncestor.appendChild(lastNode)
+
+        # Step 9: clone the formatting element
+        clone = afeElement.cloneNode
+
+        # Step 10: reparentChildren -- presumably moves the furthest block's children under the clone (TODO confirm in the treebuilder)
+        furthestBlock.reparentChildren(clone)
+
+        # Step 11: the clone becomes a child of the furthest block
+        furthestBlock.appendChild(clone)
+
+        # Step 12: in the active formatting list, afeElement is removed and the clone inserted at the bookmark (capped at the list length)
+        @tree.activeFormattingElements.delete(afeElement)
+        @tree.activeFormattingElements.insert([bookmark,@tree.activeFormattingElements.length].min, clone)
+
+        # Step 13: on the open-element stack, afeElement is removed and the clone inserted right after the furthest block
+        @tree.open_elements.delete(afeElement)
+        @tree.open_elements.insert(@tree.open_elements.index(furthestBlock) + 1, clone)
+      end
+    end
+
+    def endTagButtonMarqueeObject(name)
+      @tree.generateImpliedEndTags if in_scope?(name)
+
+      unless @tree.open_elements.last.name == name
+        parse_error("end-tag-too-early", {"name" => name})
+      end
+
+      if in_scope?(name)
+        remove_open_elements_until(name)
+
+        @tree.clearActiveFormattingElements
+      end
+    end
+
+    def endTagMisplaced(name)
+      # This handles elements with end tags in other insertion modes.
+      parse_error("unexpected-end-tag", {"name" => name})
+    end
+
+    def endTagBr(name)
+      parse_error("unexpected-end-tag-treated-as",
+            {"originalName" => "br", "newName" => "br element"})
+      @tree.reconstructActiveFormattingElements
+      @tree.insert_element(name, {})
+      @tree.open_elements.pop()
+    end
+
+    def endTagNone(name)
+      # This handles elements with no end tag.
+      parse_error("no-end-tag", {"name" => name})
+    end
+
+    def endTagCdataTextAreaXmp(name)
+      if @tree.open_elements.last.name == name
+        @tree.open_elements.pop
+      else
+        parse_error("unexpected-end-tag", {"name" => name})
+      end
+    end
+
+    def endTagNew(name)
+      # New HTML5 elements, "event-source", "section", "nav",
+      # "article", "aside", "header", "footer", "datagrid", "command"
+      # STDERR.puts "Warning: Undefined behaviour for end tag #{name}"
+      endTagOther(name)
+      #raise NotImplementedError
+    end
+
+    def endTagOther(name)
+      # XXX This logic should be moved into the treebuilder
+      @tree.open_elements.reverse.each do |node|
+        if node.name == name
+          @tree.generateImpliedEndTags
+
+          unless @tree.open_elements.last.name == name
+            parse_error("unexpected-end-tag", {"name" => name})
+          end
+
+          remove_open_elements_until {|element| element == node }
+
+          break
+        else
+          if (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name)
+            parse_error("unexpected-end-tag", {"name" => name})
+            break
+          end
+        end
+      end
+    end
+
+    protected
+
+    def addFormattingElement(name, attributes)
+      @tree.insert_element(name, attributes)
+      @tree.activeFormattingElements.push(@tree.open_elements.last)
+    end
+
+  end
+end
--- a/attic/vendor/plugins/HTML5lib/lib/html5/html5parser/in_caption_phase.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/html5parser/in_caption_phase.rb
@ -0,0 +1,69 @@
+require 'html5/html5parser/phase'
+
+module HTML5
+  class InCaptionPhase < Phase
+
+    # http://www.whatwg.org/specs/web-apps/current-work/#in-caption
+
+    handle_start 'html', %w(caption col colgroup tbody td tfoot th thead tr) => 'TableElement'
+
+    handle_end 'caption', 'table', %w(body col colgroup html tbody td tfoot th thead tr) => 'Ignore'
+
+    def ignoreEndTagCaption
+      !in_scope?('caption', true)
+    end
+
+    def processCharacters(data)
+      @parser.phases[:inBody].processCharacters(data)
+    end
+
+    def startTagTableElement(name, attributes)
+      parse_error "unexpected-end-tag", {"name" => name}
+      #XXX Have to duplicate logic here to find out if the tag is ignored
+      ignoreEndTag = ignoreEndTagCaption
+      @parser.phase.processEndTag('caption')
+      @parser.phase.processStartTag(name, attributes) unless ignoreEndTag
+    end
+
+    def startTagOther(name, attributes)
+      @parser.phases[:inBody].processStartTag(name, attributes)
+    end
+
+    def endTagCaption(name)
+      if ignoreEndTagCaption
+        # inner_html case
+        assert @parser.inner_html
+        parse_error "unexpected-end-tag", {"name" => name}
+      else
+        # AT this code is quite similar to endTagTable in "InTable"
+        @tree.generateImpliedEndTags
+
+        unless @tree.open_elements[-1].name == 'caption'
+          parse_error("expected-one-end-tag-but-got-another",
+                    {"gotName" => "caption",
+                     "expectedName" => @tree.open_elements.last.name})
+        end
+
+        remove_open_elements_until('caption')
+
+        @tree.clearActiveFormattingElements
+        @parser.phase = @parser.phases[:inTable]
+      end
+    end
+
+    def endTagTable(name)
+      parse_error "unexpected-end-table-in-caption"
+      ignoreEndTag = ignoreEndTagCaption
+      @parser.phase.processEndTag('caption')
+      @parser.phase.processEndTag(name) unless ignoreEndTag
+    end
+
+    def endTagIgnore(name)
+      parse_error("unexpected-end-tag", {"name" => name})
+    end
+
+    def endTagOther(name)
+      @parser.phases[:inBody].processEndTag(name)
+    end
+  end
+end
--- a/attic/vendor/plugins/HTML5lib/lib/html5/html5parser/in_cell_phase.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/html5parser/in_cell_phase.rb
@ -0,0 +1,78 @@
+require 'html5/html5parser/phase'
+
+module HTML5
+  class InCellPhase < Phase
+
+    # http://www.whatwg.org/specs/web-apps/current-work/#in-cell
+
+    handle_start 'html', %w( caption col colgroup tbody td tfoot th thead tr ) => 'TableOther'
+
+    handle_end %w( td th ) => 'TableCell', %w( body caption col colgroup html ) => 'Ignore'
+
+    handle_end %w( table tbody tfoot thead tr ) => 'Imply'
+
+    def processCharacters(data)
+      @parser.phases[:inBody].processCharacters(data)
+    end
+
+    def startTagTableOther(name, attributes)
+      if in_scope?('td', true) or in_scope?('th', true)
+        closeCell
+        @parser.phase.processStartTag(name, attributes)
+      else
+        # inner_html case
+        parse_error
+      end
+    end
+
+    def startTagOther(name, attributes)
+      @parser.phases[:inBody].processStartTag(name, attributes)
+    end
+
+    def endTagTableCell(name)
+      if in_scope?(name, true)
+        @tree.generateImpliedEndTags(name)
+        if @tree.open_elements.last.name != name
+          parse_error("unexpected-cell-end-tag", {"name" => name})
+
+          remove_open_elements_until(name)
+        else
+          @tree.open_elements.pop
+        end
+        @tree.clearActiveFormattingElements
+        @parser.phase = @parser.phases[:inRow]
+      else
+        parse_error("unexpected-end-tag", {"name" => name})
+      end
+    end
+
+    def endTagIgnore(name)
+      parse_error("unexpected-end-tag", {"name" => name})
+    end
+
+    def endTagImply(name)
+      if in_scope?(name, true)
+        closeCell
+        @parser.phase.processEndTag(name)
+      else
+        # sometimes inner_html case
+        parse_error "unexpected-end-tag", {:name => name}
+      end
+    end
+
+    def endTagOther(name)
+      @parser.phases[:inBody].processEndTag(name)
+    end
+
+    protected
+
+    def closeCell
+      if in_scope?('td', true)
+        endTagTableCell('td')
+      elsif in_scope?('th', true)
+        endTagTableCell('th')
+      end
+    end
+
+  end
+end
--- a/attic/vendor/plugins/HTML5lib/lib/html5/html5parser/in_column_group_phase.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/html5parser/in_column_group_phase.rb
@ -0,0 +1,55 @@
+require 'html5/html5parser/phase'
+
+module HTML5
+  class InColumnGroupPhase < Phase
+
+    # http://www.whatwg.org/specs/web-apps/current-work/#in-column
+
+    handle_start 'html', 'col'
+
+    handle_end 'colgroup', 'col'
+
+    def ignoreEndTagColgroup
+      @tree.open_elements[-1].name == 'html'
+    end
+
+    def processCharacters(data)
+      ignoreEndTag = ignoreEndTagColgroup
+      endTagColgroup("colgroup")
+      @parser.phase.processCharacters(data) unless ignoreEndTag
+    end
+
+    def startTagCol(name, attributes)
+      @tree.insert_element(name, attributes)
+      @tree.open_elements.pop
+    end
+
+    def startTagOther(name, attributes)
+      ignoreEndTag = ignoreEndTagColgroup
+      endTagColgroup('colgroup')
+      @parser.phase.processStartTag(name, attributes) unless ignoreEndTag
+    end
+
+    def endTagColgroup(name)
+      if ignoreEndTagColgroup
+        # inner_html case
+        assert @parser.inner_html
+        parse_error "unexpected-end-tag", {:name => name}
+      else
+        @tree.open_elements.pop
+        @parser.phase = @parser.phases[:inTable]
+      end
+    end
+
+    def endTagCol(name)
+      parse_error("no-end-tag", {"name" => "col"})
+    end
+
+    def endTagOther(name)
+      ignoreEndTag = ignoreEndTagColgroup
+      endTagColgroup('colgroup')
+      @parser.phase.processEndTag(name) unless ignoreEndTag
+    end
+
+  end
+end
--- a/attic/vendor/plugins/HTML5lib/lib/html5/html5parser/in_frameset_phase.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/html5parser/in_frameset_phase.rb
@ -0,0 +1,56 @@
+require 'html5/html5parser/phase'
+
+module HTML5
+  class InFramesetPhase < Phase
+
+    # http://www.whatwg.org/specs/web-apps/current-work/#in-frameset
+
+    handle_start 'html', 'frameset', 'frame', 'noframes'
+
+    handle_end 'frameset', 'noframes'
+
+    def processCharacters(data)
+      parse_error("unexpected-char-in-frameset")
+    end
+
+    def startTagFrameset(name, attributes)
+      @tree.insert_element(name, attributes)
+    end
+
+    def startTagFrame(name, attributes)
+      @tree.insert_element(name, attributes)
+      @tree.open_elements.pop
+    end
+
+    def startTagNoframes(name, attributes)
+      @parser.phases[:inBody].processStartTag(name, attributes)
+    end
+
+    def startTagOther(name, attributes)
+      parse_error("unexpected-start-tag-in-frameset", {"name" => name})
+    end
+
+    def endTagFrameset(name)
+      if @tree.open_elements.last.name == 'html'
+        # inner_html case
+        parse_error("unexpected-frameset-in-frameset-innerhtml")
+      else
+        @tree.open_elements.pop
+      end
+      if (not @parser.inner_html and
+        @tree.open_elements.last.name != 'frameset')
+        # If we're not in inner_html mode and the the current node is not a
+        # "frameset" element (anymore) then switch.
+        @parser.phase = @parser.phases[:afterFrameset]
+      end
+    end
+
+    def endTagNoframes(name)
+      @parser.phases[:inBody].processEndTag(name)
+    end
+
+    def endTagOther(name)
+      parse_error("unexpected-end-tag-in-frameset", {"name" => name})
+    end
+  end
+end
--- a/attic/vendor/plugins/HTML5lib/lib/html5/html5parser/in_head_phase.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/html5parser/in_head_phase.rb
@ -0,0 +1,138 @@
+require 'html5/html5parser/phase'
+
+module HTML5
+  class InHeadPhase < Phase
+
+    handle_start 'html', 'head', 'title', 'style', 'script', 'noscript'
+    handle_start %w( base link meta )
+
+    handle_end 'head'
+    handle_end %w( html body br p ) => 'ImplyAfterHead'
+    handle_end %w( title style script noscript )
+
+    def process_eof
+      if ['title', 'style', 'script'].include?(name = @tree.open_elements.last.name)
+        parse_error("expected-named-closing-tag-but-got-eof", {"name" => @tree.open_elements.last.name})
+        @tree.open_elements.pop
+      end
+      anything_else
+      @parser.phase.process_eof
+    end
+
+    def processCharacters(data)
+      if %w[title style script noscript].include?(@tree.open_elements.last.name)
+        @tree.insertText(data)
+      else
+        anything_else
+        @parser.phase.processCharacters(data)
+      end
+    end
+
+    def startTagHead(name, attributes)
+      parse_error("two-heads-are-not-better-than-one")
+    end
+
+    def startTagTitle(name, attributes)
+      element = @tree.createElement(name, attributes)
+      appendToHead(element)
+      @tree.open_elements.push(element)
+      @parser.tokenizer.content_model_flag = :RCDATA
+    end
+
+    def startTagStyle(name, attributes)
+      element = @tree.createElement(name, attributes)
+      if @tree.head_pointer != nil and @parser.phase == @parser.phases[:inHead]
+        appendToHead(element)
+      else
+        @tree.open_elements.last.appendChild(element)
+      end
+      @tree.open_elements.push(element)
+      @parser.tokenizer.content_model_flag = :CDATA
+    end
+
+    def startTagNoscript(name, attributes)
+      # XXX Need to decide whether to implement the scripting disabled case.
+      element = @tree.createElement(name, attributes)
+      if @tree.head_pointer !=nil and @parser.phase == @parser.phases[:inHead]
+        appendToHead(element)
+      else
+        @tree.open_elements.last.appendChild(element)
+      end
+      @tree.open_elements.push(element)
+      @parser.tokenizer.content_model_flag = :CDATA
+    end
+
+    def startTagScript(name, attributes)
+      #XXX Inner HTML case may be wrong
+      element = @tree.createElement(name, attributes)
+      element._flags.push("parser-inserted")
+      if @tree.head_pointer != nil and @parser.phase == @parser.phases[:inHead]
+        appendToHead(element)
+      else
+        @tree.open_elements.last.appendChild(element)
+      end
+      @tree.open_elements.push(element)
+      @parser.tokenizer.content_model_flag = :CDATA
+    end
+
+    def startTagBaseLinkMeta(name, attributes)
+      element = @tree.createElement(name, attributes)
+      if @tree.head_pointer != nil and @parser.phase == @parser.phases[:inHead]
+        appendToHead(element)
+      else
+        @tree.open_elements.last.appendChild(element)
+      end
+    end
+
+    def startTagOther(name, attributes)
+      anything_else
+      @parser.phase.processStartTag(name, attributes)
+    end
+
+    def endTagHead(name)
+      if @tree.open_elements.last.name == 'head'
+        @tree.open_elements.pop
+      else
+        parse_error("unexpected-end-tag", {"name" => "head"})
+      end
+      @parser.phase = @parser.phases[:afterHead]
+    end
+
+    def endTagImplyAfterHead(name)
+      anything_else
+      @parser.phase.processEndTag(name)
+    end
+
+    def endTagTitleStyleScriptNoscript(name)
+      if @tree.open_elements.last.name == name
+        @tree.open_elements.pop
+      else
+        parse_error("unexpected-end-tag", {"name" => name})
+      end
+    end
+
+    def endTagOther(name)
+      parse_error("unexpected-end-tag", {"name" => name})
+    end
+
+    def anything_else
+      if @tree.open_elements.last.name == 'head'
+        endTagHead('head')
+      else
+        @parser.phase = @parser.phases[:afterHead]
+      end
+    end
+
+    protected
+
+    def appendToHead(element)
+      if @tree.head_pointer.nil?
+        assert @parser.inner_html
+        @tree.open_elements.last.appendChild(element)
+      else
+        @tree.head_pointer.appendChild(element)
+      end
+    end
+
+  end
+end
--- a/attic/vendor/plugins/HTML5lib/lib/html5/html5parser/in_row_phase.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/html5parser/in_row_phase.rb
@ -0,0 +1,88 @@
+require 'html5/html5parser/phase'
+
+module HTML5
+  class InRowPhase < Phase
+
+    # http://www.whatwg.org/specs/web-apps/current-work/#in-row
+
+    handle_start 'html', %w( td th ) => 'TableCell', %w( caption col colgroup tbody tfoot thead tr ) => 'TableOther'
+
+    handle_end 'tr', 'table', %w( tbody tfoot thead ) => 'TableRowGroup', %w( body caption col colgroup html td th ) => 'Ignore'
+
+    def processCharacters(data)
+      @parser.phases[:inTable].processCharacters(data)
+    end
+
+    def startTagTableCell(name, attributes)
+      clearStackToTableRowContext
+      @tree.insert_element(name, attributes)
+      @parser.phase = @parser.phases[:inCell]
+      @tree.activeFormattingElements.push(Marker)
+    end
+
+    def startTagTableOther(name, attributes)
+      ignoreEndTag = ignoreEndTagTr
+      endTagTr('tr')
+      # XXX how are we sure it's always ignored in the inner_html case?
+      @parser.phase.processStartTag(name, attributes) unless ignoreEndTag
+    end
+
+    def startTagOther(name, attributes)
+      @parser.phases[:inTable].processStartTag(name, attributes)
+    end
+
+    def endTagTr(name)
+      if ignoreEndTagTr
+        # inner_html case
+        assert @parser.inner_html
+        parse_error "unexpected-end-tag", {:name => name}
+      else
+        clearStackToTableRowContext
+        @tree.open_elements.pop
+        @parser.phase = @parser.phases[:inTableBody]
+      end
+    end
+
+    def endTagTable(name)
+      ignoreEndTag = ignoreEndTagTr
+      endTagTr('tr')
+      # Reprocess the current tag if the tr end tag was not ignored
+      # XXX how are we sure it's always ignored in the inner_html case?
+      @parser.phase.processEndTag(name) unless ignoreEndTag
+    end
+
+    def endTagTableRowGroup(name)
+      if in_scope?(name, true)
+        endTagTr('tr')
+        @parser.phase.processEndTag(name)
+      else
+        # inner_html case
+        parse_error "unexpected-end-tag", {:name => name}
+      end
+    end
+
+    def endTagIgnore(name)
+      parse_error("unexpected-end-tag-in-table-row",
+              {"name" => name})
+    end
+
+    def endTagOther(name)
+      @parser.phases[:inTable].processEndTag(name)
+    end
+
+    protected
+
+    # XXX unify this with other table helper methods
+    def clearStackToTableRowContext
+      until %w[tr html].include?(name = @tree.open_elements.last.name)
+        parse_error("unexpected-implied-end-tag-in-table-row", {"name" => @tree.open_elements.last.name})
+        @tree.open_elements.pop
+      end
+    end
+
+    def ignoreEndTagTr
+      not in_scope?('tr', :tableVariant => true)
+    end
+
+  end
+end
--- a/attic/vendor/plugins/HTML5lib/lib/html5/html5parser/in_select_phase.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/html5parser/in_select_phase.rb
@ -0,0 +1,85 @@
+require 'html5/html5parser/phase'
+
+module HTML5
+  class InSelectPhase < Phase
+
+    # http://www.whatwg.org/specs/web-apps/current-work/#in-select
+
+    handle_start 'html', 'option', 'optgroup', 'select'
+
+    handle_end 'option', 'optgroup', 'select', %w( caption table tbody tfoot thead tr td th ) => 'TableElements'
+
+    def processCharacters(data)
+      @tree.insertText(data)
+    end
+
+    def startTagOption(name, attributes)
+      # We need to imply </option> if <option> is the current node.
+      @tree.open_elements.pop if @tree.open_elements.last.name == 'option'
+      @tree.insert_element(name, attributes)
+    end
+
+    def startTagOptgroup(name, attributes)
+      @tree.open_elements.pop if @tree.open_elements.last.name == 'option'
+      @tree.open_elements.pop if @tree.open_elements.last.name == 'optgroup'
+      @tree.insert_element(name, attributes)
+    end
+
+    def startTagSelect(name, attributes)
+      parse_error("unexpected-select-in-select")
+      endTagSelect('select')
+    end
+
+    def startTagOther(name, attributes)
+      parse_error("unexpected-start-tag-in-select", {"name" => name})
+    end
+
+    def endTagOption(name)
+      if @tree.open_elements.last.name == 'option'
+        @tree.open_elements.pop
+      else
+        parse_error("unexpected-end-tag-in-select", {"name" => "option"})
+      end
+    end
+
+    def endTagOptgroup(name)
+      # </optgroup> implicitly closes <option>
+      if @tree.open_elements.last.name == 'option' and @tree.open_elements[-2].name == 'optgroup'
+        @tree.open_elements.pop
+      end
+      # It also closes </optgroup>
+      if @tree.open_elements.last.name == 'optgroup'
+        @tree.open_elements.pop
+      # But nothing else
+      else
+        parse_error("unexpected-end-tag-in-select",
+                {"name" => "optgroup"})
+      end
+    end
+
+    def endTagSelect(name)
+      if in_scope?('select', true)
+        remove_open_elements_until('select')
+
+        @parser.reset_insertion_mode
+      else
+        # inner_html case
+        parse_error
+      end
+    end
+
+    def endTagTableElements(name)
+      parse_error("unexpected-end-tag-in-select", {"name" => name})
+
+      if in_scope?(name, true)
+        endTagSelect('select')
+        @parser.phase.processEndTag(name)
+      end
+    end
+
+    def endTagOther(name)
+      parse_error("unexpected-end-tag-in-select", {"name" => name})
+    end
+
+  end
+end
--- a/attic/vendor/plugins/HTML5lib/lib/html5/html5parser/in_table_body_phase.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/html5parser/in_table_body_phase.rb
@ -0,0 +1,84 @@
+require 'html5/html5parser/phase'
+
+module HTML5
+  class InTableBodyPhase < Phase
+
+    # http://www.whatwg.org/specs/web-apps/current-work/#in-table0
+
+    handle_start 'html', 'tr', %w( td th ) => 'TableCell', %w( caption col colgroup tbody tfoot thead ) => 'TableOther'
+
+    handle_end 'table', %w( tbody tfoot thead ) => 'TableRowGroup', %w( body caption col colgroup html td th tr ) => 'Ignore'
+
+    def processCharacters(data)
+      @parser.phases[:inTable].processCharacters(data)
+    end
+
+    def startTagTr(name, attributes)
+      clearStackToTableBodyContext
+      @tree.insert_element(name, attributes)
+      @parser.phase = @parser.phases[:inRow]
+    end
+
+    def startTagTableCell(name, attributes)
+      parse_error("unexpected-cell-in-table-body", {"name" => name})
+      startTagTr('tr', {})
+      @parser.phase.processStartTag(name, attributes)
+    end
+
+    def startTagTableOther(name, attributes)
+      # XXX AT Any ideas on how to share this with endTagTable?
+      if in_scope?('tbody', true) or in_scope?('thead', true) or in_scope?('tfoot', true)
+        clearStackToTableBodyContext
+        endTagTableRowGroup(@tree.open_elements.last.name)
+        @parser.phase.processStartTag(name, attributes)
+      else
+        # inner_html case
+        parse_error "unexpected-start-tag", {:name => name}
+      end
+    end
+
+    def startTagOther(name, attributes)
+      @parser.phases[:inTable].processStartTag(name, attributes)
+    end
+
+    def endTagTableRowGroup(name)
+      if in_scope?(name, true)
+        clearStackToTableBodyContext
+        @tree.open_elements.pop
+        @parser.phase = @parser.phases[:inTable]
+      else
+        parse_error("unexpected-end-tag-in-table-body", {"name" => name})
+      end
+    end
+
+    def endTagTable(name)
+      if in_scope?('tbody', true) or in_scope?('thead', true) or in_scope?('tfoot', true)
+        clearStackToTableBodyContext
+        endTagTableRowGroup(@tree.open_elements.last.name)
+        @parser.phase.processEndTag(name)
+      else
+        # inner_html case
+        parse_error "unexpected-end-tag", {:name => name}
+      end
+    end
+
+    def endTagIgnore(name)
+      parse_error("unexpected-end-tag-in-table-body", {"name" => name})
+    end
+
+    def endTagOther(name)
+      @parser.phases[:inTable].processEndTag(name)
+    end
+
+    protected
+
+    def clearStackToTableBodyContext
+      until %w[tbody tfoot thead html].include?(name = @tree.open_elements.last.name)
+        parse_error("unexpected-implied-end-tag-in-table",
+                {"name" => @tree.open_elements.last.name})
+        @tree.open_elements.pop
+      end
+    end
+
+  end
+end
--- a/attic/vendor/plugins/HTML5lib/lib/html5/html5parser/in_table_phase.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/html5parser/in_table_phase.rb
@ -0,0 +1,115 @@
+require 'html5/html5parser/phase'
+
+module HTML5
+  class InTablePhase < Phase
+
+    # http://www.whatwg.org/specs/web-apps/current-work/#in-table
+
+    handle_start 'html', 'caption', 'colgroup', 'col', 'table'
+
+    handle_start %w( tbody tfoot thead ) => 'RowGroup', %w( td th tr ) => 'ImplyTbody'
+
+    handle_end 'table', %w( body caption col colgroup html tbody td tfoot th thead tr ) => 'Ignore'
+
+    def processCharacters(data)
+      parse_error("unexpected-char-implies-table-voodoo")
+      # Make all the special element rearranging voodoo kick in
+      @tree.insert_from_table = true
+      # Process the character in the "in body" mode
+      @parser.phases[:inBody].processCharacters(data)
+      @tree.insert_from_table = false
+    end
+
+    def startTagCaption(name, attributes)
+      clearStackToTableContext
+      @tree.activeFormattingElements.push(Marker)
+      @tree.insert_element(name, attributes)
+      @parser.phase = @parser.phases[:inCaption]
+    end
+
+    def startTagColgroup(name, attributes)
+      clearStackToTableContext
+      @tree.insert_element(name, attributes)
+      @parser.phase = @parser.phases[:inColumnGroup]
+    end
+
+    def startTagCol(name, attributes)
+      startTagColgroup('colgroup', {})
+      @parser.phase.processStartTag(name, attributes)
+    end
+
+    def startTagRowGroup(name, attributes)
+      clearStackToTableContext
+      @tree.insert_element(name, attributes)
+      @parser.phase = @parser.phases[:inTableBody]
+    end
+
+    def startTagImplyTbody(name, attributes)
+      startTagRowGroup('tbody', {})
+      @parser.phase.processStartTag(name, attributes)
+    end
+
+    def startTagTable(name, attributes)
+      parse_error("unexpected-start-tag-implies-end-tag",
+            {"startName" => "table", "endName" => "table"})
+      @parser.phase.processEndTag('table')
+      @parser.phase.processStartTag(name, attributes) unless @parser.inner_html
+    end
+
+    def startTagOther(name, attributes)
+      parse_error("unexpected-start-tag-implies-table-voodoo",
+              {"name" => name})
+      # Make all the special element rearranging voodoo kick in
+      @tree.insert_from_table = true
+      # Process the start tag in the "in body" mode
+      @parser.phases[:inBody].processStartTag(name, attributes)
+      @tree.insert_from_table = false
+    end
+
+    def endTagTable(name)
+      if in_scope?('table', true)
+        @tree.generateImpliedEndTags
+
+        unless @tree.open_elements.last.name == 'table'
+          parse_error("end-tag-too-early-named",
+                    {"gotName" => "table",
+                     "expectedName" => @tree.open_elements.last.name})
+        end
+
+        remove_open_elements_until('table')
+
+        @parser.reset_insertion_mode
+      else
+        # inner_html case
+        assert @parser.inner_html
+        parse_error "unexpected-end-tag", {:name => name}
+      end
+    end
+
+    def endTagIgnore(name)
+      parse_error("unexpected-end-tag", {"name" => name})
+    end
+
+    def endTagOther(name)
+      parse_error("unexpected-end-tag-implies-table-voodoo", {"name" => name})
+      # Make all the special element rearranging voodoo kick in
+      @tree.insert_from_table = true
+      # Process the end tag in the "in body" mode
+      @parser.phases[:inBody].processEndTag(name)
+      @tree.insert_from_table = false
+    end
+
+    protected
+
+    def clearStackToTableContext
+      # "clear the stack back to a table context"
+      until %w[table html].include?(name = @tree.open_elements.last.name)
+        parse_error("unexpected-implied-end-tag-in-table",
+                {"name" =>  @tree.open_elements.last.name})
+        @tree.open_elements.pop
+      end
+      # When the current node is <html> it's an inner_html case
+    end
+
+  end
+end
--- a/attic/vendor/plugins/HTML5lib/lib/html5/html5parser/initial_phase.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/html5parser/initial_phase.rb
@ -0,0 +1,133 @@
+require 'html5/html5parser/phase'
+
+module HTML5
+  class InitialPhase < Phase
+
+    # This phase deals with error handling as well which is currently not
+    # covered in the specification. The error handling is typically known as
+    # "quirks mode". It is expected that a future version of HTML5 will define this.
+
+    def process_eof
+      parse_error("expected-doctype-but-got-eof")
+      @parser.phase = @parser.phases[:rootElement]
+      @parser.phase.process_eof
+    end
+
+    def processComment(data)
+      @tree.insert_comment(data, @tree.document)
+    end
+
+    def processDoctype(name, publicId, systemId, correct)
+      if name.downcase != 'html' or publicId or systemId
+        parse_error("unknown-doctype")
+      end
+      # XXX need to update DOCTYPE tokens
+      @tree.insertDoctype(name, publicId, systemId)
+
+      publicId = publicId.to_s.upcase
+
+      if name.downcase != 'html'
+        # XXX quirks mode
+      else
+        if ["+//silmaril//dtd html pro v0r11 19970101//en",
+            "-//advasoft ltd//dtd html 3.0 aswedit + extensions//en",
+            "-//as//dtd html 3.0 aswedit + extensions//en",
+            "-//ietf//dtd html 2.0 level 1//en",
+            "-//ietf//dtd html 2.0 level 2//en",
+            "-//ietf//dtd html 2.0 strict level 1//en",
+            "-//ietf//dtd html 2.0 strict level 2//en",
+            "-//ietf//dtd html 2.0 strict//en",
+            "-//ietf//dtd html 2.0//en",
+            "-//ietf//dtd html 2.1e//en",
+            "-//ietf//dtd html 3.0//en",
+            "-//ietf//dtd html 3.0//en//",
+            "-//ietf//dtd html 3.2 final//en",
+            "-//ietf//dtd html 3.2//en",
+            "-//ietf//dtd html 3//en",
+            "-//ietf//dtd html level 0//en",
+            "-//ietf//dtd html level 0//en//2.0",
+            "-//ietf//dtd html level 1//en",
+            "-//ietf//dtd html level 1//en//2.0",
+            "-//ietf//dtd html level 2//en",
+            "-//ietf//dtd html level 2//en//2.0",
+            "-//ietf//dtd html level 3//en",
+            "-//ietf//dtd html level 3//en//3.0",
+            "-//ietf//dtd html strict level 0//en",
+            "-//ietf//dtd html strict level 0//en//2.0",
+            "-//ietf//dtd html strict level 1//en",
+            "-//ietf//dtd html strict level 1//en//2.0",
+            "-//ietf//dtd html strict level 2//en",
+            "-//ietf//dtd html strict level 2//en//2.0",
+            "-//ietf//dtd html strict level 3//en",
+            "-//ietf//dtd html strict level 3//en//3.0",
+            "-//ietf//dtd html strict//en",
+            "-//ietf//dtd html strict//en//2.0",
+            "-//ietf//dtd html strict//en//3.0",
+            "-//ietf//dtd html//en",
+            "-//ietf//dtd html//en//2.0",
+            "-//ietf//dtd html//en//3.0",
+            "-//metrius//dtd metrius presentational//en",
+            "-//microsoft//dtd internet explorer 2.0 html strict//en",
+            "-//microsoft//dtd internet explorer 2.0 html//en",
+            "-//microsoft//dtd internet explorer 2.0 tables//en",
+            "-//microsoft//dtd internet explorer 3.0 html strict//en",
+            "-//microsoft//dtd internet explorer 3.0 html//en",
+            "-//microsoft//dtd internet explorer 3.0 tables//en",
+            "-//netscape comm. corp.//dtd html//en",
+            "-//netscape comm. corp.//dtd strict html//en",
+            "-//o'reilly and associates//dtd html 2.0//en",
+            "-//o'reilly and associates//dtd html extended 1.0//en",
+            "-//spyglass//dtd html 2.0 extended//en",
+            "-//sq//dtd html 2.0 hotmetal + extensions//en",
+            "-//sun microsystems corp.//dtd hotjava html//en",
+            "-//sun microsystems corp.//dtd hotjava strict html//en",
+            "-//w3c//dtd html 3 1995-03-24//en",
+            "-//w3c//dtd html 3.2 draft//en",
+            "-//w3c//dtd html 3.2 final//en",
+            "-//w3c//dtd html 3.2//en",
+            "-//w3c//dtd html 3.2s draft//en",
+            "-//w3c//dtd html 4.0 frameset//en",
+            "-//w3c//dtd html 4.0 transitional//en",
+            "-//w3c//dtd html experimental 19960712//en",
+            "-//w3c//dtd html experimental 970421//en",
+            "-//w3c//dtd w3 html//en",
+            "-//w3o//dtd w3 html 3.0//en",
+            "-//w3o//dtd w3 html 3.0//en//",
+            "-//w3o//dtd w3 html strict 3.0//en//",
+            "-//webtechs//dtd mozilla html 2.0//en",
+            "-//webtechs//dtd mozilla html//en",
+            "-/w3c/dtd html 4.0 transitional/en",
+            "html"].include?(publicId) or
+          (systemId == nil and
+            ["-//w3c//dtd html 4.01 frameset//EN",
+             "-//w3c//dtd html 4.01 transitional//EN"].include?(publicId)) or
+          (systemId == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")
+            #XXX quirks mode
+          end
+      end
+
+      @parser.phase = @parser.phases[:rootElement]
+    end
+
+    def processSpaceCharacters(data)
+    end
+
+    def processCharacters(data)
+      parse_error("expected-doctype-but-got-chars")
+      @parser.phase = @parser.phases[:rootElement]
+      @parser.phase.processCharacters(data)
+    end
+
+    def processStartTag(name, attributes)
+      parse_error("expected-doctype-but-got-start-tag", {"name" => name})
+      @parser.phase = @parser.phases[:rootElement]
+      @parser.phase.processStartTag(name, attributes)
+    end
+
+    def processEndTag(name)
+      parse_error("expected-doctype-but-got-end-tag", {"name" => name})
+      @parser.phase = @parser.phases[:rootElement]
+      @parser.phase.processEndTag(name)
+    end
+  end
+end
--- a/attic/vendor/plugins/HTML5lib/lib/html5/html5parser/phase.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/html5parser/phase.rb
@ -0,0 +1,154 @@
+module HTML5
+  # Base class for helper objects that implement each phase of processing.
+  #
+  # Handler methods should be in the following order (they can be omitted):
+  #
+  #   * EOF
+  #   * Comment
+  #   * Doctype
+  #   * SpaceCharacters
+  #   * Characters
+  #   * StartTag
+  #     - startTag* methods
+  #   * EndTag
+  #     - endTag* methods
+  #
+  class Phase
+
+    extend Forwardable
+    def_delegators :@parser, :parse_error
+
+    # The following example call:
+    #
+    #   tag_handlers('startTag', 'html', %w( base link meta ), %w( li dt dd ) => 'ListItem')
+    #
+    # ...would return a hash equal to this:
+    #
+    #   { 'html' => 'startTagHtml',
+    #     'base' => 'startTagBaseLinkMeta',
+    #     'link' => 'startTagBaseLinkMeta',
+    #     'meta' => 'startTagBaseLinkMeta',
+    #     'li'   => 'startTagListItem',
+    #     'dt'   => 'startTagListItem',
+    #     'dd'   => 'startTagListItem'  }
+    #
+    def self.tag_handlers(prefix, *tags)
+      mapping = {}
+      if tags.last.is_a?(Hash)
+        tags.pop.each do |names, handler_method_suffix|
+          handler_method = prefix + handler_method_suffix
+          Array(names).each {|name| mapping[name] = handler_method }
+        end
+      end
+      tags.each do |names|
+        names = Array(names)
+        handler_method = prefix + names.map {|name| name.capitalize }.join
+        names.each {|name| mapping[name] = handler_method }
+      end
+      mapping
+    end
+
+    def self.start_tag_handlers
+      @start_tag_handlers ||= Hash.new('startTagOther')
+    end
+
+    # Declare what start tags this Phase handles. Can be called more than once.
+    #
+    # Example usage:
+    #
+    #   handle_start 'html'
+    #   # html start tags will be handled by a method named 'startTagHtml'
+    #
+    #   handle_start %( base link meta )
+    #   # base, link and meta start tags will be handled by a method named 'startTagBaseLinkMeta'
+    #
+    #   handle_start %( li dt dd ) => 'ListItem'
+    #   # li, dt, and dd start tags will be handled by a method named 'startTagListItem'
+    #
+    def self.handle_start(*tags)
+      start_tag_handlers.update tag_handlers('startTag', *tags)
+    end
+
+    def self.end_tag_handlers
+      @end_tag_handlers ||= Hash.new('endTagOther')
+    end
+
+    # Declare what end tags this Phase handles. Behaves like handle_start.
+    #
+    def self.handle_end(*tags)
+      end_tag_handlers.update tag_handlers('endTag', *tags)
+    end
+
+    def initialize(parser, tree)
+      @parser, @tree = parser, tree
+    end
+
+    def process_eof
+      @tree.generateImpliedEndTags
+
+      if @tree.open_elements.length > 2
+        parse_error("expected-closing-tag-but-got-eof")
+      elsif @tree.open_elements.length == 2 and @tree.open_elements[1].name != 'body'
+        # This happens for framesets or something?
+        parse_error("expected-closing-tag-but-got-eof")
+      elsif @parser.inner_html and @tree.open_elements.length > 1 
+        # XXX This is not what the specification says. Not sure what to do here.
+        parse_error("eof-in-innerhtml")
+      end
+      # Betting ends.
+    end
+
+    def processComment(data)
+      # For most phases the following is correct. Where it's not it will be
+      # overridden.
+      @tree.insert_comment(data, @tree.open_elements.last)
+    end
+
+    def processDoctype(name, publicId, systemId, correct)
+      parse_error("unexpected-doctype")
+    end
+
+    def processSpaceCharacters(data)
+      @tree.insertText(data)
+    end
+
+    def processStartTag(name, attributes)
+      send self.class.start_tag_handlers[name], name, attributes
+    end
+
+    def startTagHtml(name, attributes)
+      if @parser.first_start_tag == false and name == 'html'
+         parse_error("non-html-root")
+      end
+      # XXX Need a check here to see if the first start tag token emitted is
+      # this token... If it's not, invoke parse_error.
+      attributes.each do |attr, value|
+        unless @tree.open_elements.first.attributes.has_key?(attr)
+          @tree.open_elements.first.attributes[attr] = value
+        end
+      end
+      @parser.first_start_tag = false
+    end
+
+    def processEndTag(name)
+      send self.class.end_tag_handlers[name], name
+    end
+
+    def assert(value)
+      throw AssertionError.new unless value
+    end
+
+    def in_scope?(*args)
+      @tree.elementInScope(*args)
+    end
+
+    def remove_open_elements_until(name=nil)
+      finished = false
+      until finished || @tree.open_elements.length == 0
+        element = @tree.open_elements.pop
+        finished = name.nil? ? yield(element) : element.name == name
+      end
+      return element
+    end
+  end
+end
--- a/attic/vendor/plugins/HTML5lib/lib/html5/html5parser/root_element_phase.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/html5parser/root_element_phase.rb
@ -0,0 +1,41 @@
+require 'html5/html5parser/phase'
+
+module HTML5
+  class RootElementPhase < Phase
+
+    def process_eof
+      insert_html_element
+      @parser.phase.process_eof
+    end
+
+    def processComment(data)
+      @tree.insert_comment(data, @tree.document)
+    end
+
+    def processSpaceCharacters(data)
+    end
+
+    def processCharacters(data)
+      insert_html_element
+      @parser.phase.processCharacters(data)
+    end
+
+    def processStartTag(name, attributes)
+      @parser.first_start_tag = true if name == 'html'
+      insert_html_element
+      @parser.phase.processStartTag(name, attributes)
+    end
+
+    def processEndTag(name)
+      insert_html_element
+      @parser.phase.processEndTag(name)
+    end
+
+    def insert_html_element
+      element = @tree.createElement('html', {})
+      @tree.open_elements << element
+      @tree.document.appendChild(element)
+      @parser.phase = @parser.phases[:beforeHead]
+    end
+  end
+end
--- a/attic/vendor/plugins/HTML5lib/lib/html5/html5parser/trailing_end_phase.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/html5parser/trailing_end_phase.rb
@ -0,0 +1,35 @@
+require 'html5/html5parser/phase'
+
+module HTML5
+  class TrailingEndPhase < Phase
+
+    def process_eof
+    end
+
+    def processComment(data)
+      @tree.insert_comment(data, @tree.document)
+    end
+
+    def processSpaceCharacters(data)
+      @parser.last_phase.processSpaceCharacters(data)
+    end
+
+    def processCharacters(data)
+      parse_error("expected-eof-but-got-char")
+      @parser.phase = @parser.last_phase
+      @parser.phase.processCharacters(data)
+    end
+
+    def processStartTag(name, attributes)
+      parse_error("expected-eof-but-got-start-tag", {"name" => name})
+      @parser.phase = @parser.last_phase
+      @parser.phase.processStartTag(name, attributes)
+    end
+
+    def processEndTag(name)
+      parse_error("expected-eof-but-got-end-tag", {"name" => name})
+      @parser.phase = @parser.last_phase
+      @parser.phase.processEndTag(name)
+    end
+  end
+end
--- a/attic/vendor/plugins/HTML5lib/lib/html5/inputstream.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/inputstream.rb
@ -0,0 +1,701 @@
+require 'stringio'
+require 'html5/constants'
+
+module HTML5
+
+  # Provides a unicode stream of characters to the HTMLTokenizer.
+
+  # This class takes care of character encoding and removing or replacing
+  # incorrect byte-sequences and also provides column and line tracking.
+
+  class HTMLInputStream
+
+    attr_accessor :queue, :char_encoding, :errors
+
+    # Initialises the HTMLInputStream.
+    #
+    # HTMLInputStream.new(source, options) -> normalized stream from source
+    # for use by the HTML5Lib.
+    #
+    # source is either an object that responds to #read (used as-is) or a
+    # string holding the document itself; see open_stream. A filename is NOT
+    # opened - it would be parsed as document text.
+    #
+    # options is a hash; each name/value pair is stored in the instance
+    # variable of that name. The ones given defaults here are:
+    #
+    #   :encoding   - "transport level" encoding (default nil). When it is
+    #                 set and HTML5.is_valid_encoding accepts it, it is used
+    #                 and no detection is attempted.
+    #   :parse_meta - read by detect_encoding: look for a <meta> element
+    #                 containing encoding information (default true)
+    #   :chardet    - read by detect_encoding: guess with the chardet gem
+    #                 (default true)
+
+    def initialize(source, options = {})
+      @encoding   = nil
+      @parse_meta = true
+      @chardet    = true
+
+      # Options override the defaults above. They are applied before the
+      # @NUM_BYTES_* and @DEFAULT_ENCODING values below are assigned, so those
+      # cannot be overridden this way.
+      options.each {|name, value| instance_variable_set("@#{name}", value) }
+
+      # partial Ruby 1.9 support: tag the source with the requested encoding.
+      # This modifies the caller's object in place, and any failure is
+      # silently ignored.
+      if @encoding and source.respond_to? :force_encoding
+        source.force_encoding(@encoding) rescue nil
+      end
+
+      # Raw Stream
+      @raw_stream = open_stream(source)
+
+      # Encoding Information
+      #Number of bytes to use when looking for a meta element with
+      #encoding information
+      @NUM_BYTES_META = 512
+      #Number of bytes to read at a time when detecting the encoding with chardet
+      @NUM_BYTES_CHARDET = 256
+      #Number of bytes to use when reading content
+      @NUM_BYTES_BUFFER = 1024
+
+      #Encoding to use if no other information can be found
+      @DEFAULT_ENCODING = 'windows-1252'
+
+      #Detect encoding iff no explicit "transport level" encoding is supplied
+      if @encoding.nil? or not HTML5.is_valid_encoding(@encoding)
+        @char_encoding = detect_encoding
+      else
+        @char_encoding = @encoding
+      end
+
+      # Read the first block of the stream. windows-1252 input only sets the
+      # @win1252 flag, and utf-8 input is used as read. Any other encoding is
+      # read to the end and converted to utf-8 with Iconv; if that raises,
+      # the stream is flagged as windows-1252 instead.
+      @buffer = @raw_stream.read(@NUM_BYTES_BUFFER) || ''
+      if @char_encoding == 'windows-1252'
+        @win1252 = true
+      elsif @char_encoding != 'utf-8'
+        require 'iconv'
+        begin
+          @buffer << @raw_stream.read unless @raw_stream.eof?
+          @buffer = Iconv.iconv('utf-8', @char_encoding, @buffer).first
+        rescue
+          @win1252 = true
+        end
+      end
+
+      @queue = []
+      @errors = []
+
+      # Reset position in the list to read from
+      @tell = 0
+      @line = @col = 0
+      @line_lengths = []
+    end
+
+    # Produces a file object from source.
+    #
+    # source can be either a file object, local filename or a string.
+    def open_stream(source)
+      # Already an IO like object
+      if source.respond_to?(:read)
+        source
+      else
+        # Treat source as a string and wrap in StringIO
+        StringIO.new(source)
+      end
+    end
+
+    def detect_encoding
+
+      #First look for a BOM
+      #This will also read past the BOM if present
+      encoding = detect_bom
+
+      #If there is no BOM need to look for meta elements with encoding 
+      #information
+      if encoding.nil? and @parse_meta
+        encoding = detect_encoding_meta
+      end
+
+      #Guess with chardet, if avaliable
+      if encoding.nil? and @chardet
+        begin
+          require 'rubygems'
+          require 'UniversalDetector' # gem install chardet
+          buffers = []
+          detector = UniversalDetector::Detector.instance
+          detector.reset
+          until @raw_stream.eof?
+            buffer = @raw_stream.read(@NUM_BYTES_CHARDET)
+            break if !buffer or buffer.empty?
+            buffers << buffer
+            detector.feed(buffer)
+            break if detector.instance_eval {@done}
+            detector.instance_eval {
+              @_mLastChar = @_mLastChar.chr if Fixnum === @_mLastChar
+            }
+          end
+          detector.close
+          encoding = detector.result['encoding']
+          seek(buffers*'', 0)
+        rescue LoadError
+        end
+      end
+
+      # If all else fails use the default encoding
+      if encoding.nil?
+        encoding = @DEFAULT_ENCODING
+      end
+    
+      #Substitute for equivalent encoding
+      if 'iso-8859-1' == encoding.downcase
+        encoding = 'windows-1252'
+      end
+
+      encoding
+    end
+
+    # Attempts to detect at BOM at the start of the stream. If
+    # an encoding can be determined from the BOM return the name of the
+    # encoding otherwise return nil
+    def detect_bom
+      bom_dict = {
+        "\xef\xbb\xbf"     => 'utf-8',
+        "\xff\xfe"         => 'utf-16le',
+        "\xfe\xff"         => 'utf-16be',
+        "\xff\xfe\x00\x00" => 'utf-32le',
+        "\x00\x00\xfe\xff" => 'utf-32be'
+      }
+
+      # Go to beginning of file and read in 4 bytes
+      string = @raw_stream.read(4)
+      return nil unless string
+
+      # Try detecting the BOM using bytes from the string
+      encoding = bom_dict[string[0...3]]      # UTF-8
+      seek = 3
+      unless encoding
+        # Need to detect UTF-32 before UTF-16
+        encoding = bom_dict[string]       # UTF-32
+        seek = 4
+        unless encoding
+          encoding = bom_dict[string[0...2]]  # UTF-16
+          seek = 2
+        end
+      end
+
+      # Set the read position past the BOM if one was found, otherwise
+      # set it to the start of the stream
+      seek(string, encoding ? seek : 0)
+
+      return encoding
+    end
+
+    # Reposition @raw_stream so that the next read starts at offset n of
+    # buffer, where buffer holds the data most recently read from the stream.
+    #
+    # Three strategies are tried in order:
+    # 1. the stream already supports unget: push back buffer[n..-1];
+    # 2. the stream supports seek: seek to offset n (a stream that raises
+    #    Errno::ESPIPE falls through to the next strategy);
+    # 3. otherwise wrap the stream in a SimpleDelegator that gains a
+    #    push-back buffer (read/unget), then push back buffer[n..-1].
+    def seek(buffer, n)
+      if @raw_stream.respond_to?(:unget)
+        @raw_stream.unget(buffer[n..-1])
+        return
+      end
+
+      if @raw_stream.respond_to?(:seek)
+        begin
+          @raw_stream.seek(n)
+          return
+        rescue Errno::ESPIPE
+          # not seekable (e.g. a pipe); fall back to the push-back wrapper
+        end
+      end
+
+      # Loaded lazily: only streams that can neither unget nor seek need it
+      require 'delegate'
+      @raw_stream = SimpleDelegator.new(@raw_stream)
+
+      class << @raw_stream
+        # Read up to chars characters, serving pushed-back data before
+        # touching the wrapped stream. chars of -1 (or nil) reads everything
+        # that is left.
+        def read(chars=-1)
+          chars = -1 if chars.nil?
+          if chars == -1 or chars > @data.length
+            # Pushed-back data alone cannot satisfy the request: drain it
+            # and top up from the wrapped stream.
+            result = @data
+            @data = ''
+            return result if __getobj__.eof?
+            return result + __getobj__.read if chars == -1
+            return result + __getobj__.read(chars-result.length)
+          elsif @data.empty?
+            return __getobj__.read(chars)
+          else
+            # Pushed-back data covers the whole request; take it from the
+            # front of the buffer (index 0, not 1, or a character is lost).
+            result = @data[0...chars]
+            @data = @data[chars..-1]
+            return result
+          end
+        end
+
+        # Queue data to be returned by subsequent reads.
+        def unget(data)
+          if !@data or @data.empty?
+            @data = data
+          else
+            @data += data
+          end
+        end
+      end
+
+      @raw_stream.unget(buffer[n .. -1])
+    end
+
+    # Report the encoding declared by the meta element
+    def detect_encoding_meta
+      # Sample the head of the stream (@NUM_BYTES_META units) for the pre-scan
+      sample = @raw_stream.read(@NUM_BYTES_META)
+      meta_parser = EncodingParser.new(sample)
+      # Hand the sample back so that later reads see it again
+      seek(sample, 0)
+      meta_parser.get_encoding
+    end
+
+    # Returns (line, col) of the current position in the stream.
+    def position
+      line = @line
+      col = @col
+      # Walk back over the characters waiting in the queue, newest first,
+      # stepping the reported position back by one for each of them.
+      unless !@queue or @queue.last == :EOF
+        @queue.reverse_each do |queued|
+          if queued == "\n"
+            line -= 1
+            # a newline can only be un-read from the start of a line
+            raise RuntimeError, "col=#{col}" unless col == 0
+            col = @line_lengths[line]
+          else
+            col -= 1
+          end
+        end
+      end
+      [line + 1, col]
+    end
+
+    # Read one character from the stream or queue if available. Return
+    # EOF when EOF is reached.
+    def char
+      # Characters pushed back onto the queue are served before the buffer.
+      unless @queue.empty?
+        return @queue.shift
+      else
+        # Top up the buffer when fewer than three unread entries remain and
+        # the stream has more data (presumably so the multi-byte UTF-8 match
+        # further down can see a whole sequence -- TODO confirm).
+        if @tell + 3 > @buffer.length && !@raw_stream.eof?
+          # read next block
+          @buffer = @buffer[@tell..-1] + @raw_stream.read(@NUM_BYTES_BUFFER)
+          @tell = 0
+        end
+
+        c = @buffer[@tell]
+        @tell += 1
+
+        # Indexing a String yields a one-character String on Ruby 1.9 and an
+        # integer byte value on Ruby 1.8; the branches below cover both.
+        case c
+
+        when String
+          # partial Ruby 1.9 support
+          case c
+          when "\0"
+            @errors.push("null-character")
+            c = "\uFFFD" # null characters are invalid
+          when "\r"
+            # normalize CR and CRLF to a single LF
+            @tell += 1 if @buffer[@tell] == "\n"
+            c = "\n"
+          when "\x80" .. "\x9F"
+            # map through the ENTITIES_WINDOWS1252 table (indexed from 0x80)
+            c = ENTITIES_WINDOWS1252[c.ord-0x80].chr('utf-8')
+          when "\xA0" .. "\xFF"
+            # binary-tagged high bytes are transcoded as ISO-8859-1
+            if c.encoding == Encoding::ASCII_8BIT
+              c = c.encode('utf-8','iso-8859-1')
+            end
+          end
+
+          # NOTE(review): "\r" was already rewritten to "\n" by the case
+          # above, so this test looks unreachable (and would assign an
+          # Integer to c) -- confirm before removing.
+          if c == "\x0D"
+            # normalize newlines
+            @tell += 1 if @buffer[@tell] == 0x0A
+            c = 0x0A
+          end
+
+          # update position in stream
+          if c == "\x0a"
+            @line_lengths << @col
+            @line += 1
+            @col = 0
+          else
+            @col += 1
+          end
+
+          c
+
+        # The remaining branches handle integer byte values (Ruby 1.8).
+        when 0x01..0x7F
+          if c == 0x0D
+            # normalize newlines
+            @tell += 1 if @buffer[@tell] == 0x0A
+            c = 0x0A
+          end
+
+          # update position in stream
+          if c == 0x0a
+            @line_lengths << @col
+            @line += 1
+            @col = 0
+          else
+            @col += 1
+          end
+
+          c.chr
+
+        when 0x80..0xBF
+          # A byte in this range arriving on its own is only meaningful when
+          # the input is being treated as windows-1252 (@win1252).
+          if !@win1252
+            [0xFFFD].pack('U') # invalid utf-8
+          elsif c <= 0x9f
+            [ENTITIES_WINDOWS1252[c-0x80]].pack('U')
+          else
+            "\xC2" + c.chr # convert to utf-8
+          end
+
+        when 0xC0..0xFF
+          if instance_variable_defined?("@win1252") && @win1252
+            "\xC3" + (c - 64).chr # convert to utf-8
+          # from http://www.w3.org/International/questions/qa-forms-utf-8.en.php
+          elsif @buffer[@tell - 1..@tell + 3] =~ /^
+                ( [\xC2-\xDF][\x80-\xBF]             # non-overlong 2-byte
+                |  \xE0[\xA0-\xBF][\x80-\xBF]        # excluding overlongs
+                | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}  # straight 3-byte
+                |  \xED[\x80-\x9F][\x80-\xBF]        # excluding surrogates
+                |  \xF0[\x90-\xBF][\x80-\xBF]{2}     # planes 1-3
+                | [\xF1-\xF3][\x80-\xBF]{3}          # planes 4-15
+                |  \xF4[\x80-\x8F][\x80-\xBF]{2}     # plane 16
+                )/x
+            # consume the continuation bytes of the matched sequence and
+            # return the whole sequence as one character
+            @tell += $1.length - 1
+            $1
+          else
+            [0xFFFD].pack('U') # invalid utf-8
+          end
+
+        when 0x00
+          @errors.push("null-character")
+          [0xFFFD].pack('U') # null characters are invalid
+
+        else
+          # nothing matched: c is nil when @tell is past the end of @buffer
+          :EOF
+        end
+      end
+    end
+
+    # Returns a string of characters from the stream up to but not
+    # including any character in characters or EOF. characters can be
+    # any container that supports the in method being called on it.
+    def chars_until(characters, opposite=false)
+      collected = []
+      current = char
+
+      # Keep consuming while membership of the current character in
+      # characters equals opposite; EOF always stops the scan.
+      until current == :EOF or characters.include?(current) != opposite
+        collected << current
+        current = char
+      end
+
+      # The character that stopped the scan goes back to the front of the
+      # queue, unless the scan stopped on EOF.
+      @queue.unshift(current) unless current == :EOF
+      collected.join('')
+    end
+
+    # Push characters back onto the front of the queue so that they are the
+    # next ones returned by char. :EOF is ignored. Anything responding to
+    # to_a is pushed element by element, in order; anything else is treated
+    # as a string and pushed character by character.
+    def unget(characters)
+      unless characters == :EOF
+        if characters.respond_to?(:to_a)
+          @queue.unshift(*characters.to_a)
+        else
+          # unshifting the reversed string one character at a time leaves
+          # the characters in their original order at the head of the queue
+          characters.reverse.each_char { |ch| @queue.unshift(ch) }
+        end
+      end
+    end
+  end
+
+  # String-like object with an associated position and various extra methods.
+  # Asking for the current byte once the position has reached the end of the
+  # string raises EOF.
+  class EncodingBytes < String
+
+    # Index of the current byte; starts at -1, i.e. before the first byte.
+    attr_accessor :position
+
+    def initialize(value)
+      super(value)
+      @position = -1
+    end
+
+    # Step through the string one position at a time, yielding the byte at
+    # each new position. The block may move the position itself; iteration
+    # resumes from wherever it was left. An EOF raised by the block ends the
+    # iteration quietly.
+    def each
+      # Only advance while there is a byte to land on; stepping to
+      # position == length would yield nil.
+      while @position + 1 < length
+        @position += 1
+        yield self[@position]
+      end
+    rescue EOF
+    end
+
+    # Return the byte at the current position as a one-character String.
+    # Raises EOF when the position is at or beyond the end of the string.
+    def current_byte
+      raise EOF if @position >= length
+      return self[@position].chr
+    end
+
+    # Skip past a list of characters
+    def skip(chars=SPACE_CHARACTERS)
+      while chars.include?(current_byte)
+        @position += 1
+      end
+    end
+
+    # Look for a sequence of bytes at the start of a string. If the bytes 
+    # are found return true and advance the position to the byte after the 
+    # match. Otherwise return false and leave the position alone.
+    # With lower set, the data is lowercased before comparing.
+    def match_bytes(bytes, lower=false)
+      data = self[position ... position+bytes.length]
+      # the slice is nil when the position lies beyond the end of the string
+      return false if data.nil?
+      data.downcase! if lower
+      rv = (data == bytes)
+      @position += bytes.length if rv == true
+      return rv
+    end
+
+    # Look for the next sequence of bytes matching a given sequence. If
+    # a match is found advance the position to the last byte of the match
+    # and return true; otherwise raise EOF.
+    def jump_to(bytes)
+      new_position = self[position .. -1].index(bytes)
+      if new_position
+        @position += (new_position + bytes.length-1)
+        return true
+      else
+        raise EOF
+      end
+    end
+
+    # Move the pointer so it points to the next byte in a set of possible
+    # bytes. Raises EOF (from current_byte) if none is found.
+    def find_next(byte_list)
+      until byte_list.include?(current_byte)
+        @position += 1
+      end
+    end
+  end
+
+  # Mini parser for detecting character encoding from meta elements
+  class EncodingParser
+
+    # data - the data to work on for encoding detection (converted with to_s)
+    def initialize(data)
+      @data = EncodingBytes.new(data.to_s)
+      @encoding = nil
+    end
+
+    # Byte sequences that open a construct, each paired with its handler.
+    # Entries are tried in order, so longer prefixes precede the shorter
+    # ones they contain ('<!--' before '<!', '</' before '<').
+    @@method_dispatch = [
+      ['<!--', :handle_comment],
+      ['<meta', :handle_meta],
+      ['</', :handle_possible_end_tag],
+      ['<!', :handle_other],
+      ['<?', :handle_other],
+      ['<', :handle_possible_start_tag]
+    ]
+
+    # Scan the data for an encoding declaration.
+    #
+    # Returns the declared encoding name with surrounding whitespace
+    # stripped, or nil if none was found. A declared UTF-16 or UTF-32
+    # variant is reported as 'utf-8'.
+    def get_encoding
+      @data.each do |byte|
+        keep_parsing = true
+        @@method_dispatch.each do |(key, method)|
+          if @data.match_bytes(key, true)
+            # handlers return false once an encoding has been found
+            keep_parsing = send(method)
+            break
+          end
+        end
+        break unless keep_parsing
+      end
+      unless @encoding.nil?
+        @encoding = @encoding.strip
+        if ["UTF-16", "UTF-16BE", "UTF-16LE", "UTF-32", "UTF-32BE", "UTF-32LE"].include?(@encoding.upcase)
+          @encoding = 'utf-8'
+        end
+      end
+
+      return @encoding
+    end
+
+    # Skip over comments
+    def handle_comment
+      return @data.jump_to('-->')
+    end
+
+    # Inspect the attributes of a meta element for an encoding declaration.
+    # Returns false (stop parsing) once a valid encoding has been stored in
+    # @encoding, true otherwise.
+    def handle_meta
+      # if we have <meta not followed by a space so just keep going
+      return true unless SPACE_CHARACTERS.include?(@data.current_byte)
+
+      #We have a valid meta element we want to search for attributes
+      while true
+        #Try to find the next attribute after the current position
+        attr = get_attribute
+
+        return true if attr.nil?
+
+        if attr[0] == 'charset'
+          tentative_encoding = attr[1]
+          if HTML5.is_valid_encoding(tentative_encoding)
+            @encoding = tentative_encoding
+            return false
+          end
+        elsif attr[0] == 'content'
+          content_parser = ContentAttrParser.new(EncodingBytes.new(attr[1]))
+          tentative_encoding = content_parser.parse
+          if HTML5.is_valid_encoding(tentative_encoding)
+            @encoding = tentative_encoding
+            return false
+          end
+        end
+      end
+    end
+
+    def handle_possible_start_tag
+      return handle_possible_tag(false)
+    end
+
+    def handle_possible_end_tag
+      @data.position += 1
+      return handle_possible_tag(true)
+    end
+
+    # Skip over a start or end tag together with its attributes.
+    # Always returns true (keep parsing).
+    def handle_possible_tag(end_tag)
+      unless ASCII_LETTERS.include?(@data.current_byte)
+        #If the next byte is not an ascii letter either ignore this
+        #fragment (possible start tag case) or treat it according to 
+        #handle_other
+        if end_tag
+          @data.position -= 1
+          handle_other
+        end
+        return true
+      end
+
+      @data.find_next(SPACE_CHARACTERS + ['<', '>'])
+
+      if @data.current_byte == '<'
+        #return to the first step in the overall "two step" algorithm
+        #reprocessing the < byte
+        @data.position -= 1
+      else
+        #Read all attributes
+        nil until get_attribute.nil?
+      end
+      return true
+    end
+
+    # Skip to the next '>'
+    def handle_other
+      return @data.jump_to('>')
+    end
+
+    # Return a name,value pair for the next attribute in the stream,
+    # if one is found, or nil. Names and values are lowercased.
+    def get_attribute
+      @data.skip(SPACE_CHARACTERS + ['/'])
+
+      if @data.current_byte == '<'
+        @data.position -= 1
+        return nil
+      elsif @data.current_byte == '>'
+        return nil
+      end
+
+      attr_name = []
+      attr_value = []
+      space_found = false
+      #Step 5 attribute name
+      while true
+        # An = only ends the name once the name is non-empty; a leading =
+        # is part of the name. (An Array is always truthy in Ruby, so the
+        # emptiness has to be tested explicitly.)
+        if @data.current_byte == '=' and !attr_name.empty?
+          break
+        elsif SPACE_CHARACTERS.include?(@data.current_byte)
+          space_found = true
+          break
+        elsif ['/', '<', '>'].include?(@data.current_byte)
+          return [attr_name.join(''), '']
+        elsif ASCII_UPPERCASE.include?(@data.current_byte)
+          attr_name.push(@data.current_byte.downcase)
+        else
+          attr_name.push(@data.current_byte)
+        end
+        #Step 6
+        @data.position += 1
+      end
+      #Step 7
+      if space_found
+        @data.skip
+        #Step 8
+        unless @data.current_byte == '='
+          @data.position -= 1
+          return [attr_name.join(''), '']
+        end
+      end
+      #XXX need to advance position in both spaces and value case
+      #Step 9
+      @data.position += 1
+      #Step 10
+      @data.skip
+      #Step 11
+      if ["'", '"'].include?(@data.current_byte)
+        #11.1
+        quote_char = @data.current_byte
+        while true
+          @data.position+=1
+          #11.3
+          if @data.current_byte == quote_char
+            @data.position += 1
+            return [attr_name.join(''), attr_value.join('')]
+          #11.4
+          elsif ASCII_UPPERCASE.include?(@data.current_byte)
+            attr_value.push(@data.current_byte.downcase)
+          #11.5
+          else
+            attr_value.push(@data.current_byte)
+          end
+        end
+      elsif ['>', '<'].include?(@data.current_byte)
+        return [attr_name.join(''), '']
+      elsif ASCII_UPPERCASE.include?(@data.current_byte)
+        attr_value.push(@data.current_byte.downcase)
+      else
+        attr_value.push(@data.current_byte)
+      end
+      # Unquoted value: runs up to the next space, '>' or '<'
+      while true
+        @data.position += 1
+        if (SPACE_CHARACTERS + ['>', '<']).include?(@data.current_byte)
+          return [attr_name.join(''), attr_value.join('')]
+        elsif ASCII_UPPERCASE.include?(@data.current_byte)
+          attr_value.push(@data.current_byte.downcase)
+        else
+          attr_value.push(@data.current_byte)
+        end
+      end
+    end
+  end
+
+  # Pulls the charset parameter out of the value of a meta element's
+  # content attribute.
+  class ContentAttrParser
+    # data - position-tracking byte string to scan (the caller in this file
+    #        passes an EncodingBytes)
+    def initialize(data)
+      @data = data
+    end
+
+    # Returns the charset value found after the first ";", or nil when there
+    # is none (including when the data runs out while looking for it).
+    def parse
+      # Everything up to and including the first ";" is skipped
+      @data.position = 0
+      @data.jump_to(';')
+      @data.position += 1
+      @data.skip
+      # Move just past the next occurrence of "charset"
+      @data.jump_to('charset')
+      @data.position += 1
+      @data.skip
+      # Without an = sign there is no value to report
+      return nil unless @data.current_byte == '='
+      @data.position += 1
+      @data.skip
+
+      first = @data.current_byte
+      if first == '"' or first == "'"
+        # Quoted value: everything up to the matching quote mark
+        @data.position += 1
+        value_start = @data.position
+        @data.jump_to(first)
+        @data[value_start ... @data.position]
+      else
+        # Unquoted value: runs up to the next space character, or to the
+        # end of the data when no space follows
+        value_start = @data.position
+        begin
+          @data.find_next(SPACE_CHARACTERS)
+          @data[value_start ... @data.position]
+        rescue EOF
+          @data[value_start .. -1]
+        end
+      end
+    rescue EOF
+      nil
+    end
+  end
+
+  # Determine if a string is a supported encoding
+  def self.is_valid_encoding(encoding)
+    # nil and every other non-String value is rejected outright
+    return false unless encoding.kind_of?(String)
+    # names are matched against ENCODINGS case-insensitively, ignoring
+    # surrounding whitespace
+    ENCODINGS.include?(encoding.downcase.strip)
+  end
+
+end
--- a/attic/vendor/plugins/HTML5lib/lib/html5/liberalxmlparser.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/liberalxmlparser.rb
@ -0,0 +1,158 @@
+# Warning: this module is experimental and subject to change and even removal
+# at any time. 
+# 
+# For background/rationale, see:
+#  * http://www.intertwingly.net/blog/2007/01/08/Xhtml5lib
+#  * http://tinyurl.com/ylfj8k (and follow-ups)
+# 
+# References:
+#  * http://googlereader.blogspot.com/2005/12/xml-errors-in-feeds.html
+#  * http://wiki.whatwg.org/wiki/HtmlVsXhtml
+# 
+# @@TODO:
+# * Selectively lowercase only XHTML, but not foreign markup
+require 'html5/html5parser'
+require 'html5/constants'
+
+module HTML5
+
+  # liberal XML parser
+  class XMLParser < HTMLParser
+
+    def initialize(options = {})
+      super options
+      @phases[:initial] = XmlRootPhase.new(self, @tree)
+    end
+
+    # Adjust a token from the tokenizer before it is dispatched to a phase.
+    # Modifies token in place and returns it.
+    def normalize_token(token)
+      case token[:type]
+      when :StartTag, :EmptyTag
+        # We need to remove the duplicate attributes and convert attributes
+        # to a Hash so that [["x", "y"], ["x", "z"]] becomes {"x": "y"}
+        # (the list is reversed so that the first occurrence wins)
+
+        token[:data] = Hash[*token[:data].reverse.flatten]
+
+        # For EmptyTags, process both a Start and an End tag
+        if token[:type] == :EmptyTag
+          # processing the start tag must not leak a change of content model
+          save = @tokenizer.content_model_flag
+          @phase.processStartTag(token[:name], token[:data])
+          @tokenizer.content_model_flag = save
+          token[:data] = {}
+          token[:type] = :EndTag
+        end
+
+      when :Characters
+        # un-escape RCDATA_ELEMENTS (e.g. style, script)
+        if @tokenizer.content_model_flag == :CDATA
+          token[:data] = token[:data].
+            gsub('&lt;','<').gsub('&gt;','>').gsub('&amp;','&')
+        end
+
+      when :EndTag
+        # Only a non-empty attribute collection is an error: an empty Array
+        # or Hash is truthy in Ruby, so the emptiness is tested explicitly.
+        if token[:data] and not token[:data].empty?
+          parse_error("attributes-in-end-tag")
+        end
+
+      when :Comment
+        # Rescue CDATA from the comments
+        if token[:data][0..6] == "[CDATA[" and token[:data][-2..-1] == "]]"
+          token[:type] = :Characters
+          token[:data] = token[:data][7 ... -2]
+        end
+      end
+
+      return token
+    end
+  end
+
+  # liberal XHTML parser
+  class XHTMLParser < XMLParser
+
+    def initialize(options = {})
+      super options
+      @phases[:initial] = InitialPhase.new(self, @tree)
+      @phases[:rootElement] = XhmlRootPhase.new(self, @tree)
+    end
+
+    # Apply the XMLParser normalisation, then adjust end tags for XHTML.
+    # Modifies token in place and returns it.
+    def normalize_token(token)
+      super(token)
+
+      # ensure that non-void XHTML elements have content so that separate
+      # open and close tags are emitted
+      if token[:type] == :EndTag
+        if VOID_ELEMENTS.include? token[:name]
+          # Tokens are keyed by Symbol (:name, :data); String keys would
+          # always read nil here.
+          if @tree.open_elements[-1].name != token[:name]
+            token[:type] = :EmptyTag
+            token[:data] ||= {}
+          end
+        else
+          if token[:name] == @tree.open_elements[-1].name and \
+            not @tree.open_elements[-1].hasContent
+            # skipped when an open element declares a non-XHTML namespace
+            @tree.insertText('') unless
+              @tree.open_elements.any? {|e|
+                e.attributes.keys.include? 'xmlns' and
+                e.attributes['xmlns'] != 'http://www.w3.org/1999/xhtml'
+              }
+          end
+        end
+      end
+
+      return token
+    end
+  end
+
+  # Root element phase that creates the implied html element in the XHTML
+  # namespace.
+  class XhmlRootPhase < RootElementPhase
+    def insert_html_element
+      namespace = {'xmlns' => 'http://www.w3.org/1999/xhtml'}
+      html = @tree.createElement("html", namespace)
+
+      @tree.open_elements.push(html)
+      @tree.document.appendChild(html)
+
+      @parser.phase = @parser.phases[:beforeHead]
+    end
+  end
+
+  class XmlRootPhase < Phase
+    # Prime the Xml parser: every start and end tag goes to the catch-all
+    # handlers below.
+    @start_tag_handlers = Hash.new(:startTagOther)
+    @end_tag_handlers = Hash.new(:endTagOther)
+
+    # The first start tag becomes the root element: it is appended to the
+    # document, pushed on the stack of open elements, and the parser moves
+    # on to a fresh XmlElementPhase.
+    def startTagOther(name, attributes)
+      @tree.open_elements.push(@tree.document)
+      root = @tree.createElement(name, attributes)
+      @tree.open_elements[-1].appendChild(root)
+      @tree.open_elements.push(root)
+      @parser.phase = XmlElementPhase.new(@parser,@tree)
+    end
+
+    # Defer to the inherited handling, then drop the top open element.
+    def endTagOther(name)
+      super
+      @tree.open_elements.pop
+    end
+  end
+
+  class XmlElementPhase < Phase
+    # Generic handling for all XML elements
+
+    @start_tag_handlers = Hash.new(:startTagOther)
+    @end_tag_handlers = Hash.new(:endTagOther)
+
+    # Append a new element to the current node and make it the current node.
+    def startTagOther(name, attributes)
+      child = @tree.createElement(name, attributes)
+      @tree.open_elements[-1].appendChild(child)
+      @tree.open_elements.push(child)
+    end
+
+    # Close the innermost open element called name, together with everything
+    # opened after it. A parse error is reported for each open element that
+    # is passed over while looking for it.
+    def endTagOther(name)
+      @tree.open_elements.reverse.each do |node|
+        if node.name == name
+          popped = @tree.open_elements.pop
+          popped = @tree.open_elements.pop while popped != node
+          break
+        end
+        parse_error
+      end
+    end
+
+    def processCharacters(data)
+      @tree.insertText(data)
+    end
+  end
+
+end
--- a/attic/vendor/plugins/HTML5lib/lib/html5/sanitizer.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/sanitizer.rb
@ -0,0 +1,203 @@
+require 'cgi'
+require 'html5/tokenizer'
+require 'set'
+
+module HTML5
+
+# This module provides sanitization of XHTML+MathML+SVG
+# and of inline style attributes.
+#
+# It can be applied either at the Tokenizer stage:
+#
+#       HTMLParser.parse(html, :tokenizer => HTMLSanitizer)
+#
+# or, if you already have a parse tree (in this example, a REXML tree),
+# at the Serializer stage:
+#
+#     tokens = TreeWalkers.get_tree_walker('rexml').new(tree)
+#     HTMLSerializer.serialize(tokens, {:encoding=>'utf-8',
+#        :sanitize => true})
+
+  module HTMLSanitizeModule
+
+    ACCEPTABLE_ELEMENTS = Set.new %w[a abbr acronym address area audio b big blockquote br
+      button caption center cite code col colgroup dd del dfn dir div dl dt
+      em fieldset font form h1 h2 h3 h4 h5 h6 hr i img input ins kbd label
+      legend li map menu ol optgroup option p pre q s samp select small span
+      strike strong sub sup table tbody td textarea tfoot th thead tr tt u
+      ul var video]
+
+    MATHML_ELEMENTS = Set.new %w[annotation annotation-xml maction math merror mfrac
+      mfenced mi mmultiscripts mn mo mover mpadded mphantom mprescripts mroot mrow
+      mspace msqrt mstyle msub msubsup msup mtable mtd mtext mtr munder
+      munderover none semantics]
+
+    SVG_ELEMENTS = Set.new %w[a animate animateColor animateMotion animateTransform
+      circle clipPath defs desc ellipse font-face font-face-name font-face-src
+      foreignObject g glyph hkern linearGradient line marker metadata
+      missing-glyph mpath path polygon polyline radialGradient rect set
+      stop svg switch text title tspan use]
+
+    ACCEPTABLE_ATTRIBUTES = Set.new %w[abbr accept accept-charset accesskey action
+      align alt axis border cellpadding cellspacing char charoff charset
+      checked cite class clear cols colspan color compact controls coords datetime
+      dir disabled enctype for frame headers height href hreflang hspace id
+      ismap label lang longdesc loop maxlength media method multiple name nohref
+      noshade nowrap poster prompt readonly rel rev rows rowspan rules scope
+      selected shape size span src start style summary tabindex target title
+      type usemap valign value vspace width xml:lang]
+
+    MATHML_ATTRIBUTES = Set.new %w[actiontype align close columnalign columnalign
+      columnalign columnlines columnspacing columnspan depth display
+      displaystyle encoding equalcolumns equalrows fence fontstyle fontweight
+      frame height linethickness lspace mathbackground mathcolor mathvariant
+      mathvariant maxsize minsize open other rowalign rowalign rowalign rowlines
+      rowspacing rowspan rspace scriptlevel selection separator separators
+      stretchy width width xlink:href xlink:show xlink:type xmlns xmlns:xlink]
+
+    SVG_ATTRIBUTES = Set.new %w[accent-height accumulate additive alphabetic
+       arabic-form ascent attributeName attributeType baseProfile bbox begin
+       by calcMode cap-height class clip-path clip-rule color color-rendering
+       content cx cy d dx dy descent display dur end fill fill-opacity fill-rule
+       font-family font-size font-stretch font-style font-variant font-weight from
+       fx fy g1 g2 glyph-name gradientUnits hanging height horiz-adv-x horiz-origin-x
+       id ideographic k keyPoints keySplines keyTimes lang marker-end
+       marker-mid marker-start markerHeight markerUnits markerWidth
+       mathematical max min name offset opacity orient origin
+       overline-position overline-thickness panose-1 path pathLength points
+       preserveAspectRatio r refX refY repeatCount repeatDur
+       requiredExtensions requiredFeatures restart rotate rx ry slope stemh
+       stemv stop-color stop-opacity strikethrough-position
+       strikethrough-thickness stroke stroke-dasharray stroke-dashoffset
+       stroke-linecap stroke-linejoin stroke-miterlimit stroke-opacity
+       stroke-width systemLanguage target text-anchor to transform type u1
+       u2 underline-position underline-thickness unicode unicode-range
+       units-per-em values version viewBox visibility width widths x
+       x-height x1 x2 xlink:actuate xlink:arcrole xlink:href xlink:role
+       xlink:show xlink:title xlink:type xml:base xml:lang xml:space xmlns
+       xmlns:xlink y y1 y2 zoomAndPan]
+
+    ATTR_VAL_IS_URI = Set.new %w[href src cite action longdesc xlink:href xml:base]
+
+    SVG_ATTR_VAL_ALLOWS_REF = Set.new %w[clip-path color-profile cursor fill
+      filter marker marker-start marker-mid marker-end mask stroke]
+
+    SVG_ALLOW_LOCAL_HREF = Set.new %w[altGlyph animate animateColor animateMotion
+      animateTransform cursor feImage filter linearGradient pattern
+      radialGradient textpath tref set use]
+
+    ACCEPTABLE_CSS_PROPERTIES = Set.new %w[azimuth background-color
+      border-bottom-color border-collapse border-color border-left-color
+      border-right-color border-top-color clear color cursor direction
+      display elevation float font font-family font-size font-style
+      font-variant font-weight height letter-spacing line-height overflow
+      pause pause-after pause-before pitch pitch-range richness speak
+      speak-header speak-numeral speak-punctuation speech-rate stress
+      text-align text-decoration text-indent unicode-bidi vertical-align
+      voice-family volume white-space width]
+
+    ACCEPTABLE_CSS_KEYWORDS = Set.new %w[auto aqua black block blue bold both bottom
+      brown center collapse dashed dotted fuchsia gray green !important
+      italic left lime maroon medium none navy normal nowrap olive pointer
+      purple red right solid silver teal top transparent underline white
+      yellow]
+
+    ACCEPTABLE_SVG_PROPERTIES = Set.new %w[fill fill-opacity fill-rule stroke
+      stroke-width stroke-linecap stroke-linejoin stroke-opacity]
+
+    ACCEPTABLE_PROTOCOLS = Set.new %w[ed2k ftp http https irc mailto news gopher nntp
+      telnet webcal xmpp callto feed urn aim rsync tag ssh sftp rtsp afs]
+
+    # subclasses may define their own versions of these constants
+    ALLOWED_ELEMENTS = ACCEPTABLE_ELEMENTS + MATHML_ELEMENTS + SVG_ELEMENTS
+    ALLOWED_ATTRIBUTES = ACCEPTABLE_ATTRIBUTES + MATHML_ATTRIBUTES + SVG_ATTRIBUTES
+    ALLOWED_CSS_PROPERTIES = ACCEPTABLE_CSS_PROPERTIES
+    ALLOWED_CSS_KEYWORDS = ACCEPTABLE_CSS_KEYWORDS
+    ALLOWED_SVG_PROPERTIES = ACCEPTABLE_SVG_PROPERTIES
+    ALLOWED_PROTOCOLS = ACCEPTABLE_PROTOCOLS
+
+    # Sanitize a single token.
+    #
+    # Tags for allowed elements keep only their allowed attributes, with
+    # unsafe URI schemes, non-local SVG references and inline CSS cleaned up.
+    # Tags for any other element are turned into a :Characters token that
+    # holds the tag's markup as text. Comments are emptied. All other tokens
+    # pass through untouched. The token is modified in place and returned.
+    #
+    # The ALLOWED_* lists are looked up on the including class, so a class
+    # may supply its own versions.
+    def sanitize_token(token)
+        case token[:type]
+        when :StartTag, :EndTag, :EmptyTag
+          if self.class.const_get("ALLOWED_ELEMENTS").include?(token[:name])
+            if token.has_key? :data
+              attrs = Hash[*token[:data].flatten]
+              attrs.delete_if { |attr,v| !self.class.const_get("ALLOWED_ATTRIBUTES").include?(attr) }
+              ATTR_VAL_IS_URI.each do |attr|
+                # Strip backticks, control characters, whitespace and the
+                # byte pairs \302\200-\302\240 before looking for a scheme,
+                # so that an obfuscated scheme is still recognised.
+                val_unescaped = CGI.unescapeHTML(attrs[attr].to_s).gsub(/`|[\000-\040\177\s]+|\302[\200-\240]/,'').downcase
+                if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ and !self.class.const_get("ALLOWED_PROTOCOLS").include?(val_unescaped.split(':')[0])
+                  attrs.delete attr
+                end
+              end
+              SVG_ATTR_VAL_ALLOWS_REF.each do |attr|
+                # blank out url(...) references unless they start with '#'
+                attrs[attr] = attrs[attr].to_s.gsub(/url\s*\(\s*[^#\s][^)]+?\)/m, ' ') if attrs[attr]
+              end
+              # on these SVG elements xlink:href may only be a '#' reference
+              if SVG_ALLOW_LOCAL_HREF.include?(token[:name]) && attrs['xlink:href'] && attrs['xlink:href'] =~ /^\s*[^#\s].*/m
+                 attrs.delete 'xlink:href'
+              end
+              if attrs['style']
+                attrs['style'] = sanitize_css(attrs['style'])
+              end
+              # back to a list of [name, value] pairs
+              token[:data] = attrs.map {|k,v| [k,v]}
+            end
+            return token
+          else
+            # Disallowed element: rebuild the tag's markup as a string and
+            # hand it on as character data instead of as a tag.
+            if token[:type] == :EndTag
+              token[:data] = "</#{token[:name]}>"
+            elsif token[:data]
+              attrs = token[:data].map {|k,v| " #{k}=\"#{CGI.escapeHTML(v)}\""}.join('')
+              token[:data] = "<#{token[:name]}#{attrs}>"
+            else
+              token[:data] = "<#{token[:name]}>"
+            end
+            # an empty tag gets its "/" back, just before the closing ">"
+            token[:data].insert(-2,'/') if token[:type] == :EmptyTag
+            token[:type] = :Characters
+            token.delete(:name)
+            return token
+          end
+        when :Comment
+          # comment text is dropped
+          token[:data] = ""
+          return token
+        else
+          return token
+        end
+    end
+
+    # Sanitize the value of an inline style attribute.
+    #
+    # style - the raw style text (converted with to_s)
+    #
+    # Returns a String holding only the declarations that are considered
+    # safe, each rendered as "property: value;" and joined by single spaces.
+    # Returns '' when the text as a whole fails either gauntlet check.
+    def sanitize_css(style)
+      # disallow urls
+      style = style.to_s.gsub(/url\s*\(\s*[^\s)]+?\s*\)\s*/, ' ')
+
+      # gauntlet
+      # Anchored with \A and \z so that the WHOLE text has to pass: ^ and $
+      # match at every line break in Ruby, which would let a multi-line
+      # value through as long as any one line was clean.
+      return '' unless style =~ /\A([-:,;#%.\sa-zA-Z0-9!]|\w-\w|\'[\s\w]+\'|\"[\s\w]+\"|\([\d,\s]+\))*\z/
+      return '' unless style =~ /\A\s*([-\w]+\s*:[^:;]*(;\s*|\z))*\z/
+
+      clean = []
+      style.scan(/([-\w]+)\s*:\s*([^:;]*)/) do |prop, val|
+        next if val.empty?
+        prop.downcase!
+        if self.class.const_get("ALLOWED_CSS_PROPERTIES").include?(prop)
+          clean << "#{prop}: #{val};"
+        elsif %w[background border margin padding].include?(prop.split('-')[0])
+          # shorthand families: kept only if every word of the value is an
+          # allowed keyword, a colour, or a short length/percentage
+          clean << "#{prop}: #{val};" unless val.split().any? do |keyword|
+            !self.class.const_get("ALLOWED_CSS_KEYWORDS").include?(keyword) and
+            keyword !~ /\A(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)\z/
+          end
+        elsif self.class.const_get("ALLOWED_SVG_PROPERTIES").include?(prop)
+          clean << "#{prop}: #{val};"
+        end
+      end
+
+      style = clean.join(' ')
+    end
+  end
+
+  # Tokenizer whose tokens are passed through sanitize_token before they
+  # reach the caller.
+  class HTMLSanitizer < HTMLTokenizer
+    include HTMLSanitizeModule
+
+    def each
+      super { |token| yield sanitize_token(token) }
+    end
+  end
+
+end
--- a/attic/vendor/plugins/HTML5lib/lib/html5/serializer.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/serializer.rb
@ -0,0 +1,2 @@
+require 'html5/serializer/htmlserializer'
+require 'html5/serializer/xhtmlserializer'
--- a/attic/vendor/plugins/HTML5lib/lib/html5/serializer/htmlserializer.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/serializer/htmlserializer.rb
@ -0,0 +1,179 @@
+require 'html5/constants'
+
+module HTML5
+
+  # Serializes a stream of tree-walker tokens to an HTML string.
+  class HTMLSerializer
+
+    # Convenience wrapper: build a serializer from options and serialize
+    # stream, using options[:encoding] as the output encoding.
+    def self.serialize(stream, options = {})
+      new(options).serialize(stream, options[:encoding])
+    end
+
+    # Escape the characters that are significant in HTML text content.
+    def escape(string)
+      string.gsub("&", "&amp;").gsub("<", "&lt;").gsub(">", "&gt;")
+    end
+
+    # options - Hash of settings. A key is honoured only if it names one of
+    #           the instance variables initialised below; anything else is
+    #           ignored. Supplying :quote_char switches off
+    #           :use_best_quote_char.
+    def initialize(options={})
+      @quote_attr_values           = false
+      @quote_char                  = '"'
+      @use_best_quote_char         = true
+      @minimize_boolean_attributes = true
+
+      @use_trailing_solidus          = false
+      @space_before_trailing_solidus = true
+      @escape_lt_in_attrs            = false
+      @escape_rcdata                 = false
+
+      @omit_optional_tags = true
+      @sanitize           = false
+
+      @strip_whitespace = false
+
+      @inject_meta_charset = true
+
+      # Must be initialised here, otherwise the :strict option is filtered
+      # out below and serialize_error can never raise.
+      @strict = false
+
+      options.each do |name, value|
+        next unless instance_variable_defined?("@#{name}")
+        @use_best_quote_char = false if name.to_s == 'quote_char'
+        instance_variable_set("@#{name}", value)
+      end
+
+      @errors = []
+    end
+
+    # Serialize the tokens yielded by treewalker.
+    #
+    # treewalker - any object whose each yields token Hashes (:type, :name,
+    #              :data)
+    # encoding   - output encoding name; when given and not 'utf-8' the
+    #              result is converted with Iconv
+    #
+    # Returns the serialized markup as a String. Problems are collected in
+    # @errors; with @strict set the first one raises SerializeError.
+    def serialize(treewalker, encoding=nil)
+      in_cdata = false
+      @errors = []
+
+      # The filters are loaded on demand and wrap the token stream.
+      if encoding and @inject_meta_charset
+        require 'html5/filters/inject_meta_charset'
+        treewalker = Filters::InjectMetaCharset.new(treewalker, encoding)
+      end
+
+      if @strip_whitespace
+        require 'html5/filters/whitespace'
+        treewalker = Filters::WhitespaceFilter.new(treewalker)
+      end
+
+      if @sanitize
+        require 'html5/filters/sanitizer'
+        treewalker = Filters::HTMLSanitizeFilter.new(treewalker)
+      end
+
+      if @omit_optional_tags
+        require 'html5/filters/optionaltags'
+        treewalker = Filters::OptionalTagFilter.new(treewalker)
+      end
+
+      result = []
+      treewalker.each do |token|
+        type = token[:type]
+        if type == :Doctype
+          doctype = "<!DOCTYPE %s>" % token[:name]
+          result << doctype
+
+        elsif [:Characters, :SpaceCharacters].include? type
+          # whitespace and the content of CDATA elements are emitted verbatim
+          if type == :SpaceCharacters or in_cdata
+            if in_cdata and token[:data].include?("</")
+              serialize_error("Unexpected </ in CDATA")
+            end
+            result << token[:data]
+          else
+            result << escape(token[:data])
+          end
+
+        elsif [:StartTag, :EmptyTag].include? type
+          name = token[:name]
+          if RCDATA_ELEMENTS.include?(name) and not @escape_rcdata
+            in_cdata = true
+          elsif in_cdata
+            serialize_error("Unexpected child element of a CDATA element")
+          end
+          attributes = []
+          # attributes are written in sorted order
+          token[:data].to_a.sort.each do |k, v|
+            attributes << ' '
+
+            attributes << k
+            # a minimized boolean attribute is written as its name only
+            if not @minimize_boolean_attributes or \
+                (!(BOOLEAN_ATTRIBUTES[name]||[]).include?(k) \
+                and !BOOLEAN_ATTRIBUTES[:global].include?(k))
+              attributes << "="
+              if @quote_attr_values or v.empty?
+                quote_attr = true
+              else
+                quote_attr = (SPACE_CHARACTERS + %w(< > " ')).any? {|c| v.include?(c)}
+              end
+              v = v.gsub("&", "&amp;")
+              v = v.gsub("<", "&lt;") if @escape_lt_in_attrs
+              if quote_attr
+                quote_char = @quote_char
+                if @use_best_quote_char
+                  # prefer the quote character that does not occur in the value
+                  if v.index("'") and !v.index('"')
+                    quote_char = '"'
+                  elsif v.index('"') and !v.index("'")
+                    quote_char = "'"
+                  end
+                end
+                if quote_char == "'"
+                  v = v.gsub("'", "&#39;")
+                else
+                  v = v.gsub('"', "&quot;")
+                end
+                attributes << quote_char << v << quote_char
+              else
+                attributes << v
+              end
+            end
+          end
+          if VOID_ELEMENTS.include?(name) and @use_trailing_solidus
+            if @space_before_trailing_solidus
+              attributes << " /"
+            else
+              attributes << "/"
+            end
+          end
+          result << "<%s%s>" % [name, attributes.join('')]
+
+        elsif type == :EndTag
+          name = token[:name]
+          if RCDATA_ELEMENTS.include?(name)
+            in_cdata = false
+          elsif in_cdata
+            serialize_error("Unexpected child element of a CDATA element")
+          end
+          end_tag = "</#{name}>"
+          result << end_tag
+
+        elsif type == :Comment
+          data = token[:data]
+          serialize_error("Comment contains --") if data.index("--")
+          comment = "<!--%s-->" % token[:data]
+          result << comment
+
+        else
+          # any other token type is recorded as an error
+          serialize_error(token[:data])
+        end
+      end
+
+      if encoding and encoding != 'utf-8'
+        require 'iconv'
+        Iconv.iconv(encoding, 'utf-8', result.join('')).first
+      else
+        result.join('')
+      end
+    end
+
+    alias :render :serialize
+
+    # Record a serialization problem; raises SerializeError when @strict is
+    # set.
+    def serialize_error(data="XXX ERROR MESSAGE NEEDED")
+      # XXX The idea is to make data mandatory.
+      @errors.push(data)
+      if @strict
+        raise SerializeError
+      end
+    end
+
+  end
+
+  # Error in serialized tree. Raised by HTMLSerializer#serialize_error when
+  # the serializer's @strict flag is set. Note that it derives from
+  # Exception rather than StandardError, so a bare rescue does not catch it.
+  class SerializeError < Exception
+  end
+end
--- a/attic/vendor/plugins/HTML5lib/lib/html5/serializer/xhtmlserializer.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/serializer/xhtmlserializer.rb
@ -0,0 +1,20 @@
+require 'html5/serializer/htmlserializer'
+
+module HTML5
+
+  # An HTMLSerializer whose option defaults are replaced by the DEFAULTS
+  # table below.
+  class XHTMLSerializer < HTMLSerializer
+    # Baseline settings; entries in the caller's options hash win over these.
+    DEFAULTS = {
+      :quote_attr_values           => true,
+      :minimize_boolean_attributes => false,
+      :use_trailing_solidus        => true,
+      :escape_lt_in_attrs          => true,
+      :omit_optional_tags          => false,
+      :escape_rcdata               => true
+    }
+
+    # options:: overrides layered on top of a copy of DEFAULTS; the combined
+    #           hash is what the superclass constructor receives.
+    def initialize(options={})
+      settings = DEFAULTS.merge(options)
+      super(settings)
+    end
+  end
+
+end
--- a/attic/vendor/plugins/HTML5lib/lib/html5/sniffer.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/sniffer.rb
@ -0,0 +1,45 @@
+module HTML5
+module Sniffer
+  # 4.7.4
+  #
+  # Looks at the first 512 characters of +str+ and decides whether it is a
+  # feed or an HTML page. Leading whitespace, comments, "<!...>" declarations
+  # and "<?...?>" processing instructions are skipped; the first other
+  # construct settles the answer.
+  #
+  # Returns "application/rss+xml", "application/atom+xml" or "text/html".
+  # Raises NotImplementedError when the first element is "rdf:RDF".
+  def html_or_feed str
+    head = str[0, 512] # steps 1, 2
+    i = 0
+
+    while i < head.length
+      c = head[i]
+
+      # 0x09, 0x20, 0x0A, 0x0D == tab, space, LF, CR: just step over them.
+      if [?\t, ?\ , ?\n, ?\r].include?(c)
+        i += 1
+        next
+      end
+
+      # Anything that is not "<" (0x3C) ends the scan.
+      break unless c == ?<
+
+      i += 1
+      if head[i, 3] == "!--" # [0x21, 0x2D, 0x2D]
+        # Comment: advance to the closing "-->" (or the end) and step past it.
+        i += 3
+        i += 1 until head[i, 3] == "-->" || i >= head.length
+        i += 3
+      elsif head[i] == ?! # 0x21
+        # Declaration: advance to the next ">" (0x3E) and step past it.
+        i += 1
+        i += 1 until head[i] == ?> || i >= head.length
+        i += 1
+      elsif head[i] == ?? # 0x3F
+        # Processing instruction: advance to "?>" and step past it.
+        i += 1 until head[i, 2] == "?>" || i >= head.length # [0x3F, 0x3E]
+        i += 2
+      elsif head[i, 3] == "rss"       # [0x72, 0x73, 0x73]
+        return "application/rss+xml"
+      elsif head[i, 4] == "feed"      # [0x66, 0x65, 0x65, 0x64]
+        return "application/atom+xml"
+      elsif head[i, 7] == "rdf:RDF"   # [0x72, 0x64, 0x66, 0x3A, 0x52, 0x44, 0x46]
+        raise NotImplementedError
+      end
+    end
+
+    "text/html"
+  end
+end
+end
--- a/attic/vendor/plugins/HTML5lib/lib/html5/tokenizer.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/tokenizer.rb
@ -0,0 +1,970 @@
+require 'html5/constants'
+require 'html5/inputstream'
+
+module HTML5
+
+  # This class takes care of tokenizing HTML.
+  #
+  # * @current_token
+  #   Holds the token that is currently being processed.
+  #
+  # * @state
+  #   Holds the name (a Symbol such as :data_state) of the state method that
+  #   is invoked next via send.
+  #
+  # * @states
+  #   Holds a mapping between states and methods that implement the state.
+  #
+  # * @stream
+  #   Points to HTMLInputStream object.
+
+  class HTMLTokenizer
+    attr_accessor :content_model_flag, :current_token
+    attr_reader :stream
+
+    # XXX need to fix documentation
+
+    # Builds a tokenizer that reads from +stream+.
+    #
+    # stream::  passed, together with +options+, to HTMLInputStream.new.
+    # options:: :lowercase_element_name and :lowercase_attr_name are read
+    #           here; each counts as enabled unless it is explicitly false.
+    def initialize(stream, options = {})
+      @stream = HTMLInputStream.new(stream, options)
+
+      # Initial tokenizer state.
+      @state              = :data_state
+      @content_model_flag = :PCDATA
+      @escapeFlag         = false
+      @lastFourChars      = []
+
+      # The token being built, and the tokens waiting to be yielded.
+      @current_token = nil
+      @token_queue   = []
+
+      # Only an explicit false switches case folding off.
+      @lowercase_element_name = !(options[:lowercase_element_name] == false)
+      @lowercase_attr_name    = !(options[:lowercase_attr_name] == false)
+    end
+
+    # This is where the magic happens.
+    #
+    # We do our usual processing through the states and when we have a token
+    # to return we yield the token which pauses processing until the next token
+    # is requested.
+    #
+    # After every state step the pending @stream.errors are yielded first,
+    # each wrapped in a :ParseError token, followed by whatever the step put
+    # on @token_queue.
+    def each
+      @token_queue = []
+      # The state methods return true to keep going; data_state returns false
+      # on :EOF, which ends the loop.
+      while send(@state)
+        until @stream.errors.empty?
+          yield({:type => :ParseError, :data => @stream.errors.shift})
+        end
+        until @token_queue.empty?
+          yield @token_queue.shift
+        end
+      end
+    end
+
+    # Below are the various helper functions that the tokenizer states rely on.
+  
+    # If the next character is a '>', convert the current_token into
+    # an EmptyTag
+
+    # Called after a "/" was read inside a tag. Peeks at the following
+    # character: a :StartTag followed by ">" becomes an :EmptyTag, anything
+    # else queues an "incorrectly-placed-solidus" ParseError. The peeked
+    # character is always pushed back onto @stream.
+    def process_solidus_in_tag
+      # One more character has to be read to find out whether it is a ">".
+      following = @stream.char
+
+      self_closing = (@current_token[:type] == :StartTag && following == ">")
+      if self_closing
+        @current_token[:type] = :EmptyTag
+      else
+        @token_queue.push({:type => :ParseError, :data => "incorrectly-placed-solidus"})
+      end
+
+      # Hand the character back so the calling state still sees it.
+      @stream.unget(following)
+    end
+
+    # This function returns either U+FFFD or the character based on the
+    # decimal or hexadecimal representation. It also discards ";" if present.
+    # If ";" is not present, a ParseError token is appended to @token_queue
+    # and the character that was read instead is put back on the stream.
+
+    # isHex:: true to read hexadecimal digits (HEX_DIGITS, radix 16),
+    #         otherwise decimal digits (DIGITS, radix 10).
+    #
+    # Returns the decoded character as a String.
+    def consume_number_entity(isHex)
+
+      # XXX More need to be done here. For instance, #13 should prolly be
+      # converted to #10 so we don't get \r (#13 is \r right?) in the DOM and
+      # such. Thoughts on this appreciated.
+      if isHex
+        allowed, radix = HEX_DIGITS, 16
+      else
+        allowed, radix = DIGITS, 10
+      end
+
+      # Collect every character that is an allowed digit, stopping at :EOF.
+      digits = []
+      c = @stream.char
+      while allowed.include?(c) and c != :EOF
+        digits.push(c)
+        c = @stream.char
+      end
+
+      # Turn the collected digits into a code point.
+      code = digits.join('').to_i(radix)
+
+      if code == 13
+        # A carriage return is reported and replaced by a line feed.
+        @token_queue.push({:type => :ParseError, :data => "incorrect-cr-newline-entity"})
+        code = 10
+      elsif (128..159).include? code
+        # 128..159 is remapped through the Windows-1252 table (the
+        # "windows trick").
+        @token_queue.push({:type => :ParseError, :data => "illegal-windows-1252-entity"})
+        code = ENTITIES_WINDOWS1252[code - 128]
+      end
+
+      # Usable code points: 1..0x10FFFF except the surrogates 0xD800..0xDFFF.
+      surrogate = (55296 <= code and code <= 57343)
+      if 0 < code and code <= 1114111 and not surrogate
+        if String.method_defined? :force_encoding
+          decoded = code.chr('utf-8')
+        else
+          decoded = [code].pack('U')
+        end
+      else
+        decoded = [0xFFFD].pack('U')
+        @token_queue.push({:type => :ParseError, :data => "cant-convert-numeric-entity", :datavars => {"charAsInt" => code}})
+      end
+
+      # A trailing ";" is swallowed. Anything else is reported and put back.
+      unless c == ";"
+        @token_queue.push({:type => :ParseError, :data => "numeric-entity-without-semicolon"})
+        @stream.unget(c)
+      end
+
+      decoded
+    end
+
+    # Consumes a character reference whose "&" has already been read from
+    # @stream.
+    #
+    # from_attribute:: true when the reference sits inside an attribute value.
+    #                  A named entity that lacks its ";" and is directly
+    #                  followed by a letter or digit is then left unexpanded:
+    #                  all characters are put back and "&" is returned.
+    #
+    # Returns the replacement text, or nil when nothing that can be decoded
+    # follows (every consumed character is then put back on @stream). Parse
+    # errors are appended to @token_queue.
+    def consume_entity(from_attribute=false)
+      char = nil
+      char_stack = [@stream.char]
+      if SPACE_CHARACTERS.include?(char_stack[0]) or [:EOF, '<', '&'].include?(char_stack[0])
+        # Cannot start a reference: give the character back.
+        @stream.unget(char_stack)
+      elsif char_stack[0] == '#'
+        # We might have a number entity here.
+        char_stack += [@stream.char, @stream.char]
+        if char_stack[0 .. 1].include? :EOF
+          # If we reach the end of the file put everything up to :EOF
+          # back in the queue
+          char_stack = char_stack[0...char_stack.index(:EOF)]
+          @stream.unget(char_stack)
+          @token_queue << {:type => :ParseError, :data => "expected-numeric-entity-but-got-eof"}
+        else
+          if char_stack[1].downcase == "x" and HEX_DIGITS.include? char_stack[2]
+            # Hexadecimal entity detected.
+            @stream.unget(char_stack[2])
+            char = consume_number_entity(true)
+          elsif DIGITS.include? char_stack[1]
+            # Decimal entity detected.
+            @stream.unget(char_stack[1..-1])
+            char = consume_number_entity(false)
+          else
+            # No number entity detected.
+            @stream.unget(char_stack)
+            @token_queue << {:type => :ParseError, :data => "expected-numeric-entity"}
+          end
+        end
+      else
+        # At this point in the process might have named entity. Entities
+        # are stored in the constant ENTITIES.
+        #
+        # Consume characters and compare to these to a substring of the
+        # entity names in the list until the substring no longer matches.
+        filteredEntityList = ENTITIES.keys
+        # [0, 1] gives a one-character String on Ruby 1.8 and 1.9 alike.
+        filteredEntityList.reject! {|e| e[0, 1] != char_stack[0]}
+        entityName = nil
+
+        # Try to find the longest entity the string will match to take care
+        # of &noti for instance.
+        while char_stack.last != :EOF
+          name = char_stack.join('')
+          if filteredEntityList.any? {|e| e[0...name.length] == name}
+            filteredEntityList.reject! {|e| e[0...name.length] != name}
+            char_stack.push(@stream.char)
+          else
+            break
+          end
+
+          if ENTITIES.include? name
+            entityName = name
+            break if entityName[-1, 1] == ';'
+          end
+        end
+
+        if entityName != nil
+          char = ENTITIES[entityName]
+
+          # String#[-1] is a Fixnum on Ruby 1.8 and a String on 1.9, so the
+          # ";" test is done once, on a one-character slice, and reused.
+          terminated = (entityName[-1, 1] == ';')
+          # First character after the entity name (may be :EOF).
+          following = char_stack[entityName.length]
+
+          # Check whether or not the last character returned can be
+          # discarded or needs to be put back.
+          unless terminated
+            @token_queue << {:type => :ParseError, :data => "named-entity-without-semicolon"}
+          end
+
+          # Only real characters are handed to include?; :EOF is never a
+          # letter or digit.
+          if !terminated and from_attribute and following.is_a?(String) and
+             (ASCII_LETTERS.include?(following) or DIGITS.include?(following))
+            @stream.unget(char_stack)
+            char = '&'
+          else
+            @stream.unget(char_stack[entityName.length..-1])
+          end
+        else
+          @token_queue << {:type => :ParseError, :data => "expected-named-entity"}
+          @stream.unget(char_stack)
+        end
+      end
+      return char
+    end
+
+    # This method replaces the need for "entityInAttributeValueState".
+    #
+    # Appends the decoded reference (or a literal "&" when consume_entity
+    # returns nil) to the value of the attribute currently being built.
+    # consume_entity is told that it runs inside an attribute value, so the
+    # attribute-specific rule for unterminated named entities applies.
+    def process_entity_in_attribute
+      entity = consume_entity(true)
+      if entity
+        @current_token[:data][-1][1] += entity
+      else
+        @current_token[:data][-1][1] += "&"
+      end
+    end
+
+    # This method is a generic handler for emitting the tags. It also sets
+    # the state to "data" because that's what's needed after a token has been
+    # emitted.
+    #
+    # Only :StartTag, :EndTag and :EmptyTag tokens are handled; for any other
+    # token type nothing is queued and @state is left untouched.
+    def emit_current_token
+      token = @current_token
+      return unless [:StartTag, :EndTag, :EmptyTag].include?(token[:type])
+
+      token[:name] = token[:name].downcase if @lowercase_element_name
+      # Add token to the queue to be yielded
+      @token_queue.push(token)
+      @state = :data_state
+    end
+
+    # Below are the various tokenizer states worked out.
+
+    # XXX AT Perhaps we should have Hixie run some evaluation on billions of
+    # documents to figure out what the order of the various if and elsif
+    # statements should be.
+    #
+    # Data state: reads one character and either switches state or queues a
+    # :Characters / :SpaceCharacters token. Returns false on :EOF (which
+    # stops the loop in #each) and true otherwise.
+    def data_state
+      c    = @stream.char
+      flag = @content_model_flag
+      cdata_like = (flag == :CDATA || flag == :RCDATA)
+
+      # In CDATA/RCDATA keep a window of the last four characters so the
+      # "<!--" and "-->" escape markers can be recognised.
+      if cdata_like
+        @lastFourChars << c
+        @lastFourChars.shift if @lastFourChars.length > 4
+      end
+
+      if c == "&" && (flag == :PCDATA || flag == :RCDATA) && !@escapeFlag
+        @state = :entity_data_state
+      elsif c == "-" && cdata_like && !@escapeFlag && @lastFourChars.join('') == "<!--"
+        @escapeFlag = true
+        @token_queue.push({:type => :Characters, :data => c})
+      elsif c == "<" && !@escapeFlag && [:PCDATA, :CDATA, :RCDATA].include?(flag)
+        @state = :tag_open_state
+      elsif c == ">" && @escapeFlag && cdata_like && @lastFourChars[1..-1].join('') == "-->"
+        @escapeFlag = false
+        @token_queue.push({:type => :Characters, :data => c})
+      elsif c == :EOF
+        # Tokenization ends.
+        return false
+      elsif SPACE_CHARACTERS.include?(c)
+        # Directly after emitting a token you switch back to the "data
+        # state". At that point SPACE_CHARACTERS are important so they are
+        # emitted separately.
+        # XXX need to check if we don't need a special "spaces" flag on
+        # characters.
+        run = c + @stream.chars_until(SPACE_CHARACTERS, true)
+        @token_queue.push({:type => :SpaceCharacters, :data => run})
+      else
+        run = c + @stream.chars_until(%w[& < > -])
+        @token_queue.push({:type => :Characters, :data => run})
+      end
+      true
+    end
+
+    # Entity data state: queues the decoded character reference as a
+    # :Characters token, or a literal "&" when consume_entity yields nothing,
+    # then returns to the data state.
+    def entity_data_state
+      text = consume_entity || "&"
+      @token_queue.push({:type => :Characters, :data => text})
+      @state = :data_state
+      true
+    end
+
+    # Tag open state: entered after "<" was read. Decides from the next
+    # character whether a start tag, end tag, markup declaration or bogus
+    # comment follows, or whether the "<" was plain text. Always returns true.
+    def tag_open_state
+      c = @stream.char
+
+      unless @content_model_flag == :PCDATA
+        # We know the content model flag is set to either RCDATA or CDATA
+        # now because this state can never be entered with the PLAINTEXT
+        # flag.
+        if c == "/"
+          @state = :close_tag_open_state
+        else
+          @token_queue.push({:type => :Characters, :data => "<"})
+          @stream.unget(c)
+          @state = :data_state
+        end
+        return true
+      end
+
+      case
+      when c == "!"
+        @state = :markup_declaration_open_state
+      when c == "/"
+        @state = :close_tag_open_state
+      when (c != :EOF and ASCII_LETTERS.include?(c))
+        @current_token = {:type => :StartTag, :name => c, :data => []}
+        @state = :tag_name_state
+      when c == ">"
+        # XXX In theory it could be something besides a tag name. But
+        # do we really care?
+        @token_queue.push({:type => :ParseError, :data => "expected-tag-name-but-got-right-bracket"})
+        @token_queue.push({:type => :Characters, :data => "<>"})
+        @state = :data_state
+      when c == "?"
+        # XXX In theory it could be something besides a tag name. But
+        # do we really care?
+        @token_queue.push({:type => :ParseError, :data => "expected-tag-name-but-got-question-mark"})
+        @stream.unget(c)
+        @state = :bogus_comment_state
+      else
+        # XXX
+        @token_queue.push({:type => :ParseError, :data => "expected-tag-name"})
+        @token_queue.push({:type => :Characters, :data => "<"})
+        @stream.unget(c)
+        @state = :data_state
+      end
+      true
+    end
+
+    # Close tag open state: entered after "</" was read. In RCDATA/CDATA the
+    # end tag is only honoured when it matches the name of @current_token;
+    # otherwise "</" is emitted as text. Always returns true.
+    def close_tag_open_state
+      if [:RCDATA, :CDATA].include?(@content_model_flag)
+        closes_current = false
+
+        if @current_token
+          # So far we know that "</" has been consumed. We now need to know
+          # whether the next few characters match the name of last emitted
+          # start tag which also happens to be the current_token. We also need
+          # to have the character directly after the characters that could
+          # match the start tag name.
+          lookahead = []
+          (@current_token[:name].length + 1).times do
+            lookahead << @stream.char
+            # Make sure we don't get hit by :EOF
+            break if lookahead.last == :EOF
+          end
+
+          # This was only a peek, so the characters go back on the stream.
+          @stream.unget(lookahead)
+
+          closes_current =
+            (@current_token[:name].downcase == lookahead[0...-1].join('').downcase &&
+             (SPACE_CHARACTERS + [">", "/", "<", :EOF]).include?(lookahead.last))
+        end
+
+        unless closes_current
+          @token_queue.push({:type => :Characters, :data => "</"})
+          @state = :data_state
+
+          # Nothing below applies to a non-matching end tag.
+          return true
+        end
+
+        # Because the characters are correct we can safely switch to
+        # PCDATA mode now. This also means we don't have to do it when
+        # emitting the end tag token.
+        @content_model_flag = :PCDATA
+      end
+
+      c = @stream.char
+      case
+      when c == :EOF
+        @token_queue.push({:type => :ParseError, :data => "expected-closing-tag-but-got-eof"})
+        @token_queue.push({:type => :Characters, :data => "</"})
+        @state = :data_state
+      when ASCII_LETTERS.include?(c)
+        @current_token = {:type => :EndTag, :name => c, :data => []}
+        @state = :tag_name_state
+      when c == ">"
+        @token_queue.push({:type => :ParseError, :data => "expected-closing-tag-but-got-right-bracket"})
+        @state = :data_state
+      else
+        # XXX data can be _'_...
+        @token_queue.push({:type => :ParseError, :data => "expected-closing-tag-but-got-char", :datavars => {:data => c}})
+        @stream.unget(c)
+        @state = :bogus_comment_state
+      end
+
+      true
+    end
+
+    # Tag name state: extends @current_token[:name] until whitespace, "/",
+    # ">" or :EOF ends the name. Always returns true.
+    def tag_name_state
+      c = @stream.char
+      case
+      when SPACE_CHARACTERS.include?(c)
+        @state = :before_attribute_name_state
+      when c == :EOF
+        @token_queue.push({:type => :ParseError, :data => "eof-in-tag-name"})
+        emit_current_token
+      when ASCII_LETTERS.include?(c)
+        # Take the whole run of letters in one go.
+        @current_token[:name] += c + @stream.chars_until(ASCII_LETTERS, true)
+      when c == ">"
+        emit_current_token
+      when c == "/"
+        process_solidus_in_tag
+        @state = :before_attribute_name_state
+      else
+        @current_token[:name] += c
+      end
+      true
+    end
+
+    # Before attribute name state: skips whitespace, then either starts a new
+    # [name, value] pair on @current_token[:data] or finishes the tag.
+    # Always returns true.
+    def before_attribute_name_state
+      c = @stream.char
+      case
+      when SPACE_CHARACTERS.include?(c)
+        @stream.chars_until(SPACE_CHARACTERS, true)
+      when c == :EOF
+        @token_queue.push({:type => :ParseError, :data => "expected-attribute-name-but-got-eof"})
+        emit_current_token
+      when ASCII_LETTERS.include?(c)
+        @current_token[:data].push([c, ""])
+        @state = :attribute_name_state
+      when c == ">"
+        emit_current_token
+      when c == "/"
+        process_solidus_in_tag
+      else
+        # Any other character also starts an attribute name.
+        @current_token[:data].push([c, ""])
+        @state = :attribute_name_state
+      end
+      true
+    end
+
+    # Attribute name state: extends the name of the attribute being built.
+    # When the name is complete it is optionally lower-cased and checked
+    # against the earlier attributes of the same tag for duplicates.
+    # Always returns true.
+    def attribute_name_state
+      c = @stream.char
+      name_done = true   # cleared while the name is still growing
+      emit      = false  # set when the tag itself ends here
+
+      case
+      when c == "="
+        @state = :before_attribute_value_state
+      when c == :EOF
+        @token_queue.push({:type => :ParseError, :data => "eof-in-attribute-name"})
+        @state = :data_state
+        emit = true
+      when ASCII_LETTERS.include?(c)
+        @current_token[:data][-1][0] += c + @stream.chars_until(ASCII_LETTERS, true)
+        name_done = false
+      when c == ">"
+        # XXX If we emit here the attributes are converted to a dict
+        # without being checked and when the code below runs we error
+        # because data is a dict not a list
+        emit = true
+      when SPACE_CHARACTERS.include?(c)
+        @state = :after_attribute_name_state
+      when c == "/"
+        process_solidus_in_tag
+        @state = :before_attribute_name_state
+      else
+        @current_token[:data][-1][0] += c
+        name_done = false
+      end
+
+      if name_done
+        # Attributes are not dropped at this stage. That happens when the
+        # start tag token is emitted so values can still be safely appended
+        # to attributes, but we do want to report the parse error in time.
+        attrs = @current_token[:data]
+        attrs[-1][0] = attrs.last.first.downcase if @lowercase_attr_name
+
+        # Report a duplicate at most once, however many earlier matches exist.
+        if attrs[0...-1].any? {|name, value| attrs.last.first == name}
+          @token_queue.push({:type => :ParseError, :data => "duplicate-attribute"})
+        end
+
+        # XXX Fix for above XXX
+        emit_current_token if emit
+      end
+      true
+    end
+
+    # After attribute name state: whitespace followed the attribute name.
+    # Either an "=" introduces its value, the tag ends, or a new attribute
+    # starts. Always returns true.
+    def after_attribute_name_state
+      c = @stream.char
+      case
+      when SPACE_CHARACTERS.include?(c)
+        @stream.chars_until(SPACE_CHARACTERS, true)
+      when c == "="
+        @state = :before_attribute_value_state
+      when c == ">"
+        emit_current_token
+      when c == :EOF
+        @token_queue.push({:type => :ParseError, :data => "expected-end-of-tag-but-got-eof"})
+        emit_current_token
+      when ASCII_LETTERS.include?(c)
+        @current_token[:data].push([c, ""])
+        @state = :attribute_name_state
+      when c == "/"
+        process_solidus_in_tag
+        @state = :before_attribute_name_state
+      else
+        # Any other character starts a new attribute as well.
+        @current_token[:data].push([c, ""])
+        @state = :attribute_name_state
+      end
+      true
+    end
+
+    # Before attribute value state: picks the quoted or unquoted value state
+    # depending on the first non-whitespace character after "=".
+    # Always returns true.
+    def before_attribute_value_state
+      c = @stream.char
+      case
+      when SPACE_CHARACTERS.include?(c)
+        @stream.chars_until(SPACE_CHARACTERS, true)
+      when c == "\""
+        @state = :attribute_value_double_quoted_state
+      when c == "&"
+        # The "&" belongs to the value: let the unquoted state re-read it.
+        @state = :attribute_value_unquoted_state
+        @stream.unget(c)
+      when c == "'"
+        @state = :attribute_value_single_quoted_state
+      when c == ">"
+        emit_current_token
+      when c == :EOF
+        @token_queue.push({:type => :ParseError, :data => "expected-attribute-value-but-got-eof"})
+        emit_current_token
+      else
+        @current_token[:data][-1][1] += c
+        @state = :attribute_value_unquoted_state
+      end
+      true
+    end
+
+    # Attribute value (double-quoted) state: appends to the current
+    # attribute's value until the closing '"'. Always returns true.
+    def attribute_value_double_quoted_state
+      c = @stream.char
+      case c
+      when "\""
+        @state = :before_attribute_name_state
+      when "&"
+        process_entity_in_attribute
+      when :EOF
+        @token_queue.push({:type => :ParseError, :data => "eof-in-attribute-value-double-quote"})
+        emit_current_token
+      else
+        # Take everything up to the next quote or entity in one go.
+        @current_token[:data][-1][1] += c + @stream.chars_until(["\"", "&"])
+      end
+      true
+    end
+
+    # Attribute value (single-quoted) state: appends to the current
+    # attribute's value until the closing "'". Always returns true.
+    def attribute_value_single_quoted_state
+      c = @stream.char
+      case c
+      when "'"
+        @state = :before_attribute_name_state
+      when "&"
+        process_entity_in_attribute
+      when :EOF
+        @token_queue.push({:type => :ParseError, :data => "eof-in-attribute-value-single-quote"})
+        emit_current_token
+      else
+        # Take everything up to the next quote or entity in one go.
+        rest = @stream.chars_until(["'", "&"])
+        @current_token[:data][-1][1] += c + rest
+      end
+      true
+    end
+
+    # Attribute value (unquoted) state: appends to the current attribute's
+    # value until whitespace or ">" ends it. Always returns true.
+    def attribute_value_unquoted_state
+      c = @stream.char
+      case
+      when SPACE_CHARACTERS.include?(c)
+        @state = :before_attribute_name_state
+      when c == "&"
+        process_entity_in_attribute
+      when c == ">"
+        emit_current_token
+      when c == :EOF
+        @token_queue.push({:type => :ParseError, :data => "eof-in-attribute-value-no-quotes"})
+        emit_current_token
+      else
+        stoppers = ["&", ">","<"] + SPACE_CHARACTERS
+        @current_token[:data][-1][1] += c + @stream.chars_until(stoppers)
+      end
+      true
+    end
+
+    # Bogus comment state: turns everything up to the next ">" into a
+    # :Comment token and goes back to the data state. Always returns true.
+    def bogus_comment_state
+      # chars_until stops at the first ">" (it checks for :EOF automatically,
+      # per the original note here).
+      text = @stream.chars_until((">"))
+      @token_queue.push({:type => :Comment, :data => text})
+
+      # Swallow the terminator, which is either the ">" or an :EOF.
+      @stream.char
+      @state = :data_state
+      true
+    end
+
+    # Markup declaration open state: entered after "<!". "--" starts a
+    # comment, "DOCTYPE" (any case) starts a doctype, anything else is put
+    # back and handled as a bogus comment. Always returns true.
+    def markup_declaration_open_state
+      lookahead = [@stream.char, @stream.char]
+
+      if lookahead == ["-", "-"]
+        @current_token = {:type => :Comment, :data => ""}
+        @state = :comment_start_state
+        return true
+      end
+
+      # Read five more characters so that "DOCTYPE" can be recognised.
+      5.times { lookahead.push(@stream.char) }
+
+      # Put in explicit :EOF check
+      if !lookahead.include?(:EOF) && lookahead.join("").upcase == "DOCTYPE"
+        @current_token = {:type => :Doctype, :name => "", :publicId => nil, :systemId => nil, :correct => true}
+        @state = :doctype_state
+      else
+        @token_queue.push({:type => :ParseError, :data => "expected-dashes-or-doctype"})
+        @stream.unget(lookahead)
+        @state = :bogus_comment_state
+      end
+      true
+    end
+
+    # Comment start state: first character after "<!--".
+    # Always returns true.
+    def comment_start_state
+      c = @stream.char
+      case c
+      when "-"
+        @state = :comment_start_dash_state
+      when ">"
+        # "<!-->": report it and emit the (empty) comment.
+        @token_queue.push({:type => :ParseError, :data => "incorrect-comment"})
+        @token_queue.push(@current_token)
+        @state = :data_state
+      when :EOF
+        @token_queue.push({:type => :ParseError, :data => "eof-in-comment"})
+        @token_queue.push(@current_token)
+        @state = :data_state
+      else
+        @current_token[:data] += c + @stream.chars_until("-")
+        @state = :comment_state
+      end
+      true
+    end
+    
+    # Comment start dash state: a single "-" followed "<!--".
+    # Always returns true.
+    def comment_start_dash_state
+      c = @stream.char
+      case c
+      when "-"
+        @state = :comment_end_state
+      when ">"
+        @token_queue.push({:type => :ParseError, :data => "incorrect-comment"})
+        @token_queue.push(@current_token)
+        @state = :data_state
+      when :EOF
+        @token_queue.push({:type => :ParseError, :data => "eof-in-comment"})
+        @token_queue.push(@current_token)
+        @state = :data_state
+      else
+        # The pending dash turned out to be ordinary comment text.
+        @current_token[:data] += '-' + c + @stream.chars_until("-")
+        @state = :comment_state
+      end
+      true
+    end
+
+    # Comment state: collects comment text up to the next "-".
+    # Always returns true.
+    def comment_state
+      c = @stream.char
+      case c
+      when "-"
+        @state = :comment_end_dash_state
+      when :EOF
+        @token_queue.push({:type => :ParseError, :data => "eof-in-comment"})
+        @token_queue.push(@current_token)
+        @state = :data_state
+      else
+        @current_token[:data] += c + @stream.chars_until("-")
+      end
+      true
+    end
+
+    # Comment end dash state: one "-" was seen inside the comment.
+    # Always returns true.
+    def comment_end_dash_state
+      c = @stream.char
+      case c
+      when "-"
+        @state = :comment_end_state
+      when :EOF
+        @token_queue.push({:type => :ParseError, :data => "eof-in-comment-end-dash"})
+        @token_queue.push(@current_token)
+        @state = :data_state
+      else
+        # The dash was ordinary text; keep it and the run that follows.
+        text = @stream.chars_until("-")
+        @current_token[:data] += "-" + c + text
+        # Consume the next character which is either a "-" or an :EOF as
+        # well so if there's a "-" directly after the "-" we go nicely to
+        # the "comment end state" without emitting a ParseError there.
+        @stream.char
+      end
+      true
+    end
+
+    # Comment end state: "--" was seen; ">" completes the comment.
+    # Always returns true.
+    def comment_end_state
+      c = @stream.char
+      case c
+      when ">"
+        @token_queue.push(@current_token)
+        @state = :data_state
+      when "-"
+        # A third dash: report it and keep it as comment text.
+        @token_queue.push({:type => :ParseError, :data => "unexpected-dash-after-double-dash-in-comment"})
+        @current_token[:data] += c
+      when :EOF
+        @token_queue.push({:type => :ParseError, :data => "eof-in-comment-double-dash"})
+        @token_queue.push(@current_token)
+        @state = :data_state
+      else
+        # XXX
+        @token_queue.push({:type => :ParseError, :data => "unexpected-char-in-comment"})
+        @current_token[:data] += "--" + c
+        @state = :comment_state
+      end
+      true
+    end
+
+    # Doctype state: expects whitespace after "DOCTYPE". A missing space is
+    # reported and the character is put back; either way the next state is
+    # the before-doctype-name state. Always returns true.
+    def doctype_state
+      c = @stream.char
+      unless SPACE_CHARACTERS.include?(c)
+        @token_queue.push({:type => :ParseError, :data => "need-space-after-doctype"})
+        @stream.unget(c)
+      end
+      @state = :before_doctype_name_state
+      true
+    end
+
+    # Before doctype name state: skips whitespace, then starts the doctype
+    # name with the first other character. Always returns true.
+    def before_doctype_name_state
+      c = @stream.char
+      case
+      when SPACE_CHARACTERS.include?(c)
+        # Whitespace is ignored; stay in this state.
+      when c == ">"
+        @token_queue.push({:type => :ParseError, :data => "expected-doctype-name-but-got-right-bracket"})
+        @current_token[:correct] = false
+        @token_queue.push(@current_token)
+        @state = :data_state
+      when c == :EOF
+        @token_queue.push({:type => :ParseError, :data => "expected-doctype-name-but-got-eof"})
+        @current_token[:correct] = false
+        @token_queue.push(@current_token)
+        @state = :data_state
+      else
+        @current_token[:name] = c
+        @state = :doctype_name_state
+      end
+      true
+    end
+
+    # Doctype name state: extends the doctype name one character at a time
+    # until whitespace, ">" or :EOF. Always returns true.
+    def doctype_name_state
+      c = @stream.char
+      case
+      when SPACE_CHARACTERS.include?(c)
+        @state = :after_doctype_name_state
+      when c == ">"
+        @token_queue.push(@current_token)
+        @state = :data_state
+      when c == :EOF
+        @token_queue.push({:type => :ParseError, :data => "eof-in-doctype-name"})
+        @current_token[:correct] = false
+        @token_queue.push(@current_token)
+        @state = :data_state
+      else
+        @current_token[:name] += c
+      end
+
+      true
+    end
+
+    # After doctype name state: skips whitespace, then expects ">" or one of
+    # the keywords "public" / "system" (any case). Anything else is put back
+    # on the stream and the doctype is handled as bogus.
+    # Always returns true.
+    def after_doctype_name_state
+      data = @stream.char
+      if SPACE_CHARACTERS.include? data
+        # Whitespace is ignored; stay in this state.
+      elsif data == ">"
+        @token_queue << @current_token
+        @state = :data_state
+      elsif data == :EOF
+        @current_token[:correct] = false
+        @stream.unget(data)
+        @token_queue << {:type => :ParseError, :data => "eof-in-doctype"}
+        @token_queue << @current_token
+        @state = :data_state
+      else
+        # Six characters in total are needed to recognise either keyword.
+        char_stack = [data]
+        5.times { char_stack << @stream.char }
+        token = char_stack.join('').tr(ASCII_UPPERCASE,ASCII_LOWERCASE)
+        if token == "public" and !char_stack.include?(:EOF)
+          @state = :before_doctype_public_identifier_state
+        elsif token == "system" and !char_stack.include?(:EOF)
+          @state = :before_doctype_system_identifier_state
+        else
+          @stream.unget(char_stack)
+          # The details go under the Symbol key :datavars, like the other
+          # ParseError tokens this tokenizer builds.
+          @token_queue << {:type => :ParseError, :data => "expected-space-or-right-bracket-in-doctype", :datavars => {"data" => data}}
+          @state = :bogus_doctype_state
+        end
+      end
+      return true
+    end
+    
+    # Before doctype public identifier state: skips whitespace and expects
+    # the opening quote of the public identifier. Always returns true.
+    def before_doctype_public_identifier_state
+      c = @stream.char
+
+      case
+      when SPACE_CHARACTERS.include?(c)
+        # Whitespace is ignored; stay in this state.
+      when c == "\""
+        @current_token[:publicId] = ""
+        @state = :doctype_public_identifier_double_quoted_state
+      when c == "'"
+        @current_token[:publicId] = ""
+        @state = :doctype_public_identifier_single_quoted_state
+      when c == ">"
+        @token_queue.push({:type => :ParseError, :data => "unexpected-end-of-doctype"})
+        @current_token[:correct] = false
+        @token_queue.push(@current_token)
+        @state = :data_state
+      when c == :EOF
+        @token_queue.push({:type => :ParseError, :data => "eof-in-doctype"})
+        @current_token[:correct] = false
+        @token_queue.push(@current_token)
+        @state = :data_state
+      else
+        @token_queue.push({:type => :ParseError, :data => "unexpected-char-in-doctype"})
+        @state = :bogus_doctype_state
+      end
+
+      true
+    end
+ 
+    # Doctype public identifier (double-quoted) state: collects the
+    # identifier up to the closing '"'. Always returns true.
+    def doctype_public_identifier_double_quoted_state
+      c = @stream.char
+      case c
+      when "\""
+        @state = :after_doctype_public_identifier_state
+      when :EOF
+        @token_queue.push({:type => :ParseError, :data => "eof-in-doctype"})
+        @current_token[:correct] = false
+        @token_queue.push(@current_token)
+        @state = :data_state
+      else
+        @current_token[:publicId] += c
+      end
+      true
+    end
+
+    # Doctype public identifier (single-quoted) state: collects the
+    # identifier up to the closing "'". Always returns true.
+    def doctype_public_identifier_single_quoted_state
+      c = @stream.char
+      case c
+      when "'"
+        @state = :after_doctype_public_identifier_state
+      when :EOF
+        @token_queue.push({:type => :ParseError, :data => "eof-in-doctype"})
+        @current_token[:correct] = false
+        @token_queue.push(@current_token)
+        @state = :data_state
+      else
+        @current_token[:publicId] += c
+      end
+      true
+    end
+
+    def after_doctype_public_identifier_state
+      data = @stream.char
+      if SPACE_CHARACTERS.include?(data)
+      elsif data == "\""
+        @current_token[:systemId] = ""
+        @state = :doctype_system_identifier_double_quoted_state
+      elsif data == "'"
+        @current_token[:systemId] = ""
+        @state = :doctype_system_identifier_single_quoted_state
+      elsif data == ">"
+        @token_queue << @current_token
+        @state = :data_state
+      elsif data == :EOF
+        @token_queue << {:type => :ParseError, :data => "eof-in-doctype"}
+        @current_token[:correct] = false
+        @token_queue << @current_token
+        @state = :data_state
+      else
+        @token_queue << {:type => :ParseError, :data => "eof-in-doctype"}
+        @state = :bogus_doctype_state
+      end
+      return true
+    end
+    
+    def before_doctype_system_identifier_state
+      data = @stream.char
+      if SPACE_CHARACTERS.include?(data)
+      elsif data == "\""
+        @current_token[:systemId] = ""
+        @state = :doctype_system_identifier_double_quoted_state
+      elsif data == "'"
+        @current_token[:systemId] = ""
+        @state = :doctype_system_identifier_single_quoted_state
+      elsif data == ">"
+        @token_queue << {:type => :ParseError, :data => "unexpected-char-in-doctype"}
+        @current_token[:correct] = false
+        @token_queue << @current_token
+        @state = :data_state
+      elsif data == :EOF
+        @token_queue << {:type => :ParseError, :data => "eof-in-doctype"}
+        @current_token[:correct] = false
+        @token_queue << @current_token
+        @state = :data_state
+      else
+        @token_queue << {:type => :ParseError, :data => "unexpected-char-in-doctype"}
+        @state = :bogus_doctype_state
+      end
+      return true
+    end
+
+    def doctype_system_identifier_double_quoted_state
+      data = @stream.char
+      if data == "\""
+        @state = :after_doctype_system_identifier_state
+      elsif data == :EOF
+        @token_queue << {:type => :ParseError, :data => "eof-in-doctype"}
+        @current_token[:correct] = false
+        @token_queue << @current_token
+        @state = :data_state
+      else
+        @current_token[:systemId] += data
+      end
+      return true
+    end
+
+    def doctype_system_identifier_single_quoted_state
+      data = @stream.char
+      if data == "'"
+        @state = :after_doctype_system_identifier_state
+      elsif data == :EOF
+        @token_queue << {:type => :ParseError, :data => "eof-in-doctype"}
+        @current_token[:correct] = false
+        @token_queue << @current_token
+        @state = :data_state
+      else
+        @current_token[:systemId] += data
+      end
+      return true
+    end
+
+    def after_doctype_system_identifier_state
+      data = @stream.char
+      if SPACE_CHARACTERS.include?(data)
+      elsif data == ">"
+        @token_queue << @current_token
+        @state = :data_state
+      elsif data == :EOF
+        @token_queue << {:type => :ParseError, :data => "eof-in-doctype"}
+        @current_token[:correct] = false
+        @token_queue << @current_token
+        @state = :data_state
+      else
+        @token_queue << {:type => :ParseError, :data => "eof-in-doctype"}
+        @state = :bogus_doctype_state
+      end
+      return true
+    end
+
+    def bogus_doctype_state
+      data = @stream.char
+      @current_token[:correct] = false
+      if data == ">"
+        @token_queue << @current_token
+        @state = :data_state
+      elsif data == :EOF
+        # XXX EMIT
+        @stream.unget(data)
+        @token_queue << {:type => :ParseError, :data => "eof-in-doctype"}
+        @current_token[:correct] = false
+        @token_queue << @current_token
+        @state = :data_state
+      end
+      return true
+    end
+
+  end
+
+end
--- a/attic/vendor/plugins/HTML5lib/lib/html5/treebuilders.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/treebuilders.rb
@ -0,0 +1,24 @@
+module HTML5
+  module TreeBuilders
+
+    class << self
+      def [](name)
+        case name.to_s.downcase
+        when 'simpletree' then
+          require 'html5/treebuilders/simpletree'
+          SimpleTree::TreeBuilder
+        when 'rexml' then
+          require 'html5/treebuilders/rexml'
+          REXML::TreeBuilder
+        when 'hpricot' then
+          require 'html5/treebuilders/hpricot'
+          Hpricot::TreeBuilder
+        else
+          raise "Unknown TreeBuilder #{name}"
+        end
+      end
+
+      alias :get_tree_builder :[]
+    end
+  end
+end
--- a/attic/vendor/plugins/HTML5lib/lib/html5/treebuilders/base.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/treebuilders/base.rb
@ -0,0 +1,334 @@
+require 'html5/constants'
+
+#XXX - TODO; make the default interface more ElementTree-like rather than DOM-like
+
+module HTML5
+
+  # The scope markers are inserted when entering buttons, object elements,
+  # marquees, table cells, and table captions, and are used to prevent formatting
+  # from "leaking" into tables, buttons, object elements, and marquees.
+  Marker = nil
+
+  module TreeBuilders
+    module Base
+
+      class Node
+        # The parent of the current node (or nil for the document node)
+        attr_accessor :parent
+
+        # a list of child nodes of the current node. This must 
+        # include all elements but not necessarily other node types
+        attr_accessor :childNodes
+
+        # A list of miscellaneous flags that can be set on the node
+        attr_accessor :_flags
+
+        def initialize(name)
+          @parent     = nil
+          @childNodes = []
+          @_flags     = []
+        end
+
+        # Insert node as a child of the current node
+        def appendChild(node)
+          raise NotImplementedError
+        end
+
+        # Insert data as text in the current node, positioned before the 
+        # start of node insertBefore or to the end of the node's text.
+        def insertText(data, insertBefore=nil)
+          raise NotImplementedError
+        end
+
+        # Insert node as a child of the current node, before refNode in the 
+        # list of child nodes. Raises ValueError if refNode is not a child of 
+        # the current node
+        def insertBefore(node, refNode)
+          raise NotImplementedError
+        end
+
+        # Remove node from the children of the current node
+        def removeChild(node)
+          raise NotImplementedError
+        end
+
+        # Move all the children of the current node to newParent. 
+        # This is needed so that trees that don't store text as nodes move the 
+        # text in the correct way
+        def reparentChildren(newParent)
+          #XXX - should this method be made more general?
+          @childNodes.each { |child| newParent.appendChild(child) }
+          @childNodes = []
+        end
+
+        # Return a shallow copy of the current node i.e. a node with the same
+        # name and attributes but with no parent or child nodes
+        def cloneNode
+          raise NotImplementedError
+        end
+
+        # Return true if the node has children or text, false otherwise
+        def hasContent
+          raise NotImplementedError
+        end
+      end
+
+      # Base treebuilder implementation
+      class TreeBuilder
+
+        attr_accessor :open_elements
+
+        attr_accessor :activeFormattingElements
+
+        attr_accessor :document
+
+        attr_accessor :head_pointer
+
+        attr_accessor :formPointer
+
+        # Class to use for document root
+        documentClass = nil
+
+        # Class to use for HTML elements
+        elementClass = nil
+
+        # Class to use for comments
+        commentClass = nil
+
+        # Class to use for doctypes
+        doctypeClass = nil
+
+        # Fragment class
+        fragmentClass = nil
+
+        def initialize
+          reset
+        end
+
+        def reset
+          @open_elements = []
+          @activeFormattingElements = []
+
+          #XXX - rename these to headElement, formElement
+          @head_pointer = nil
+          @formPointer = nil
+
+          self.insert_from_table = false
+
+          @document = @documentClass.new
+        end
+
+        def elementInScope(target, tableVariant=false)
+          # Exit early when possible.
+          return true if @open_elements[-1].name == target
+
+          # AT How about while true and simply set node to [-1] and set it to
+          # [-2] at the end...
+          @open_elements.reverse.each do |element|
+            if element.name == target
+              return true
+            elsif element.name == 'table'
+              return false
+            elsif not tableVariant and SCOPING_ELEMENTS.include?(element.name)
+              return false
+            elsif element.name == 'html'
+              return false
+            end
+          end
+          assert false # We should never reach this point
+        end
+
+        def reconstructActiveFormattingElements
+          # Within this algorithm the order of steps described in the
+          # specification is not quite the same as the order of steps in the
+          # code. It should still do the same though.
+
+          # Step 1: stop the algorithm when there's nothing to do.
+          return if @activeFormattingElements.empty?
+
+          # Step 2 and step 3: we start with the last element. So i is -1.
+          i = -1
+          entry = @activeFormattingElements[i]
+          return if entry == Marker or @open_elements.include?(entry)
+
+          # Step 6
+          until entry == Marker or @open_elements.include?(entry)
+            # Step 5: let entry be one earlier in the list.
+            i -= 1
+            begin
+              entry = @activeFormattingElements[i]
+            rescue
+              # Step 4: at this point we need to jump to step 8. By not doing
+              # i += 1 which is also done in step 7 we achieve that.
+              break
+            end
+          end
+          while true
+            # Step 7
+            i += 1
+
+            # Step 8
+            clone = @activeFormattingElements[i].cloneNode
+
+            # Step 9
+            element = insert_element(clone.name, clone.attributes)
+
+            # Step 10
+            @activeFormattingElements[i] = element
+
+            # Step 11
+            break if element == @activeFormattingElements[-1]
+          end
+        end
+
+        def clearActiveFormattingElements
+          {} until @activeFormattingElements.empty? || @activeFormattingElements.pop == Marker
+        end
+
+        # Check if an element exists between the end of the active
+        # formatting elements and the last marker. If it does, return it, else
+        # return false
+        def elementInActiveFormattingElements(name)
+          @activeFormattingElements.reverse.each do |element|
+            # Check for Marker first because if it's a Marker it doesn't have a
+            # name attribute.
+            break if element == Marker
+            return element if element.name == name
+          end
+          return false
+        end
+
+        def insertDoctype(name, public_id, system_id)
+          doctype = @doctypeClass.new(name)
+          doctype.public_id = public_id
+          doctype.system_id = system_id
+          @document.appendChild(doctype)
+        end
+
+        def insert_comment(data, parent=nil)
+          parent = @open_elements[-1] if parent.nil?
+          parent.appendChild(@commentClass.new(data))
+        end
+               
+        # Create an element but don't insert it anywhere
+        def createElement(name, attributes)
+          element = @elementClass.new(name)
+          element.attributes = attributes
+          return element
+        end
+
+        # Switch the function used to insert an element from the
+        # normal one to the misnested table one and back again
+        def insert_from_table=(value)
+          @insert_from_table = value
+          @insert_element = value ? :insert_elementTable : :insert_elementNormal
+        end
+
+        def insert_element(name, attributes)
+          send(@insert_element, name, attributes)
+        end
+
+        def insert_elementNormal(name, attributes)
+          element = @elementClass.new(name)
+          element.attributes = attributes
+          @open_elements.last.appendChild(element)
+          @open_elements.push(element)
+          return element
+        end
+
+        # Create an element and insert it into the tree
+        def insert_elementTable(name, attributes)
+          element = @elementClass.new(name)
+          element.attributes = attributes
+          if TABLE_INSERT_MODE_ELEMENTS.include?(@open_elements.last.name)
+            #We should be in the InTable mode. This means we want to do
+            #special magic element rearranging
+            parent, insertBefore = getTableMisnestedNodePosition
+            if insertBefore.nil?
+              parent.appendChild(element)
+            else
+              parent.insertBefore(element, insertBefore)
+            end
+            @open_elements.push(element)
+          else
+            return insert_elementNormal(name, attributes)
+          end
+          return element
+        end
+
+        def insertText(data, parent=nil)
+          parent = @open_elements[-1] if parent.nil?
+
+          if (not(@insert_from_table) or (@insert_from_table and not TABLE_INSERT_MODE_ELEMENTS.include?(@open_elements[-1].name)))
+            parent.insertText(data)
+          else
+            #We should be in the InTable mode. This means we want to do
+            #special magic element rearranging
+            parent, insertBefore = getTableMisnestedNodePosition
+            parent.insertText(data, insertBefore)
+          end
+        end
+
+        # Get the foster parent element, and sibling to insert before
+        # (or nil) when inserting a misnested table node
+        def getTableMisnestedNodePosition
+          #The foster parent element is the one which comes before the most
+          #recently opened table element
+          #XXX - this is really inelegant
+          lastTable = nil
+          fosterParent = nil
+          insertBefore = nil
+          @open_elements.reverse.each do |element|
+            if element.name == "table"
+              lastTable = element
+              break
+            end
+          end
+          if lastTable
+            #XXX - we should really check that this parent is actually a
+            #node here
+            if lastTable.parent
+              fosterParent = lastTable.parent
+              insertBefore = lastTable
+            else
+              fosterParent = @open_elements[@open_elements.index(lastTable) - 1]
+            end
+          else
+            fosterParent = @open_elements[0]
+          end
+          return fosterParent, insertBefore
+        end
+
+        def generateImpliedEndTags(exclude=nil)
+          name = @open_elements[-1].name
+
+          # XXX td, th and tr are not actually needed
+          if (%w[dd dt li p td th tr].include?(name) and name != exclude)
+            @open_elements.pop
+            # XXX This is not entirely what the specification says. We should
+            # investigate it more closely.
+            generateImpliedEndTags(exclude)
+          end
+        end
+
+        def get_document
+          @document
+        end
+  
+        def get_fragment
+          #assert @inner_html
+          fragment = @fragmentClass.new
+          @open_elements[0].reparentChildren(fragment)
+          return fragment
+        end
+
+        # Serialize the subtree of node in the format required by unit tests
+        # node - the node from which to start serializing
+        def testSerializer(node)
+          raise NotImplementedError
+        end
+
+      end
+    end
+  end
+end
--- a/attic/vendor/plugins/HTML5lib/lib/html5/treebuilders/hpricot.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/treebuilders/hpricot.rb
@ -0,0 +1,231 @@
+require 'html5/treebuilders/base'
+require 'rubygems'
+require 'hpricot'
+require 'forwardable'
+
+module HTML5
+  module TreeBuilders
+    module Hpricot
+
+      class Node < Base::Node
+        extend Forwardable
+
+        def_delegators :@hpricot, :name
+
+        attr_accessor :hpricot
+
+        def initialize(name)
+          super(name)
+          @hpricot = self.class.hpricot_class.new name
+        end
+
+        def appendChild(node)
+          if node.kind_of?(TextNode) and childNodes.any? and childNodes.last.kind_of?(TextNode)
+            childNodes.last.hpricot.content = childNodes.last.hpricot.content + node.hpricot.content
+          else
+            childNodes << node
+            hpricot.children << node.hpricot
+          end
+          if (oldparent = node.hpricot.parent) != nil
+            oldparent.children.delete_at(oldparent.children.index(node.hpricot))
+          end
+          node.hpricot.parent = hpricot
+          node.parent = self
+        end
+
+        def removeChild(node)
+           childNodes.delete(node)
+           hpricot.children.delete_at(hpricot.children.index(node.hpricot))
+           node.hpricot.parent = nil
+           node.parent = nil
+        end
+
+        def insertText(data, before=nil)
+          if before
+            insertBefore(TextNode.new(data), before)
+          else
+            appendChild(TextNode.new(data))
+          end
+        end
+
+        def insertBefore(node, refNode)
+          index = childNodes.index(refNode)
+          if node.kind_of?(TextNode) and index > 0 and childNodes[index-1].kind_of?(TextNode)
+            childNodes[index-1].hpricot.content = childNodes[index-1].hpricot.to_s + node.hpricot.to_s
+          else
+            refNode.hpricot.parent.insert_before(node.hpricot,refNode.hpricot)
+            childNodes.insert(index, node)
+          end
+        end
+
+        def hasContent
+          childNodes.any?
+        end
+      end
+
+      class Element < Node
+        def self.hpricot_class
+          ::Hpricot::Elem
+        end
+
+        def initialize(name)
+          super(name)
+
+          @hpricot = ::Hpricot::Elem.new(::Hpricot::STag.new(name))
+        end
+
+        def name
+          @hpricot.stag.name
+        end
+
+        def cloneNode
+          attributes.inject(self.class.new(name)) do |node, (name, value)|
+            node.hpricot[name] = value
+            node
+          end
+        end
+
+        # A call to Hpricot::Elem#raw_attributes is built dynamically,
+        # so alterations to the returned value (a hash) will be lost.
+        #
+        # AttributeProxy works around this by forwarding :[]= calls
+        # to the raw_attributes accessor on the element start tag.
+        #
+        class AttributeProxy
+          def initialize(hpricot)
+            @hpricot = hpricot
+          end
+
+          def []=(k, v)
+            @hpricot.stag.send(stag_attributes_method)[k] = v
+          end
+
+          def stag_attributes_method
+            # STag#attributes changed to STag#raw_attributes after Hpricot 0.5
+            @hpricot.stag.respond_to?(:raw_attributes) ? :raw_attributes : :attributes
+          end
+
+          def method_missing(*a, &b)
+            @hpricot.attributes.send(*a, &b)
+          end
+        end
+
+        def attributes
+          AttributeProxy.new(@hpricot)
+        end
+
+        def attributes=(attrs)
+          attrs.each { |name, value| @hpricot[name] = value }
+        end
+
+        def printTree(indent=0)
+          tree = "\n|#{' ' * indent}<#{name}>"
+          indent += 2
+          attributes.each do |name, value|
+            next if name == 'xmlns'
+            tree += "\n|#{' ' * indent}#{name}=\"#{value}\""
+          end
+          childNodes.inject(tree) { |tree, child| tree + child.printTree(indent) }
+        end
+      end
+
+      class Document < Node
+        def self.hpricot_class
+          ::Hpricot::Doc
+        end
+
+        def initialize
+          super(nil)
+        end
+
+        def printTree(indent=0)
+          childNodes.inject('#document') { |tree, child| tree + child.printTree(indent + 2) }
+        end
+      end
+
+      class DocumentType < Node
+        def_delegators :@hpricot, :public_id, :system_id
+
+        def self.hpricot_class
+          ::Hpricot::DocType
+        end
+
+        def initialize(name, public_id, system_id)
+          begin
+            super(name)
+          rescue ArgumentError # needs 3...
+          end
+
+          @hpricot = ::Hpricot::DocType.new(name, public_id, system_id)
+        end
+
+        def printTree(indent=0)
+          if hpricot.target and hpricot.target.any?
+            "\n|#{' ' * indent}<!DOCTYPE #{hpricot.target}>"
+          else
+            "\n|#{' ' * indent}<!DOCTYPE >"
+          end
+        end
+      end
+
+      class DocumentFragment < Element
+        def initialize
+          super('')
+        end
+
+        def printTree(indent=0)
+          childNodes.inject('') {|tree, child| tree + child.printTree(indent + 2) }
+        end
+      end
+
+      class TextNode < Node
+        def initialize(data)
+          @hpricot = ::Hpricot::Text.new(data)
+        end
+
+        def printTree(indent=0)
+          "\n|#{' ' * indent}\"#{hpricot.content}\""
+        end
+      end
+
+      class CommentNode < Node
+        def self.hpricot_class
+          ::Hpricot::Comment
+        end
+
+        def printTree(indent=0)
+          "\n|#{' ' * indent}<!-- #{hpricot.content} -->"
+        end
+      end
+
+      class TreeBuilder < Base::TreeBuilder
+        def initialize
+          @documentClass = Document
+          @doctypeClass  = DocumentType
+          @elementClass  = Element
+          @commentClass  = CommentNode
+          @fragmentClass = DocumentFragment
+        end
+
+        def insertDoctype(name, public_id, system_id)
+          doctype = @doctypeClass.new(name, public_id, system_id)
+          @document.appendChild(doctype)
+        end
+
+        def testSerializer(node)
+          node.printTree
+        end
+
+        def get_document
+          @document.hpricot
+        end
+
+        def get_fragment
+          @document = super
+          return @document.hpricot.children
+        end
+      end
+
+    end
+  end
+end
--- a/attic/vendor/plugins/HTML5lib/lib/html5/treebuilders/rexml.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/treebuilders/rexml.rb
@ -0,0 +1,209 @@
+require 'html5/treebuilders/base'
+require 'rexml/document'
+require 'forwardable'
+
+module HTML5
+  module TreeBuilders
+    module REXML
+
+      class Node < Base::Node
+        extend Forwardable
+        def_delegators :@rxobj, :name, :attributes
+        attr_accessor :rxobj
+
+        def initialize name
+          super name
+          @rxobj = self.class.rxclass.new name
+        end
+
+        def appendChild node
+          if node.kind_of?(TextNode) && childNodes.length > 0 && childNodes.last.kind_of?(TextNode)
+            childNodes.last.rxobj.value = childNodes.last.rxobj.to_s + node.rxobj.to_s
+            childNodes.last.rxobj.raw = true
+          else
+            childNodes.push node
+            rxobj.add node.rxobj
+          end
+          node.parent = self
+        end
+
+        def removeChild node
+           childNodes.delete node
+           rxobj.delete node.rxobj
+           node.parent = nil
+        end
+
+        def insertText data, before=nil
+          if before
+            insertBefore TextNode.new(data), before
+          else
+            appendChild TextNode.new(data)
+          end
+        end
+
+        def insertBefore node, refNode
+          index = childNodes.index(refNode)
+          if node.kind_of?(TextNode) and index > 0 && childNodes[index-1].kind_of?(TextNode)
+            childNodes[index-1].rxobj.value = childNodes[index-1].rxobj.to_s + node.rxobj.to_s
+            childNodes[index-1].rxobj.raw = true
+          else
+            childNodes.insert index, node
+            refNode.rxobj.parent.insert_before(refNode.rxobj,node.rxobj)
+          end
+        end
+
+        def hasContent
+          (childNodes.length > 0)
+        end
+      end
+
+      class Element < Node
+        def self.rxclass
+          ::REXML::Element
+        end
+
+        def initialize name
+          super name
+        end
+
+        def cloneNode
+          newNode = self.class.new name
+          attributes.each {|name,value| newNode.attributes[name] = value}
+          newNode
+        end
+
+        def attributes= value
+          value.each {|name, value| rxobj.attributes[name] = value}
+        end
+
+        def printTree indent=0
+          tree = "\n|#{' ' * indent}<#{name}>"
+          indent += 2
+          for name, value in attributes
+            next if name == 'xmlns'
+            tree += "\n|#{' ' * indent}#{name}=\"#{value}\""
+          end
+          for child in childNodes
+            tree += child.printTree(indent)
+          end
+          tree
+        end
+      end
+
+      class Document < Node
+        def self.rxclass
+          ::REXML::Document
+        end
+
+        def initialize
+          super nil
+        end
+
+        # ryansking: not sure why this was here. removing it doesn't cause any tests to fail
+        # def appendChild node
+        #    if node.kind_of? Element and node.name == 'html'
+        #      node.rxobj.add_namespace('http://www.w3.org/1999/xhtml')
+        #    end
+        #    super node
+        # end
+
+        def printTree indent=0
+          tree = "#document"
+          for child in childNodes
+            tree += child.printTree(indent + 2)
+          end
+          return tree
+        end
+      end
+
+      class DocumentType < Node
+        def_delegator :@rxobj, :public, :public_id
+
+        def_delegator :@rxobj, :system, :system_id
+
+        def self.rxclass
+          ::REXML::DocType
+        end
+
+        def initialize name, public_id, system_id
+            super(name)
+            if public_id
+              @rxobj = ::REXML::DocType.new [name, ::REXML::DocType::PUBLIC, public_id, system_id]
+            elsif system_id
+              @rxobj = ::REXML::DocType.new [name, ::REXML::DocType::SYSTEM, nil, system_id]
+            else
+              @rxobj = ::REXML::DocType.new name
+            end
+        end
+
+        def printTree indent=0
+          "\n|#{' ' * indent}<!DOCTYPE #{name}>"
+        end
+      end
+
+      class DocumentFragment < Element
+        def initialize
+          super nil
+        end
+
+        def printTree indent=0
+          tree = ""
+          for child in childNodes
+            tree += child.printTree(indent+2)
+          end
+          return tree
+        end
+      end
+
+      class TextNode < Node
+        def initialize data
+          raw = data.gsub('&', '&amp;').gsub('<', '&lt;').gsub('>', '&gt;')
+          @rxobj = ::REXML::Text.new(raw, true, nil, true)
+        end
+
+        def printTree indent=0
+          "\n|#{' ' * indent}\"#{rxobj.value}\""
+        end
+      end
+
+      class CommentNode < Node
+        def self.rxclass
+          ::REXML::Comment
+        end
+
+        def printTree indent=0
+          "\n|#{' ' * indent}<!-- #{rxobj.string} -->"
+        end
+      end
+
+      class TreeBuilder < Base::TreeBuilder
+        def initialize
+          @documentClass = Document
+          @doctypeClass  = DocumentType
+          @elementClass  = Element
+          @commentClass  = CommentNode
+          @fragmentClass = DocumentFragment
+        end
+
+        def insertDoctype(name, public_id, system_id)
+          doctype = @doctypeClass.new(name, public_id, system_id)
+          @document.appendChild(doctype)
+        end
+
+        def testSerializer node
+          node.printTree
+        end
+
+        def get_document
+          @document.rxobj
+        end
+
+        def get_fragment
+          @document = super
+          return @document.rxobj.children
+        end
+      end
+
+    end
+  end
+end
--- a/attic/vendor/plugins/HTML5lib/lib/html5/treebuilders/simpletree.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/treebuilders/simpletree.rb
@ -0,0 +1,185 @@
+require 'html5/treebuilders/base'
+
+module HTML5
+  module TreeBuilders
+    module SimpleTree
+
+      class Node < Base::Node
+        # Node representing an item in the tree.
+        # name - The tag name associated with the node
+        attr_accessor :name
+
+        # The value of the current node (applies to text nodes and 
+        # comments
+        attr_accessor :value
+
+        # a dict holding name, value pairs for attributes of the node
+        attr_accessor :attributes
+
+        def initialize name
+          super
+          @name       = name
+          @value      = nil
+          @attributes = {}
+        end
+
+        def appendChild node
+          if node.kind_of? TextNode and 
+            childNodes.length > 0 and childNodes.last.kind_of? TextNode
+            childNodes.last.value += node.value
+          else
+            childNodes << node
+          end
+          node.parent = self
+        end
+
+        def removeChild node
+           childNodes.delete node
+           node.parent = nil
+        end
+
+        def cloneNode
+          newNode = self.class.new name
+          attributes.each {|name,value| newNode.attributes[name] = value}
+          newNode.value = value
+          newNode
+        end
+
+        def insertText data, before=nil
+          if before
+            insertBefore TextNode.new(data), before
+          else
+            appendChild TextNode.new(data)
+          end
+        end
+
+        def insertBefore node, refNode
+          index = childNodes.index(refNode)
+          if node.kind_of?(TextNode) && index > 0 && childNodes[index-1].kind_of?(TextNode)
+            childNodes[index-1].value += node.value
+          else
+            childNodes.insert index, node
+          end
+        end
+
+        def printTree indent=0
+          tree = "\n|%s%s" % [' '* indent, self.to_s]
+          for child in childNodes
+            tree += child.printTree(indent + 2)
+          end
+          return tree
+        end
+
+        def hasContent
+          childNodes.length > 0
+        end
+      end
+
+      class Element < Node
+        def to_s
+           "<#{name}>"
+        end
+
+        def printTree indent=0
+          tree = "\n|%s%s" % [' '* indent, self.to_s]
+          indent += 2
+          for name, value in attributes
+            tree += "\n|%s%s=\"%s\"" % [' ' * indent, name, value]
+          end
+          for child in childNodes
+            tree += child.printTree(indent)
+          end
+          tree
+        end
+      end
+
+      class Document < Node
+        def to_s
+           "#document"
+        end
+
+        def initialize
+          super nil
+        end
+
+        def printTree indent=0
+          tree = to_s
+          for child in childNodes
+            tree += child.printTree(indent + 2)
+          end
+          tree
+        end
+      end
+
+      class DocumentType < Node
+        attr_accessor :public_id, :system_id
+
+        def to_s
+          "<!DOCTYPE #{name}>"
+        end
+
+        def initialize name
+          super name
+          @public_id = nil
+          @system_id = nil
+        end
+      end
+
+      class DocumentFragment < Element
+        def initialize
+          super nil
+        end
+
+        def printTree indent=0
+          tree = ""
+          for child in childNodes
+            tree += child.printTree(indent+2)
+          end
+          return tree
+        end
+      end
+
+      class TextNode < Node
+        def initialize value
+          super nil
+          @value = value
+        end
+
+        def to_s
+           '"%s"' % value
+        end
+      end
+
+      class CommentNode < Node
+        def initialize value
+          super nil
+          @value = value
+        end
+
+        def to_s
+          "<!-- %s -->" % value
+        end
+      end
+
+      class TreeBuilder < Base::TreeBuilder
+        def initialize
+          @documentClass = Document
+          @doctypeClass  = DocumentType
+          @elementClass  = Element
+          @commentClass  = CommentNode
+          @fragmentClass = DocumentFragment
+        end
+
+        def testSerializer node
+          node.printTree
+        end
+
+        def get_fragment
+          @document = super
+          @document
+        end
+      end
+
+    end
+  end
+end
--- a/attic/vendor/plugins/HTML5lib/lib/html5/treewalkers.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/treewalkers.rb
@ -0,0 +1,26 @@
+require 'html5/treewalkers/base'
+
+module HTML5
+  module TreeWalkers
+
+    class << self
+      def [](name)
+        case name.to_s.downcase
+        when 'simpletree'
+          require 'html5/treewalkers/simpletree'
+          SimpleTree::TreeWalker
+        when 'rexml'
+          require 'html5/treewalkers/rexml'
+          REXML::TreeWalker
+        when 'hpricot'
+          require 'html5/treewalkers/hpricot'
+          Hpricot::TreeWalker
+        else
+          raise "Unknown TreeWalker #{name}"
+        end
+      end
+
+      alias :get_tree_walker :[]
+    end
+  end
+end
--- a/attic/vendor/plugins/HTML5lib/lib/html5/treewalkers/base.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/treewalkers/base.rb
@ -0,0 +1,162 @@
+require 'html5/constants'
+module HTML5
+module TreeWalkers
+
+module TokenConstructor
+  def error(msg)
+    {:type => "SerializeError", :data => msg}
+  end
+
+  def normalize_attrs(attrs)
+    attrs.to_a
+  end
+
+  def empty_tag(name, attrs, has_children=false)
+    error(_("Void element has children")) if has_children
+    {:type => :EmptyTag, :name => name, :data => normalize_attrs(attrs)}
+  end
+
+  def start_tag(name, attrs)
+    {:type => :StartTag, :name => name, :data => normalize_attrs(attrs)}
+  end
+
+  def end_tag(name)
+    {:type => :EndTag, :name => name, :data => []}
+  end
+
+  def text(data)
+    if data =~ /\A([#{SPACE_CHARACTERS.join('')}]+)/m
+      yield({:type => :SpaceCharacters, :data => $1})
+      data = data[$1.length .. -1]
+      return if data.empty?
+    end
+
+    if data =~ /([#{SPACE_CHARACTERS.join('')}]+)\Z/m
+      yield({:type => :Characters, :data => data[0 ... -$1.length]})
+      yield({:type => :SpaceCharacters, :data => $1})
+    else
+      yield({:type => :Characters, :data => data})
+    end
+  end
+
+  def comment(data)
+    {:type => :Comment, :data => data}
+  end
+
+  def doctype(name, public_id, system_id, correct=nil)
+    {:type => :Doctype, :name => name, :public_id => public_id, :system_id => system_id, :correct => correct}
+  end
+
+  def unknown(nodeType)
+    error(_("Unknown node type: ") + nodeType.to_s)
+  end
+
+  def _(str)
+    str
+  end
+end
+
+class Base
+    include TokenConstructor
+
+    def initialize(tree)
+      @tree = tree
+    end
+
+    def each
+      raise NotImplementedError
+    end
+
+    alias walk each
+
+    def to_ary
+      a = []
+      each do |i|
+        a << i
+      end
+      a
+    end
+end
+
+# Walks a tree iteratively (no recursion), using four navigation hooks that
+# subclasses must implement: node_details, first_child, next_sibling, parent.
+class NonRecursiveTreeWalker < TreeWalkers::Base
+  # Hook: describe +node+ as an array whose first element is a type tag
+  # (:DOCTYPE, :TEXT, :ELEMENT, :COMMENT, :DOCUMENT, :DOCUMENT_FRAGMENT, nil,
+  # or anything else for "unknown"), followed by type-specific details.
+  # For :ELEMENT the details are name, attributes, has_children.
+  def node_details(node)
+    raise NotImplementedError
+  end
+
+  # Hook: return the first child of +node+, or nil.
+  def first_child(node)
+    raise NotImplementedError
+  end
+
+  # Hook: return the sibling following +node+, or nil.
+  def next_sibling(node)
+    raise NotImplementedError
+  end
+
+  # Hook: return the parent of +node+.
+  def parent(node)
+    raise NotImplementedError
+  end
+
+  # Yields the tokens for @tree and everything below it, in document order.
+  def each
+    current_node = @tree
+    while current_node != nil
+      details = node_details(current_node)
+      has_children = false
+
+      # shift removes the type tag, leaving only the type-specific details.
+      case details.shift
+      when :DOCTYPE
+        yield doctype(*details)
+
+      when :TEXT
+        text(*details) {|token| yield token}
+
+      when :ELEMENT
+        name, attributes, has_children = details
+        if VOID_ELEMENTS.include?(name)
+          yield empty_tag(name, attributes.to_a, has_children)
+          # Never descend into a void element, whatever the tree claims.
+          has_children = false
+        else
+          yield start_tag(name, attributes.to_a)
+        end
+
+      when :COMMENT
+        yield comment(details[0])
+
+      when :DOCUMENT, :DOCUMENT_FRAGMENT
+        # Containers emit no token of their own; only their children do.
+        has_children = true
+
+      when nil
+        # ignore (REXML::XMLDecl is an example)
+
+      else
+        yield unknown(details[0])
+      end
+
+      # The local variable shadows the first_child method; the call with
+      # parentheses on the right-hand side still invokes the method.
+      first_child = has_children ? first_child(current_node) : nil
+      if first_child != nil
+        current_node = first_child
+      else
+        # No children to visit: close elements while climbing until a next
+        # sibling is found or the root (@tree) has been closed.
+        while current_node != nil
+          details = node_details(current_node)
+          if details.shift == :ELEMENT
+            name, attributes, has_children = details
+            yield end_tag(name) if !VOID_ELEMENTS.include?(name)
+          end
+
+          if @tree == current_node
+            # Back at the starting node: setting nil ends both loops.
+            current_node = nil
+          else
+            next_sibling = next_sibling(current_node)
+            if next_sibling != nil
+              current_node = next_sibling
+              # Resume the outer loop so the sibling gets its opening token.
+              break
+            end
+
+            current_node = parent(current_node)
+          end
+        end
+      end
+    end
+  end
+end
+
+end
+end
--- a/attic/vendor/plugins/HTML5lib/lib/html5/treewalkers/hpricot.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/treewalkers/hpricot.rb
@ -0,0 +1,48 @@
+require 'html5/treewalkers/base'
+require 'rexml/document'
+
+module HTML5
+  module TreeWalkers
+    module Hpricot
+      class TreeWalker < HTML5::TreeWalkers::NonRecursiveTreeWalker
+
+        def node_details(node)
+          case node
+          when ::Hpricot::Elem
+            if node.name.empty?
+              [:DOCUMENT_FRAGMENT]
+            else
+              [:ELEMENT, node.name,
+                node.attributes.map {|name, value| [name, value]},
+                !node.empty?]
+            end
+          when ::Hpricot::Text
+            [:TEXT, node.content]
+          when ::Hpricot::Comment
+            [:COMMENT, node.content]
+          when ::Hpricot::Doc
+            [:DOCUMENT]
+          when ::Hpricot::DocType
+            [:DOCTYPE, node.target, node.public_id, node.system_id]
+          when ::Hpricot::XMLDecl
+            [nil]
+          else
+            [:UNKNOWN, node.class.inspect]
+          end
+        end
+
+        def first_child(node)
+          node.children.first
+        end
+
+        def next_sibling(node)
+          node.next_node
+        end
+
+        def parent(node)
+          node.parent
+        end
+      end
+    end
+  end
+end
--- a/attic/vendor/plugins/HTML5lib/lib/html5/treewalkers/rexml.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/treewalkers/rexml.rb
@ -0,0 +1,48 @@
+require 'html5/treewalkers/base'
+require 'rexml/document'
+
+module HTML5
+  module TreeWalkers
+    module REXML
+      class TreeWalker < HTML5::TreeWalkers::NonRecursiveTreeWalker
+
+        def node_details(node)
+          case node
+          when ::REXML::Document
+            [:DOCUMENT]
+          when ::REXML::Element
+            if !node.name
+              [:DOCUMENT_FRAGMENT]
+            else
+              [:ELEMENT, node.name,
+                node.attributes.map {|name,value| [name,value]},
+                node.has_elements? || node.has_text?]
+            end
+          when ::REXML::Text
+            [:TEXT, node.value]
+          when ::REXML::Comment
+            [:COMMENT, node.string]
+          when ::REXML::DocType
+            [:DOCTYPE, node.name, node.public, node.system]
+          when ::REXML::XMLDecl
+            [nil]
+          else
+            [:UNKNOWN, node.class.inspect]
+          end
+        end
+
+        def first_child(node)
+          node.children.first
+        end
+
+        def next_sibling(node)
+          node.next_sibling
+        end
+
+        def parent(node)
+          node.parent
+        end
+      end
+    end
+  end
+end
--- a/attic/vendor/plugins/HTML5lib/lib/html5/treewalkers/simpletree.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/treewalkers/simpletree.rb
@ -0,0 +1,48 @@
+require 'html5/treewalkers/base'
+
+module HTML5
+  module TreeWalkers
+    module SimpleTree
+      class TreeWalker < HTML5::TreeWalkers::Base
+        include HTML5::TreeBuilders::SimpleTree
+
+        def walk(node)
+          case node
+          when Document, DocumentFragment
+            return
+
+          when DocumentType
+            yield doctype(node.name, node.public_id, node.system_id)
+
+          when TextNode
+            text(node.value) {|token| yield token}
+
+          when Element
+            if VOID_ELEMENTS.include?(node.name)
+              yield empty_tag(node.name, node.attributes, node.hasContent())
+            else
+              yield start_tag(node.name, node.attributes)
+              for child in node.childNodes
+                walk(child) {|token| yield token}
+              end
+              yield end_tag(node.name)
+            end
+
+          when CommentNode
+            yield comment(node.value)
+
+          else
+            puts '?'
+            yield unknown(node.class)
+          end
+        end
+
+        def each
+          for child in @tree.childNodes
+            walk(child) {|node| yield node}
+          end
+        end
+      end
+    end
+  end
+end
--- a/attic/vendor/plugins/HTML5lib/lib/html5/version.rb
+++ b/attic/vendor/plugins/HTML5lib/lib/html5/version.rb
@ -0,0 +1,3 @@
+module HTML5
+  # Version string, exposed as HTML5::VERSION.
+  VERSION = '0.10.1'
+end