HTML5lib is Back.

Synced with the latest version of HTML5lib, which fixes a problem with astral-plane characters. I should really do some tests, but the HTML5lib sanitizer seems to be 2-5 times slower than the old sanitizer.
2007-05-30 10:45:52 -05:00 · 2007-05-30 10:45:52 -05:00 · 4dd70af5ae
commit 4dd70af5ae
parent e1a6827f1f
39 changed files with 4843 additions and 5576 deletions
--- a/lib/node.rb
+++ b/lib/node.rb
@ -1,530 +0,0 @@
-require 'strscan'
-
-module XHTML #:nodoc:
-  
-  class Conditions < Hash #:nodoc:
-    def initialize(hash)
-      super()
-      hash = { :content => hash } unless Hash === hash
-      hash = keys_to_symbols(hash)
-      hash.each do |k,v|
-        case k
-          when :tag, :content then
-            # keys are valid, and require no further processing
-          when :attributes then
-            hash[k] = keys_to_strings(v)
-          when :parent, :child, :ancestor, :descendant, :sibling, :before,
-                  :after
-            hash[k] = Conditions.new(v)
-          when :children
-            hash[k] = v = keys_to_symbols(v)
-            v.each do |k,v2|
-              case k
-                when :count, :greater_than, :less_than
-                  # keys are valid, and require no further processing
-                when :only
-                  v[k] = Conditions.new(v2)
-                else
-                  raise "illegal key #{k.inspect} => #{v2.inspect}"
-              end
-            end
-          else
-            raise "illegal key #{k.inspect} => #{v.inspect}"
-        end
-      end
-      update hash
-    end
-
-    private
-
-      def keys_to_strings(hash)
-        hash.keys.inject({}) do |h,k|
-          h[k.to_s] = hash[k]
-          h
-        end
-      end
-
-      def keys_to_symbols(hash)
-        hash.keys.inject({}) do |h,k|
-          raise "illegal key #{k.inspect}" unless k.respond_to?(:to_sym)
-          h[k.to_sym] = hash[k]
-          h
-        end
-      end
-  end
-
-  # The base class of all nodes, textual and otherwise, in an HTML document.
-  class Node #:nodoc:
-    # The array of children of this node. Not all nodes have children.
-    attr_reader :children
-    
-    # The parent node of this node. All nodes have a parent, except for the
-    # root node.
-    attr_reader :parent
-    
-    # The line number of the input where this node was begun
-    attr_reader :line
-    
-    # The byte position in the input where this node was begun
-    attr_reader :position
-    
-    # Create a new node as a child of the given parent.
-    def initialize(parent, line=0, pos=0)
-      @parent = parent
-      @children = []
-      @line, @position = line, pos
-    end
-
-    # Return a textual representation of the node.
-    def to_s
-      s = ""
-      @children.each { |child| s << child.to_s }
-      s
-    end
-
-    # Return false (subclasses must override this to provide specific matching
-    # behavior.) +conditions+ may be of any type.
-    def match(conditions)
-      false
-    end
-
-    # Search the children of this node for the first node for which #find
-    # returns non +nil+. Returns the result of the #find call that succeeded.
-    def find(conditions)
-      conditions = validate_conditions(conditions)
-      @children.each do |child|        
-        node = child.find(conditions)
-        return node if node
-      end
-      nil
-    end
-
-    # Search for all nodes that match the given conditions, and return them
-    # as an array.
-    def find_all(conditions)
-      conditions = validate_conditions(conditions)
-
-      matches = []
-      matches << self if match(conditions)
-      @children.each do |child|
-        matches.concat child.find_all(conditions)
-      end
-      matches
-    end
-
-    # Returns +false+. Subclasses may override this if they define a kind of
-    # tag.
-    def tag?
-      false
-    end
-
-    def validate_conditions(conditions)
-      Conditions === conditions ? conditions : Conditions.new(conditions)
-    end
-
-    def ==(node)
-      return false unless self.class == node.class && children.size == node.children.size
-
-      equivalent = true
-
-      children.size.times do |i|
-        equivalent &&= children[i] == node.children[i]
-      end
-
-      equivalent
-    end
-    
-    class <<self
-      def parse(parent, line, pos, content, strict=true)
-        if content !~ /^<\S/
-          Text.new(parent, line, pos, content)
-        else
-          scanner = StringScanner.new(content)
-
-          unless scanner.skip(/</)
-            if strict
-              raise "expected <"
-            else
-              return Text.new(parent, line, pos, content)
-            end
-          end
-
-          if scanner.skip(/!\[CDATA\[/)
-            scanner.scan_until(/\]\]>/)
-            return CDATA.new(parent, line, pos, scanner.pre_match.gsub(/<!\[CDATA\[/, ''))
-          end
-          
-          closing = ( scanner.scan(/\//) ? :close : nil )
-          return Text.new(parent, line, pos, content) unless name = scanner.scan(/[\w:-]+/)
-          name
-  
-          unless closing
-            scanner.skip(/\s*/)
-            attributes = {}
-            while attr = scanner.scan(/[-\w:]+/)
-              value = true
-              if scanner.scan(/\s*=\s*/)
-                if delim = scanner.scan(/['"]/)
-                  value = ""
-                  while text = scanner.scan(/[^#{delim}\\]+|./)
-                    case text
-                      when "\\" then
-                        value << text
-                        value << scanner.getch
-                      when delim
-                        break
-                      else value << text
-                    end
-                  end
-                else
-                  value = scanner.scan(/[^\s>\/]+/)
-                end
-              end
-              attributes[attr] = value
-              scanner.skip(/\s*/)
-            end
-    
-            closing = ( scanner.scan(/\//) ? :self : nil )
-          end
-          
-          unless scanner.scan(/\s*>/)
-            if strict
-              raise "expected > (got #{scanner.rest.inspect} for #{content}, #{attributes.inspect})" 
-            else
-              # throw away all text until we find what we're looking for
-              scanner.skip_until(/>/) or scanner.terminate
-            end
-          end
-
-          Tag.new(parent, line, pos, name, attributes, closing)
-        end
-      end
-    end
-  end
-
-  # A node that represents text, rather than markup.
-  class Text < Node #:nodoc:
-    
-    attr_reader :content
-    
-    # Creates a new text node as a child of the given parent, with the given
-    # content.
-    def initialize(parent, line, pos, content)
-      super(parent, line, pos)
-      @content = content
-    end
-
-    # Returns the content of this node.
-    def to_s
-      @content
-    end
-
-    # Returns +self+ if this node meets the given conditions. Text nodes support
-    # conditions of the following kinds:
-    #
-    # * if +conditions+ is a string, it must be a substring of the node's
-    #   content
-    # * if +conditions+ is a regular expression, it must match the node's
-    #   content
-    # * if +conditions+ is a hash, it must contain a <tt>:content</tt> key that
-    #   is either a string or a regexp, and which is interpreted as described
-    #   above.
-    def find(conditions)
-      match(conditions) && self
-    end
-    
-    # Returns non-+nil+ if this node meets the given conditions, or +nil+
-    # otherwise. See the discussion of #find for the valid conditions.
-    def match(conditions)
-      case conditions
-        when String
-          @content == conditions
-        when Regexp
-          @content =~ conditions
-        when Hash
-          conditions = validate_conditions(conditions)
-
-          # Text nodes only have :content, :parent, :ancestor
-          unless (conditions.keys - [:content, :parent, :ancestor]).empty?
-            return false
-          end
-
-          match(conditions[:content])
-        else
-          nil
-      end
-    end
-
-    def ==(node)
-      return false unless super
-      content == node.content
-    end
-  end
-  
-  # A CDATA node is simply a text node with a specialized way of displaying
-  # itself.
-  class CDATA < Text #:nodoc:
-    def to_s
-      "<![CDATA[#{super}]>"
-    end
-  end
-
-  # A Tag is any node that represents markup. It may be an opening tag, a
-  # closing tag, or a self-closing tag. It has a name, and may have a hash of
-  # attributes.
-  class Tag < Node #:nodoc:
-    
-    # Either +nil+, <tt>:close</tt>, or <tt>:self</tt>
-    attr_reader :closing
-    
-    # Either +nil+, or a hash of attributes for this node.
-    attr_reader :attributes
-
-    # The name of this tag.
-    attr_reader :name
-        
-    # Create a new node as a child of the given parent, using the given content
-    # to describe the node. It will be parsed and the node name, attributes and
-    # closing status extracted.
-    def initialize(parent, line, pos, name, attributes, closing)
-      super(parent, line, pos)
-      @name = name
-      @attributes = attributes
-      @closing = closing
-    end
-
-    # A convenience for obtaining an attribute of the node. Returns +nil+ if
-    # the node has no attributes.
-    def [](attr)
-      @attributes ? @attributes[attr] : nil
-    end
-
-    # Returns non-+nil+ if this tag can contain child nodes.
-    def childless?(xml = false)
-      return false if xml && @closing.nil?
-      !@closing.nil? ||
-        @name =~ /^(img|br|hr|link|meta|area|base|basefont|
-                    col|frame|input|isindex|param)$/ox
-    end
-
-    # Returns a textual representation of the node
-    def to_s
-      if @closing == :close
-        "</#{@name}>"
-      else
-        s = "<#{@name}"
-        @attributes.each do |k,v|
-          s << " #{k}"
-          s << "=\"#{v}\"" if String === v
-        end
-        s << " /" if @closing == :self
-        s << ">"
-        @children.each { |child| s << child.to_s }
-        s << "</#{@name}>" if @closing != :self && !@children.empty?
-        s
-      end
-    end
-
-    # If either the node or any of its children meet the given conditions, the
-    # matching node is returned. Otherwise, +nil+ is returned. (See the
-    # description of the valid conditions in the +match+ method.)
-    def find(conditions)
-      match(conditions) && self || super
-    end
-
-    # Returns +true+, indicating that this node represents an HTML tag.
-    def tag?
-      true
-    end
-    
-    # Returns +true+ if the node meets any of the given conditions. The
-    # +conditions+ parameter must be a hash of any of the following keys
-    # (all are optional):
-    #
-    # * <tt>:tag</tt>: the node name must match the corresponding value
-    # * <tt>:attributes</tt>: a hash. The node's values must match the
-    #   corresponding values in the hash.
-    # * <tt>:parent</tt>: a hash. The node's parent must match the
-    #   corresponding hash.
-    # * <tt>:child</tt>: a hash. At least one of the node's immediate children
-    #   must meet the criteria described by the hash.
-    # * <tt>:ancestor</tt>: a hash. At least one of the node's ancestors must
-    #   meet the criteria described by the hash.
-    # * <tt>:descendant</tt>: a hash. At least one of the node's descendants
-    #   must meet the criteria described by the hash.
-    # * <tt>:sibling</tt>: a hash. At least one of the node's siblings must
-    #   meet the criteria described by the hash.
-    # * <tt>:after</tt>: a hash. The node must be after any sibling meeting
-    #   the criteria described by the hash, and at least one sibling must match.
-    # * <tt>:before</tt>: a hash. The node must be before any sibling meeting
-    #   the criteria described by the hash, and at least one sibling must match.
-    # * <tt>:children</tt>: a hash, for counting children of a node. Accepts the
-    #   keys:
-    # ** <tt>:count</tt>: either a number or a range which must equal (or
-    #    include) the number of children that match.
-    # ** <tt>:less_than</tt>: the number of matching children must be less than
-    #    this number.
-    # ** <tt>:greater_than</tt>: the number of matching children must be
-    #    greater than this number.
-    # ** <tt>:only</tt>: another hash consisting of the keys to use
-    #    to match on the children, and only matching children will be
-    #    counted.
-    #
-    # Conditions are matched using the following algorithm:
-    #
-    # * if the condition is a string, it must be a substring of the value.
-    # * if the condition is a regexp, it must match the value.
-    # * if the condition is a number, the value must match number.to_s.
-    # * if the condition is +true+, the value must not be +nil+.
-    # * if the condition is +false+ or +nil+, the value must be +nil+.
-    #
-    # Usage:
-    #
-    #   # test if the node is a "span" tag
-    #   node.match :tag => "span"
-    #
-    #   # test if the node's parent is a "div"
-    #   node.match :parent => { :tag => "div" }
-    #
-    #   # test if any of the node's ancestors are "table" tags
-    #   node.match :ancestor => { :tag => "table" }
-    #
-    #   # test if any of the node's immediate children are "em" tags
-    #   node.match :child => { :tag => "em" }
-    #
-    #   # test if any of the node's descendants are "strong" tags
-    #   node.match :descendant => { :tag => "strong" }
-    #
-    #   # test if the node has between 2 and 4 span tags as immediate children
-    #   node.match :children => { :count => 2..4, :only => { :tag => "span" } } 
-    #
-    #   # get funky: test to see if the node is a "div", has a "ul" ancestor
-    #   # and an "li" parent (with "class" = "enum"), and whether or not it has
-    #   # a "span" descendant that contains # text matching /hello world/:
-    #   node.match :tag => "div",
-    #              :ancestor => { :tag => "ul" },
-    #              :parent => { :tag => "li",
-    #                           :attributes => { :class => "enum" } },
-    #              :descendant => { :tag => "span",
-    #                               :child => /hello world/ }
-    def match(conditions)
-      conditions = validate_conditions(conditions)
-      # check content of child nodes
-      if conditions[:content]
-        if children.empty?
-          return false unless match_condition("", conditions[:content])
-        else
-          return false unless children.find { |child| child.match(conditions[:content]) }
-        end
-      end
-
-      # test the name
-      return false unless match_condition(@name, conditions[:tag]) if conditions[:tag]
-
-      # test attributes
-      (conditions[:attributes] || {}).each do |key, value|
-        return false unless match_condition(self[key], value)
-      end
-
-      # test parent
-      return false unless parent.match(conditions[:parent]) if conditions[:parent]
-
-      # test children
-      return false unless children.find { |child| child.match(conditions[:child]) } if conditions[:child]
-   
-      # test ancestors
-      if conditions[:ancestor]
-        return false unless catch :found do
-          p = self
-          throw :found, true if p.match(conditions[:ancestor]) while p = p.parent
-        end
-      end
-
-      # test descendants
-      if conditions[:descendant]
-        return false unless children.find do |child|
-          # test the child
-          child.match(conditions[:descendant]) ||
-          # test the child's descendants
-          child.match(:descendant => conditions[:descendant])
-        end
-      end
-      
-      # count children
-      if opts = conditions[:children]
-        matches = children.select do |c|
-          (c.kind_of?(HTML::Tag) and (c.closing == :self or ! c.childless?))
-        end
-        
-        matches = matches.select { |c| c.match(opts[:only]) } if opts[:only]
-        opts.each do |key, value|
-          next if key == :only
-          case key
-            when :count
-              if Integer === value
-                return false if matches.length != value
-              else
-                return false unless value.include?(matches.length)
-              end
-            when :less_than
-              return false unless matches.length < value
-            when :greater_than
-              return false unless matches.length > value
-            else raise "unknown count condition #{key}"
-          end
-        end
-      end
-
-      # test siblings
-      if conditions[:sibling] || conditions[:before] || conditions[:after]
-        siblings = parent ? parent.children : []
-        self_index = siblings.index(self)
-
-        if conditions[:sibling]
-          return false unless siblings.detect do |s| 
-            s != self && s.match(conditions[:sibling])
-          end
-        end
-
-        if conditions[:before]
-          return false unless siblings[self_index+1..-1].detect do |s| 
-            s != self && s.match(conditions[:before])
-          end
-        end
-
-        if conditions[:after]
-          return false unless siblings[0,self_index].detect do |s| 
-            s != self && s.match(conditions[:after])
-          end
-        end
-      end
-  
-      true
-    end
-
-    def ==(node)
-      return false unless super
-      return false unless closing == node.closing && self.name == node.name
-      attributes == node.attributes
-    end
-    
-    private
-      # Match the given value to the given condition.
-      def match_condition(value, condition)
-        case condition
-          when String
-            value && value == condition
-          when Regexp
-            value && value.match(condition)
-          when Numeric
-            value == condition.to_s
-          when true
-            !value.nil?
-          when false, nil
-            value.nil?
-          else
-            false
-        end
-      end
-  end
-end
--- a/lib/sanitize.rb
+++ b/lib/sanitize.rb
@ -3,205 +3,24 @@ module Sanitize
 # This module provides sanitization of XHTML+MathML+SVG 
 # and of inline style attributes.
 #
-# Based heavily on Sam Ruby's code in the Universal FeedParser.
-
-  require 'html/tokenizer'
-  require 'node'
-
-  acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area', 'b',
-      'big', 'blockquote', 'br', 'button', 'caption', 'center', 'cite',
-      'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt',
-      'em', 'fieldset', 'font', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
-      'hr', 'i', 'img', 'input', 'ins', 'kbd', 'label', 'legend', 'li', 'map',
-      'menu', 'ol', 'optgroup', 'option', 'p', 'pre', 'q', 's', 'samp',
-      'select', 'small', 'span', 'strike', 'strong', 'sub', 'sup', 'table',
-      'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'tt', 'u',
-      'ul', 'var']
-      
-  mathml_elements = ['maction', 'math', 'merror', 'mfrac', 'mi',
-      'mmultiscripts', 'mn', 'mo', 'mover', 'mpadded', 'mphantom',
-      'mprescripts', 'mroot', 'mrow', 'mspace', 'msqrt', 'mstyle', 'msub',
-      'msubsup', 'msup', 'mtable', 'mtd', 'mtext', 'mtr', 'munder',
-      'munderover', 'none']
-      
-  svg_elements = ['a', 'animate', 'animateColor', 'animateMotion',
-      'animateTransform', 'circle', 'defs', 'desc', 'ellipse', 'font-face',
-      'font-face-name', 'font-face-src', 'g', 'glyph', 'hkern', 'image',
-      'linearGradient', 'line', 'marker', 'metadata', 'missing-glyph',
-      'mpath', 'path', 'polygon', 'polyline', 'radialGradient', 'rect',
-      'set', 'stop', 'svg', 'switch', 'text', 'title', 'tspan', 'use']
-      
-  acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey',
-      'action', 'align', 'alt', 'axis', 'border', 'cellpadding',
-      'cellspacing', 'char', 'charoff', 'charset', 'checked', 'cite', 'class',
-      'clear', 'cols', 'colspan', 'color', 'compact', 'coords', 'datetime',
-      'dir', 'disabled', 'enctype', 'for', 'frame', 'headers', 'height',
-      'href', 'hreflang', 'hspace', 'id', 'ismap', 'label', 'lang',
-      'longdesc', 'maxlength', 'media', 'method', 'multiple', 'name',
-      'nohref', 'noshade', 'nowrap', 'prompt', 'readonly', 'rel', 'rev',
-      'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape', 'size',
-      'span', 'src', 'start', 'style', 'summary', 'tabindex', 'target', 'title',
-      'type', 'usemap', 'valign', 'value', 'vspace', 'width', 'xml:lang']
-
-
-  mathml_attributes = ['actiontype', 'align', 'columnalign', 'columnalign',
-      'columnalign', 'columnlines', 'columnspacing', 'columnspan', 'depth',
-      'display', 'displaystyle', 'equalcolumns', 'equalrows', 'fence',
-      'fontstyle', 'fontweight', 'frame', 'height', 'linethickness', 'lspace',
-      'mathbackground', 'mathcolor', 'mathvariant', 'mathvariant', 'maxsize',
-      'minsize', 'other', 'rowalign', 'rowalign', 'rowalign', 'rowlines',
-      'rowspacing', 'rowspan', 'rspace', 'scriptlevel', 'selection',
-      'separator', 'stretchy', 'width', 'width', 'xlink:href', 'xlink:show',
-      'xlink:type', 'xmlns', 'xmlns:xlink']
-
-      
-  svg_attributes = ['accent-height', 'accumulate', 'additive', 'alphabetic',
-       'arabic-form', 'ascent', 'attributeName', 'attributeType',
-       'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height',
-       'class', 'color', 'color-rendering', 'content', 'cx', 'cy', 'd', 'dx',
-       'dy', 'descent', 'display', 'dur', 'end', 'fill', 'fill-rule',
-       'font-family', 'font-size', 'font-stretch', 'font-style', 'font-variant',
-       'font-weight', 'from', 'fx', 'fy', 'g1', 'g2', 'glyph-name', 
-       'gradientUnits', 'hanging', 'height', 'horiz-adv-x', 'horiz-origin-x',
-       'id', 'ideographic', 'k', 'keyPoints', 'keySplines', 'keyTimes',
-       'lang', 'marker-end', 'marker-mid', 'marker-start', 'markerHeight',
-       'markerUnits', 'markerWidth', 'mathematical', 'max', 'min', 'name',
-       'offset', 'opacity', 'orient', 'origin', 'overline-position',
-       'overline-thickness', 'panose-1', 'path', 'pathLength', 'points',
-       'preserveAspectRatio', 'r', 'refX', 'refY', 'repeatCount', 'repeatDur',
-       'requiredExtensions', 'requiredFeatures', 'restart', 'rotate', 'rx',
-       'ry', 'slope', 'stemh', 'stemv', 'stop-color', 'stop-opacity',
-       'strikethrough-position', 'strikethrough-thickness', 'stroke',
-       'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap',
-       'stroke-linejoin', 'stroke-miterlimit', 'stroke-opacity',
-       'stroke-width', 'systemLanguage', 'target',
-       'text-anchor', 'to', 'transform', 'type', 'u1', 'u2',
-       'underline-position', 'underline-thickness', 'unicode',
-       'unicode-range', 'units-per-em', 'values', 'version', 'viewBox',
-       'visibility', 'width', 'widths', 'x', 'x-height', 'x1', 'x2',
-       'xlink:actuate', 'xlink:arcrole', 'xlink:href', 'xlink:role',
-       'xlink:show', 'xlink:title', 'xlink:type', 'xml:base', 'xml:lang',
-       'xml:space', 'xmlns', 'xmlns:xlink', 'y', 'y1', 'y2', 'zoomAndPan']
-
-  attr_val_is_uri = ['href', 'src', 'cite', 'action', 'longdesc', 'xlink:href']
-  
-  acceptable_css_properties = ['azimuth', 'background-color',
-      'border-bottom-color', 'border-collapse', 'border-color',
-      'border-left-color', 'border-right-color', 'border-top-color', 'clear',
-      'color', 'cursor', 'direction', 'display', 'elevation', 'float', 'font',
-      'font-family', 'font-size', 'font-style', 'font-variant', 'font-weight',
-      'height', 'letter-spacing', 'line-height', 'overflow', 'pause',
-      'pause-after', 'pause-before', 'pitch', 'pitch-range', 'richness',
-      'speak', 'speak-header', 'speak-numeral', 'speak-punctuation',
-      'speech-rate', 'stress', 'text-align', 'text-decoration', 'text-indent',
-      'unicode-bidi', 'vertical-align', 'voice-family', 'volume',
-      'white-space', 'width']
-
-  acceptable_css_keywords = ['auto', 'aqua', 'black', 'block', 'blue',
-      'bold', 'both', 'bottom', 'brown', 'center', 'collapse', 'dashed',
-      'dotted', 'fuchsia', 'gray', 'green', '!important', 'italic', 'left',
-      'lime', 'maroon', 'medium', 'none', 'navy', 'normal', 'nowrap', 'olive',
-      'pointer', 'purple', 'red', 'right', 'solid', 'silver', 'teal', 'top',
-      'transparent', 'underline', 'white', 'yellow']
-
-  acceptable_svg_properties = [ 'fill', 'fill-opacity', 'fill-rule',
-      'stroke', 'stroke-width', 'stroke-linecap', 'stroke-linejoin',
-      'stroke-opacity']
-
-  acceptable_protocols = [ 'ed2k', 'ftp', 'http', 'https', 'irc',
-      'mailto', 'news', 'gopher', 'nntp', 'telnet', 'webcal',
-      'xmpp', 'callto', 'feed', 'urn', 'aim', 'rsync', 'tag',
-      'ssh', 'sftp', 'rtsp', 'afs' ]
-
-      ALLOWED_ELEMENTS = acceptable_elements + mathml_elements + svg_elements  unless defined?(ALLOWED_ELEMENTS)
-      ALLOWED_ATTRIBUTES = acceptable_attributes + mathml_attributes + svg_attributes unless defined?(ALLOWED_ATTRIBUTES)
-      ALLOWED_CSS_PROPERTIES = acceptable_css_properties unless defined?(ALLOWED_CSS_PROPERTIES)
-      ALLOWED_CSS_KEYWORDS = acceptable_css_keywords unless defined?(ALLOWED_CSS_KEYWORDS)
-      ALLOWED_SVG_PROPERTIES = acceptable_svg_properties unless defined?(ALLOWED_SVG_PROPERTIES)
-      ALLOWED_PROTOCOLS = acceptable_protocols unless defined?(ALLOWED_PROTOCOLS)
-      ATTR_VAL_IS_URI = attr_val_is_uri unless defined?(ATTR_VAL_IS_URI)
-
-      # Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and stripping out all
-      # attributes not in ALLOWED_ATTRIBUTES. Style attributes are parsed, and a restricted set,
-      # specified by ALLOWED_CSS_PROPERTIES and ALLOWED_CSS_KEYWORDS, are allowed through.
-      # attributes in ATTR_VAL_IS_URI are scanned, and only URI schemes specified in
-      # ALLOWED_PROTOCOLS are allowed.
-      # You can adjust what gets sanitized, by defining these constant arrays before this Module is loaded. 
+# Uses the HTML5lib parser, so that the parsing behaviour should
+# resemble that of browsers.
 #
-      #   sanitize_html('<script> do_nasty_stuff() </script>')
-      #    => &lt;script> do_nasty_stuff() &lt;/script>
-      #   sanitize_html('<a href="javascript: sucker();">Click here for $100</a>')
-      #    => <a>Click here for $100</a>
+#  sanitize_xhtml() is a case-sensitive sanitizer, suitable for XHTML
+#  sanitize_html() is a case-insensitive sanitizer suitable for HTML
+
+
+  require 'html5lib/sanitizer'
+  require 'html5lib/html5parser'
+  require 'html5lib/liberalxmlparser'
+  include HTML5lib
+
  def sanitize_xhtml(html)
-        if html.index("<")
-          tokenizer = HTML::Tokenizer.new(html)
-          new_text = ""
-
-          while token = tokenizer.next
-            node = XHTML::Node.parse(nil, 0, 0, token, false)
-            new_text << case node.tag?
-              when true
-                if ALLOWED_ELEMENTS.include?(node.name)
-                  if node.closing != :close
-                    node.attributes.delete_if { |attr,v| !ALLOWED_ATTRIBUTES.include?(attr) }
-                    ATTR_VAL_IS_URI.each do |attr|
-                      val_unescaped = CGI.unescapeHTML(node.attributes[attr].to_s).gsub(/[\000-\040\177\s]+|\302*[\200-\240]/,'').downcase
-                      if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ and !ALLOWED_PROTOCOLS.include?(val_unescaped.split(':')[0]) 
-                        node.attributes.delete attr 
-                      end
-                    end
-                    if node.attributes['style']
-                      node.attributes['style'] = sanitize_css(node.attributes['style']) 
-                    end
-                  end
-                  node.to_s
-                else
-                  node.to_s.gsub(/</, "&lt;")
-                end
-              else
-                node.to_s.gsub(/</, "&lt;")
-            end
+    XHTMLParser.parseFragment(html, :tokenizer => HTMLSanitizer).to_s
  end

-          html = new_text
-        end
-        html
+  def sanitize_html(html)
+    HTMLParser.parseFragment(html, :tokenizer => HTMLSanitizer).to_s
  end

-      def sanitize_css(style)
-          # disallow urls
-          style = style.to_s.gsub(/url\s*\(\s*[^\s)]+?\s*\)\s*/, ' ')
-
-          # gauntlet
-          if style !~ /^([:,;#%.\sa-zA-Z0-9!]|\w-\w|\'[\s\w]+\'|\"[\s\w]+\"|\([\d,\s]+\))*$/
-             style = ''
-             return style
-          end
-          if style !~ /^(\s*[-\w]+\s*:\s*[^:;]*(;|$))*$/
-             style = ''
-             return style
-          end
-
-          clean = []
-          style.scan(/([-\w]+)\s*:\s*([^:;]*)/) do |prop,val|
-            if ALLOWED_CSS_PROPERTIES.include?(prop.downcase)
-              clean <<  prop + ': ' + val + ';'
-            elsif ['background','border','margin','padding'].include?(prop.split('-')[0].downcase) 
-              goodval = true
-              val.split().each do |keyword|
-                if !ALLOWED_CSS_KEYWORDS.include?(keyword) and 
-                   keyword !~ /^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$/
-                  goodval = false
-                end
-              end
-              if goodval 
-                clean <<  prop + ': ' + val + ';'
-              end
-            elsif ALLOWED_SVG_PROPERTIES.include?(prop.downcase)
-               clean <<  prop + ': ' + val + ';'
-            end
-          end
-
-          style = clean.join(' ')
-      end
 end
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser.rb
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_body_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_body_phase.rb
@ -0,0 +1,46 @@
+require 'html5lib/html5parser/phase'
+
+module HTML5lib
+  class AfterBodyPhase < Phase
+
+    handle_end 'html'
+
+    def processComment(data)
+      # This is needed because data is to be appended to the <html> element
+      # here and not to whatever is currently open.
+      @tree.insertComment(data, @tree.openElements[0])
+    end
+
+    def processCharacters(data)
+      @parser.parseError(_('Unexpected non-space characters in the after body phase.'))
+      @parser.phase = @parser.phases[:inBody]
+      @parser.phase.processCharacters(data)
+    end
+
+    def processStartTag(name, attributes)
+      @parser.parseError(_("Unexpected start tag token (#{name}) in the after body phase."))
+      @parser.phase = @parser.phases[:inBody]
+      @parser.phase.processStartTag(name, attributes)
+    end
+
+    def endTagHtml(name)
+      if @parser.innerHTML
+        @parser.parseError
+      else
+        # XXX: This may need to be done, not sure
+        # Don't set lastPhase to the current phase but to the inBody phase
+        # instead. No need for extra parse errors if there's something after </html>.
+        # Try "<!doctype html>X</html>X" for instance.
+        @parser.lastPhase = @parser.phase
+        @parser.phase = @parser.phases[:trailingEnd]
+      end
+    end
+
+    def endTagOther(name)
+      @parser.parseError(_("Unexpected end tag token (#{name}) in the after body phase."))
+      @parser.phase = @parser.phases[:inBody]
+      @parser.phase.processEndTag(name)
+    end
+
+  end
+end
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_frameset_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_frameset_phase.rb
@ -0,0 +1,34 @@
+require 'html5lib/html5parser/phase'
+
+module HTML5lib
+  class AfterFramesetPhase < Phase
+
+    # http://www.whatwg.org/specs/web-apps/current-work/#after3
+
+    handle_start 'html', 'noframes'
+
+    handle_end 'html'
+
+    def processCharacters(data)
+      @parser.parseError(_('Unexpected non-space characters in the after frameset phase. Ignored.'))
+    end
+
+    def startTagNoframes(name, attributes)
+      @parser.phases[:inBody].processStartTag(name, attributes)
+    end
+
+    def startTagOther(name, attributes)
+      @parser.parseError(_("Unexpected start tag (#{name}) in the after frameset phase. Ignored."))
+    end
+
+    def endTagHtml(name)
+      @parser.lastPhase = @parser.phase
+      @parser.phase = @parser.phases[:trailingEnd]
+    end
+
+    def endTagOther(name)
+      @parser.parseError(_("Unexpected end tag (#{name}) in the after frameset phase. Ignored."))
+    end
+
+  end
+end
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_head_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_head_phase.rb
@ -0,0 +1,50 @@
+require 'html5lib/html5parser/phase'
+
+module HTML5lib
+  class AfterHeadPhase < Phase
+  
+    handle_start 'html', 'body', 'frameset', %w( base link meta script style title ) => 'FromHead'
+
+    def processEOF
+      anythingElse
+      @parser.phase.processEOF
+    end
+
+    def processCharacters(data)
+      anythingElse
+      @parser.phase.processCharacters(data)
+    end
+
+    def startTagBody(name, attributes)
+      @tree.insertElement(name, attributes)
+      @parser.phase = @parser.phases[:inBody]
+    end
+
+    def startTagFrameset(name, attributes)
+      @tree.insertElement(name, attributes)
+      @parser.phase = @parser.phases[:inFrameset]
+    end
+
+    def startTagFromHead(name, attributes)
+      @parser.parseError(_("Unexpected start tag (#{name}) that can be in head. Moved."))
+      @parser.phase = @parser.phases[:inHead]
+      @parser.phase.processStartTag(name, attributes)
+    end
+
+    def startTagOther(name, attributes)
+      anythingElse
+      @parser.phase.processStartTag(name, attributes)
+    end
+
+    def processEndTag(name)
+      anythingElse
+      @parser.phase.processEndTag(name)
+    end
+
+    def anythingElse
+      @tree.insertElement('body', {})
+      @parser.phase = @parser.phases[:inBody]
+    end
+
+  end
+end
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/before_head_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/before_head_phase.rb
@ -0,0 +1,41 @@
+require 'html5lib/html5parser/phase'
+
+module HTML5lib
+  class BeforeHeadPhase < Phase
+
+    handle_start 'html', 'head'
+
+    handle_end 'html'
+
+    def processEOF
+      startTagHead('head', {})
+      @parser.phase.processEOF
+    end
+
+    def processCharacters(data)
+      startTagHead('head', {})
+      @parser.phase.processCharacters(data)
+    end
+
+    def startTagHead(name, attributes)
+      @tree.insertElement(name, attributes)
+      @tree.headPointer = @tree.openElements[-1]
+      @parser.phase = @parser.phases[:inHead]
+    end
+
+    def startTagOther(name, attributes)
+      startTagHead('head', {})
+      @parser.phase.processStartTag(name, attributes)
+    end
+
+    def endTagHtml(name)
+      startTagHead('head', {})
+      @parser.phase.processEndTag(name)
+    end
+
+    def endTagOther(name)
+      @parser.parseError(_("Unexpected end tag (#{name}) after the (implied) root element."))
+    end
+
+  end
+end
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_body_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_body_phase.rb
@ -0,0 +1,548 @@
+require 'html5lib/html5parser/phase'
+
+module HTML5lib
+  class InBodyPhase < Phase
+
+    # http://www.whatwg.org/specs/web-apps/current-work/#in-body
+
+    handle_start 'html', 'body', 'form', 'plaintext', 'a', 'button', 'xmp', 'table', 'hr', 'image'
+
+    handle_start 'input', 'textarea', 'select', 'isindex', %w( script style ), %w( marquee object )
+
+    handle_start %w( li dd dt ) => 'ListItem', %w( base link meta title ) => 'FromHead'
+      
+    handle_start %w( address blockquote center dir div dl fieldset listing menu ol p pre ul ) => 'CloseP'
+
+    handle_start %w( b big em font i nobr s small strike strong tt u ) => 'Formatting'
+
+    handle_start %w( area basefont bgsound br embed img param spacer wbr ) => 'VoidFormatting'
+
+    handle_start %w( iframe noembed noframes noscript ) => 'Cdata', HEADING_ELEMENTS => 'Heading'
+
+    handle_start %w( caption col colgroup frame frameset head option optgroup tbody td tfoot th thead tr ) => 'Misplaced'
+
+    handle_start %w( event-source section nav article aside header footer datagrid command ) => 'New'
+
+    handle_end 'p', 'body', 'html', 'form', %w( button marquee object ), %w( dd dt li ) => 'ListItem'
+
+    handle_end %w( address blockquote center div dl fieldset listing menu ol pre ul ) => 'Block'
+
+    handle_end %w( a b big em font i nobr s small strike strong tt u ) => 'Formatting'
+
+    handle_end %w( head frameset select optgroup option table caption colgroup col thead tfoot tbody tr td th ) => 'Misplaced' 
+
+    handle_end %w( area basefont bgsound br embed hr image img input isindex param spacer wbr frame ) => 'None'
+
+    handle_end %w( noframes noscript noembed textarea xmp iframe ) => 'CdataTextAreaXmp'
+
+    handle_end %w( event-source section nav article aside header footer datagrid command ) => 'New'
+
+    def initialize(parser, tree)
+      super(parser, tree)
+
+      # for special handling of whitespace in <pre>
+      @processSpaceCharactersPre = false
+    end
+
+    def processSpaceCharactersPre(data)
+      #Sometimes (start of <pre> blocks) we want to drop leading newlines
+      @processSpaceCharactersPre = false
+      if (data.length > 0 and data[0] == ?\n and 
+        @tree.openElements[-1].name == 'pre' and
+        not @tree.openElements[-1].hasContent)
+        data = data[1..-1]
+      end
+      @tree.insertText(data) if data.length > 0
+    end
+
+    def processSpaceCharacters(data)
+      if @processSpaceCharactersPre
+        processSpaceCharactersPre(data)
+      else
+        super(data)
+      end
+    end
+
+    def processCharacters(data)
+      # XXX The specification says to do this for every character at the
+      # moment, but apparently that doesn't match the real world so we don't
+      # do it for space characters.
+      @tree.reconstructActiveFormattingElements
+      @tree.insertText(data)
+    end
+
+    def startTagScriptStyle(name, attributes)
+      @parser.phases[:inHead].processStartTag(name, attributes)
+    end
+
+    def startTagFromHead(name, attributes)
+      @parser.parseError(_("Unexpected start tag (#{name}) that belongs in the head. Moved."))
+      @parser.phases[:inHead].processStartTag(name, attributes)
+    end
+
+    def startTagBody(name, attributes)
+      @parser.parseError(_('Unexpected start tag (body).'))
+
+      if (@tree.openElements.length == 1 or
+        @tree.openElements[1].name != 'body')
+        assert @parser.innerHTML
+      else
+        attributes.each do |attr, value|
+          unless @tree.openElements[1].attributes.has_key?(attr)
+            @tree.openElements[1].attributes[attr] = value
+          end
+        end
+      end
+    end
+
+    def startTagCloseP(name, attributes)
+      endTagP('p') if in_scope?('p')
+      @tree.insertElement(name, attributes)
+      @processSpaceCharactersPre = true if name == 'pre'
+    end
+
+    def startTagForm(name, attributes)
+      if @tree.formPointer
+        @parser.parseError('Unexpected start tag (form). Ignored.')
+      else
+        endTagP('p') if in_scope?('p')
+        @tree.insertElement(name, attributes)
+        @tree.formPointer = @tree.openElements[-1]
+      end
+    end
+
+    def startTagListItem(name, attributes)
+      endTagP('p') if in_scope?('p')
+      stopNames = {'li' => ['li'], 'dd' => ['dd', 'dt'], 'dt' => ['dd', 'dt']}
+      stopName = stopNames[name]
+
+      @tree.openElements.reverse.each_with_index do |node, i|
+        if stopName.include?(node.name)
+          (i + 1).times { @tree.openElements.pop }
+          break
+        end
+
+        # Phrasing elements are all non special, non scoping, non
+        # formatting elements
+        break if ((SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name) and
+          not ['address', 'div'].include?(node.name))
+      end
+
+      # Always insert an <li> element.
+      @tree.insertElement(name, attributes)
+    end
+
+    def startTagPlaintext(name, attributes)
+      endTagP('p') if in_scope?('p')
+      @tree.insertElement(name, attributes)
+      @parser.tokenizer.contentModelFlag = :PLAINTEXT
+    end
+
+    def startTagHeading(name, attributes)
+      endTagP('p') if in_scope?('p')
+      HEADING_ELEMENTS.each do |element|
+        if in_scope?(element)
+          @parser.parseError(_("Unexpected start tag (#{name})."))
+        
+          remove_open_elements_until { |element| HEADING_ELEMENTS.include?(element.name) }
+
+          break
+         end
+      end
+      @tree.insertElement(name, attributes)
+    end
+
+    def startTagA(name, attributes)
+      if afeAElement = @tree.elementInActiveFormattingElements('a')
+        @parser.parseError(_('Unexpected start tag (a) implies end tag (a).'))
+        endTagFormatting('a')
+        @tree.openElements.delete(afeAElement) if @tree.openElements.include?(afeAElement)
+        @tree.activeFormattingElements.delete(afeAElement) if @tree.activeFormattingElements.include?(afeAElement)
+      end
+      @tree.reconstructActiveFormattingElements
+      addFormattingElement(name, attributes)
+    end
+
+    def startTagFormatting(name, attributes)
+      @tree.reconstructActiveFormattingElements
+      addFormattingElement(name, attributes)
+    end
+
+    def startTagButton(name, attributes)
+      if in_scope?('button')
+        @parser.parseError(_('Unexpected start tag (button) implied end tag (button).'))
+        processEndTag('button')
+        @parser.phase.processStartTag(name, attributes)
+      else
+        @tree.reconstructActiveFormattingElements
+        @tree.insertElement(name, attributes)
+        @tree.activeFormattingElements.push(Marker)
+      end
+    end
+
+    def startTagMarqueeObject(name, attributes)
+      @tree.reconstructActiveFormattingElements
+      @tree.insertElement(name, attributes)
+      @tree.activeFormattingElements.push(Marker)
+    end
+
+    def startTagXmp(name, attributes)
+      @tree.reconstructActiveFormattingElements
+      @tree.insertElement(name, attributes)
+      @parser.tokenizer.contentModelFlag = :CDATA
+    end
+
+    def startTagTable(name, attributes)
+      processEndTag('p') if in_scope?('p')
+      @tree.insertElement(name, attributes)
+      @parser.phase = @parser.phases[:inTable]
+    end
+
+    def startTagVoidFormatting(name, attributes)
+      @tree.reconstructActiveFormattingElements
+      @tree.insertElement(name, attributes)
+      @tree.openElements.pop
+    end
+
+    def startTagHr(name, attributes)
+      endTagP('p') if in_scope?('p')
+      @tree.insertElement(name, attributes)
+      @tree.openElements.pop
+    end
+
+    def startTagImage(name, attributes)
+      # No really...
+      @parser.parseError(_('Unexpected start tag (image). Treated as img.'))
+      processStartTag('img', attributes)
+    end
+
+    def startTagInput(name, attributes)
+      @tree.reconstructActiveFormattingElements
+      @tree.insertElement(name, attributes)
+      if @tree.formPointer
+        # XXX Not exactly sure what to do here
+        # @tree.openElements[-1].form = @tree.formPointer
+      end
+      @tree.openElements.pop
+    end
+
+    def startTagIsindex(name, attributes)
+      @parser.parseError("Unexpected start tag isindex. Don't use it!")
+      return if @tree.formPointer
+      processStartTag('form', {})
+      processStartTag('hr', {})
+      processStartTag('p', {})
+      processStartTag('label', {})
+      # XXX Localization ...
+      processCharacters('This is a searchable index. Insert your search keywords here:')
+      attributes['name'] = 'isindex'
+      attrs = attributes.to_a
+      processStartTag('input', attributes)
+      processEndTag('label')
+      processEndTag('p')
+      processStartTag('hr', {})
+      processEndTag('form')
+    end
+
+    def startTagTextarea(name, attributes)
+      # XXX Form element pointer checking here as well...
+      @tree.insertElement(name, attributes)
+      @parser.tokenizer.contentModelFlag = :RCDATA
+    end
+
+    # iframe, noembed noframes, noscript(if scripting enabled)
+    def startTagCdata(name, attributes)
+      @tree.insertElement(name, attributes)
+      @parser.tokenizer.contentModelFlag = :CDATA
+    end
+
+    def startTagSelect(name, attributes)
+      @tree.reconstructActiveFormattingElements
+      @tree.insertElement(name, attributes)
+      @parser.phase = @parser.phases[:inSelect]
+    end
+
+    def startTagMisplaced(name, attributes)
+      # Elements that should be children of other elements that have a
+      # different insertion mode; here they are ignored
+      # "caption", "col", "colgroup", "frame", "frameset", "head",
+      # "option", "optgroup", "tbody", "td", "tfoot", "th", "thead",
+      # "tr", "noscript"
+      @parser.parseError(_("Unexpected start tag (#{name}). Ignored."))
+    end
+
+    def startTagNew(name, attributes)
+      # New HTML5 elements, "event-source", "section", "nav",
+      # "article", "aside", "header", "footer", "datagrid", "command"
+      sys.stderr.write("Warning: Undefined behaviour for start tag #{name}")
+      startTagOther(name, attributes)
+      #raise NotImplementedError
+    end
+
+    def startTagOther(name, attributes)
+      @tree.reconstructActiveFormattingElements
+      @tree.insertElement(name, attributes)
+    end
+
+    def endTagP(name)
+      @tree.generateImpliedEndTags('p') if in_scope?('p')
+      @parser.parseError('Unexpected end tag (p).') unless @tree.openElements[-1].name == 'p'
+      @tree.openElements.pop while in_scope?('p')
+    end
+
+    def endTagBody(name)
+      # XXX Need to take open <p> tags into account here. We shouldn't imply
+      # </p> but we should not throw a parse error either. Specification is
+      # likely to be updated.
+      unless @tree.openElements[1].name == 'body'
+        # innerHTML case
+        @parser.parseError
+        return
+      end
+      unless @tree.openElements[-1].name == 'body'
+        @parser.parseError(_("Unexpected end tag (body). Missing end tag (#{@tree.openElements[-1].name})."))
+      end
+      @parser.phase = @parser.phases[:afterBody]
+    end
+
+    def endTagHtml(name)
+      endTagBody(name)
+      @parser.phase.processEndTag(name) unless @parser.innerHTML
+    end
+
+    def endTagBlock(name)
+      #Put us back in the right whitespace handling mode
+      @processSpaceCharactersPre = false if name == 'pre'
+
+      @tree.generateImpliedEndTags if in_scope?(name)
+
+      unless @tree.openElements[-1].name == name
+        @parser.parseError(("End tag (#{name}) seen too early. Expected other end tag."))
+      end
+
+      if in_scope?(name)
+        remove_open_elements_until(name)
+      end
+    end
+
+    def endTagForm(name)
+      endTagBlock(name)
+      @tree.formPointer = nil
+    end
+
+    def endTagListItem(name)
+      # AT Could merge this with the Block case
+      if in_scope?(name)
+        @tree.generateImpliedEndTags(name)
+
+        unless @tree.openElements[-1].name == name
+          @parser.parseError(("End tag (#{name}) seen too early. Expected other end tag."))
+        end
+      end
+
+      remove_open_elements_until(name) if in_scope?(name)
+    end  
+
+    def endTagHeading(name)
+      HEADING_ELEMENTS.each do |element|
+        if in_scope?(element)
+          @tree.generateImpliedEndTags
+          break
+        end
+      end
+
+      unless @tree.openElements[-1].name == name
+        @parser.parseError(("Unexpected end tag (#{name}). Expected other end tag."))
+      end
+
+      HEADING_ELEMENTS.each do |element|
+        if in_scope?(element)
+          remove_open_elements_until { |element| HEADING_ELEMENTS.include?(element.name) }
+          break
+        end
+      end
+    end
+
+    # The much-feared adoption agency algorithm
+    #
+    # Handles the end tag of a formatting element. The outer loop repeats
+    # the whole algorithm and only exits through one of the returns in
+    # step 1 (nothing suitable to close) or step 3 (no furthest block, so
+    # the element is simply popped and removed from the active formatting
+    # elements). Otherwise the nodes between the formatting element and the
+    # furthest block are re-parented and a clone of the formatting element
+    # is placed inside the furthest block.
+    #
+    # name:: tag name of the end tag being processed.
+    def endTagFormatting(name)
+      # http://www.whatwg.org/specs/web-apps/current-work/#adoptionAgency
+      # XXX Better parseError messages appreciated.
+      while true
+        # Step 1 paragraph 1
+        afeElement = @tree.elementInActiveFormattingElements(name)
+        if not afeElement or (@tree.openElements.include?(afeElement) and not in_scope?(afeElement.name))
+          @parser.parseError(_("End tag (#{name}) violates step 1, paragraph 1 of the adoption agency algorithm."))
+          return
+        # Step 1 paragraph 2
+        elsif not @tree.openElements.include?(afeElement)
+          @parser.parseError(_("End tag (#{name}) violates step 1, paragraph 2 of the adoption agency algorithm."))
+          @tree.activeFormattingElements.delete(afeElement)
+          return
+        end
+
+        # Step 1 paragraph 3
+        if afeElement != @tree.openElements[-1]
+          @parser.parseError(_("End tag (#{name}) violates step 1, paragraph 3 of the adoption agency algorithm."))
+        end
+
+        # Step 2
+        # Start of the adoption agency algorithm proper
+        # The furthest block is the first special or scoping element at or
+        # after the formatting element in the open-element stack.
+        afeIndex = @tree.openElements.index(afeElement)
+        furthestBlock = nil
+        @tree.openElements[afeIndex..-1].each do |element|
+          if (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(element.name)
+            furthestBlock = element
+            break
+          end
+        end
+
+        # Step 3
+        if furthestBlock.nil?
+          element = remove_open_elements_until { |element| element == afeElement }
+          @tree.activeFormattingElements.delete(element)
+          return
+        end
+        # The common ancestor is the open element just below the formatting
+        # element.
+        commonAncestor = @tree.openElements[afeIndex - 1]
+
+        # Step 5
+        furthestBlock.parent.removeChild(furthestBlock) if furthestBlock.parent
+
+        # Step 6
+        # The bookmark is supposed to help us identify where to reinsert
+        # nodes in step 12. We have to ensure that we reinsert nodes after
+        # the node before the active formatting element. Note the bookmark
+        # can move in step 7.4
+        bookmark = @tree.activeFormattingElements.index(afeElement)
+
+        # Step 7
+        lastNode = node = furthestBlock
+        while true
+          # AT replace this with a function and recursion?
+          # Node is element before node in open elements
+          node = @tree.openElements[@tree.openElements.index(node) - 1]
+          # Skip (and drop from the open elements) nodes that are not
+          # active formatting elements.
+          until @tree.activeFormattingElements.include?(node)
+            tmpNode = node
+            node = @tree.openElements[@tree.openElements.index(node) - 1]
+            @tree.openElements.delete(tmpNode)
+          end
+          # Step 7.3
+          break if node == afeElement
+          # Step 7.4
+          if lastNode == furthestBlock
+            # XXX should this be index(node) or index(node)+1
+            # Anne: I think +1 is ok. Given x = [2,3,4,5]
+            # x.index(3) gives 1 and then x[1 +1] gives 4...
+            bookmark = @tree.activeFormattingElements.index(node) + 1
+          end
+          # Step 7.5
+          cite = node.parent
+          if node.hasContent
+            clone = node.cloneNode
+            # Replace node with clone
+            @tree.activeFormattingElements[@tree.activeFormattingElements.index(node)] = clone
+            @tree.openElements[@tree.openElements.index(node)] = clone
+            node = clone
+          end
+          # Step 7.6
+          # Remove lastNode from its parents, if any
+          lastNode.parent.removeChild(lastNode) if lastNode.parent
+          node.appendChild(lastNode)
+          # Step 7.7
+          lastNode = node
+          # End of inner loop
+        end
+
+        # Step 8
+        lastNode.parent.removeChild(lastNode) if lastNode.parent
+        commonAncestor.appendChild(lastNode)
+
+        # Step 9
+        clone = afeElement.cloneNode
+
+        # Step 10
+        furthestBlock.reparentChildren(clone)
+
+        # Step 11
+        furthestBlock.appendChild(clone)
+
+        # Step 12
+        # The insertion index is clamped to the list length because the
+        # deletion just above may have shortened the list.
+        @tree.activeFormattingElements.delete(afeElement)
+        @tree.activeFormattingElements.insert([bookmark,@tree.activeFormattingElements.length].min, clone)
+
+        # Step 13
+        @tree.openElements.delete(afeElement)
+        @tree.openElements.insert(@tree.openElements.index(furthestBlock) + 1, clone)
+      end
+    end
+
+    def endTagButtonMarqueeObject(name)
+      @tree.generateImpliedEndTags if in_scope?(name)
+
+      unless @tree.openElements[-1].name == name
+        @parser.parseError(_("Unexpected end tag (#{name}). Expected other end tag first."))
+      end
+
+      if in_scope?(name)
+        remove_open_elements_until(name)
+      
+        @tree.clearActiveFormattingElements
+      end
+    end
+
+    def endTagMisplaced(name)
+      # This handles elements with end tags in other insertion modes.
+      @parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
+    end
+
+    def endTagNone(name)
+      # This handles elements with no end tag.
+      @parser.parseError(_("This tag (#{name}) has no end tag"))
+    end
+
+    def endTagCdataTextAreaXmp(name)
+      if @tree.openElements[-1].name == name
+        @tree.openElements.pop
+      else
+        @parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
+      end
+    end
+
+    def endTagNew(name)
+      # New HTML5 elements, "event-source", "section", "nav",
+      # "article", "aside", "header", "footer", "datagrid", "command"
+      STDERR.puts "Warning: Undefined behaviour for end tag #{name}"
+      endTagOther(name)
+      #raise NotImplementedError
+    end
+
+    def endTagOther(name)
+      # XXX This logic should be moved into the treebuilder
+      @tree.openElements.reverse.each do |node|
+        if node.name == name
+          @tree.generateImpliedEndTags
+
+          unless @tree.openElements[-1].name == name
+            @parser.parseError(_("Unexpected end tag (#{name})."))
+          end
+
+          remove_open_elements_until { |element| element == node }
+
+          break
+        else
+          if (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name)
+            @parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
+            break
+          end
+        end
+      end
+    end
+
+    protected
+
+    def addFormattingElement(name, attributes)
+      @tree.insertElement(name, attributes)
+      @tree.activeFormattingElements.push(@tree.openElements[-1])
+    end
+
+  end
+end
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_caption_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_caption_phase.rb
@ -0,0 +1,68 @@
+require 'html5lib/html5parser/phase'
+
+module HTML5lib
+  class InCaptionPhase < Phase
+
+    # http://www.whatwg.org/specs/web-apps/current-work/#in-caption
+
+    handle_start 'html', %w( caption col colgroup tbody td tfoot th thead tr ) => 'TableElement'
+
+    handle_end 'caption', 'table', %w( body col colgroup html tbody td tfoot th thead tr ) => 'Ignore'
+
+    def ignoreEndTagCaption
+      not in_scope?('caption', true)
+    end
+
+    def processCharacters(data)
+      @parser.phases[:inBody].processCharacters(data)
+    end
+
+    def startTagTableElement(name, attributes)
+      @parser.parseError
+      #XXX Have to duplicate logic here to find out if the tag is ignored
+      ignoreEndTag = ignoreEndTagCaption
+      @parser.phase.processEndTag('caption')
+      @parser.phase.processStartTag(name, attributes) unless ignoreEndTag
+    end
+
+    def startTagOther(name, attributes)
+      @parser.phases[:inBody].processStartTag(name, attributes)
+    end
+
+    def endTagCaption(name)
+      if ignoreEndTagCaption
+        # innerHTML case
+        assert @parser.innerHTML
+        @parser.parseError
+      else
+        # AT this code is quite similar to endTagTable in "InTable"
+        @tree.generateImpliedEndTags
+
+        unless @tree.openElements[-1].name == 'caption'
+          @parser.parseError(_("Unexpected end tag (caption). Missing end tags."))
+        end
+
+        remove_open_elements_until('caption')
+
+        @tree.clearActiveFormattingElements
+        @parser.phase = @parser.phases[:inTable]
+      end
+    end
+
+    def endTagTable(name)
+      @parser.parseError
+      ignoreEndTag = ignoreEndTagCaption
+      @parser.phase.processEndTag('caption')
+      @parser.phase.processEndTag(name) unless ignoreEndTag
+    end
+
+    def endTagIgnore(name)
+      @parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
+    end
+
+    def endTagOther(name)
+      @parser.phases[:inBody].processEndTag(name)
+    end
+
+  end
+end
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_cell_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_cell_phase.rb
@ -0,0 +1,78 @@
+require 'html5lib/html5parser/phase'
+
+module HTML5lib
+  class InCellPhase < Phase
+
+    # http://www.whatwg.org/specs/web-apps/current-work/#in-cell
+
+    handle_start 'html', %w( caption col colgroup tbody td tfoot th thead tr ) => 'TableOther'
+
+    handle_end %w( td th ) => 'TableCell', %w( body caption col colgroup html ) => 'Ignore'
+
+    handle_end %w( table tbody tfoot thead tr ) => 'Imply'
+
+    def processCharacters(data)
+      @parser.phases[:inBody].processCharacters(data)
+    end
+
+    def startTagTableOther(name, attributes)
+      if in_scope?('td', true) or in_scope?('th', true)
+        closeCell
+        @parser.phase.processStartTag(name, attributes)
+      else
+        # innerHTML case
+        @parser.parseError
+      end
+    end
+
+    def startTagOther(name, attributes)
+      @parser.phases[:inBody].processStartTag(name, attributes)
+    end
+
+    def endTagTableCell(name)
+      if in_scope?(name, true)
+        @tree.generateImpliedEndTags(name)
+        if @tree.openElements[-1].name != name
+          @parser.parseError("Got table cell end tag (#{name}) while required end tags are missing.")
+
+          remove_open_elements_until(name)
+        else
+          @tree.openElements.pop
+        end
+        @tree.clearActiveFormattingElements
+        @parser.phase = @parser.phases[:inRow]
+      else
+        @parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
+      end
+    end
+
+    def endTagIgnore(name)
+      @parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
+    end
+
+    def endTagImply(name)
+      if in_scope?(name, true)
+        closeCell
+        @parser.phase.processEndTag(name)
+      else
+        # sometimes innerHTML case
+        @parser.parseError
+      end
+    end
+
+    def endTagOther(name)
+      @parser.phases[:inBody].processEndTag(name)
+    end
+
+    protected
+
+    def closeCell
+      if in_scope?('td', true)
+        endTagTableCell('td')
+      elsif in_scope?('th', true)
+        endTagTableCell('th')
+      end
+    end
+
+  end
+end
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_column_group_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_column_group_phase.rb
@ -0,0 +1,55 @@
+require 'html5lib/html5parser/phase'
+
+module HTML5lib
+  class InColumnGroupPhase < Phase
+
+    # http://www.whatwg.org/specs/web-apps/current-work/#in-column
+
+    handle_start 'html', 'col'
+
+    handle_end 'colgroup', 'col'
+
+    def ignoreEndTagColgroup
+      @tree.openElements[-1].name == 'html'
+    end
+
+    def processCharacters(data)
+      ignoreEndTag = ignoreEndTagColgroup
+      endTagColgroup("colgroup")
+      @parser.phase.processCharacters(data) unless ignoreEndTag
+    end
+
+    def startTagCol(name, attributes)
+      @tree.insertElement(name, attributes)
+      @tree.openElements.pop
+    end
+
+    def startTagOther(name, attributes)
+      ignoreEndTag = ignoreEndTagColgroup
+      endTagColgroup('colgroup')
+      @parser.phase.processStartTag(name, attributes) unless ignoreEndTag
+    end
+
+    def endTagColgroup(name)
+      if ignoreEndTagColgroup
+        # innerHTML case
+        assert @parser.innerHTML
+        @parser.parseError
+      else
+        @tree.openElements.pop
+        @parser.phase = @parser.phases[:inTable]
+      end
+    end
+
+    def endTagCol(name)
+      @parser.parseError(_('Unexpected end tag (col). col has no end tag.'))
+    end
+
+    def endTagOther(name)
+      ignoreEndTag = ignoreEndTagColgroup
+      endTagColgroup('colgroup')
+      @parser.phase.processEndTag(name) unless ignoreEndTag
+    end
+
+  end
+end
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_frameset_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_frameset_phase.rb
@ -0,0 +1,57 @@
+require 'html5lib/html5parser/phase'
+
+module HTML5lib
+  class InFramesetPhase < Phase
+
+    # http://www.whatwg.org/specs/web-apps/current-work/#in-frameset
+
+    handle_start 'html', 'frameset', 'frame', 'noframes'
+
+    handle_end 'frameset', 'noframes'
+
+    def processCharacters(data)
+      @parser.parseError(_('Unexpected characters in the frameset phase. Characters ignored.'))
+    end
+
+    def startTagFrameset(name, attributes)
+      @tree.insertElement(name, attributes)
+    end
+
+    def startTagFrame(name, attributes)
+      @tree.insertElement(name, attributes)
+      @tree.openElements.pop
+    end
+
+    def startTagNoframes(name, attributes)
+      @parser.phases[:inBody].processStartTag(name, attributes)
+    end
+
+    def startTagOther(name, attributes)
+      @parser.parseError(_("Unexpected start tag token (#{name}) in the frameset phase. Ignored"))
+    end
+
+    def endTagFrameset(name)
+      if @tree.openElements[-1].name == 'html'
+        # innerHTML case
+        @parser.parseError(_("Unexpected end tag token (frameset) in the frameset phase (innerHTML)."))
+      else
+        @tree.openElements.pop
+      end
+      if (not @parser.innerHTML and
+        @tree.openElements[-1].name != 'frameset')
+        # If we're not in innerHTML mode and the the current node is not a
+        # "frameset" element (anymore) then switch.
+        @parser.phase = @parser.phases[:afterFrameset]
+      end
+    end
+
+    def endTagNoframes(name)
+      @parser.phases[:inBody].processEndTag(name)
+    end
+
+    def endTagOther(name)
+      @parser.parseError(_("Unexpected end tag token (#{name}) in the frameset phase. Ignored."))
+    end
+
+  end
+end
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_head_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_head_phase.rb
@ -0,0 +1,120 @@
+require 'html5lib/html5parser/phase'
+
+module HTML5lib
+  class InHeadPhase < Phase
+
+    handle_start 'html', 'head', 'title', 'style', 'script', %w( base link meta )
+
+    handle_end 'head', 'html', %w( title style script )
+
+    def processEOF
+      if ['title', 'style', 'script'].include?(name = @tree.openElements[-1].name)
+        @parser.parseError(_("Unexpected end of file. Expected end tag (#{name})."))
+        @tree.openElements.pop
+      end
+      anythingElse
+      @parser.phase.processEOF
+    end
+
+    def processCharacters(data)
+      if ['title', 'style', 'script'].include?(@tree.openElements[-1].name)
+        @tree.insertText(data)
+      else
+        anythingElse
+        @parser.phase.processCharacters(data)
+      end
+    end
+
+    def startTagHead(name, attributes)
+      @parser.parseError(_('Unexpected start tag head in existing head. Ignored'))
+    end
+
+    def startTagTitle(name, attributes)
+      element = @tree.createElement(name, attributes)
+      appendToHead(element)
+      @tree.openElements.push(element)
+      @parser.tokenizer.contentModelFlag = :RCDATA
+    end
+
+    def startTagStyle(name, attributes)
+      element = @tree.createElement(name, attributes)
+      if @tree.headPointer != nil and @parser.phase == @parser.phases[:inHead]
+        appendToHead(element)
+      else
+        @tree.openElements[-1].appendChild(element)
+      end
+      @tree.openElements.push(element)
+      @parser.tokenizer.contentModelFlag = :CDATA
+    end
+
+    def startTagScript(name, attributes)
+      #XXX Inner HTML case may be wrong
+      element = @tree.createElement(name, attributes)
+      element._flags.push("parser-inserted")
+      if (@tree.headPointer != nil and
+        @parser.phase == @parser.phases[:inHead])
+        appendToHead(element)
+      else
+        @tree.openElements[-1].appendChild(element)
+      end
+      @tree.openElements.push(element)
+      @parser.tokenizer.contentModelFlag = :CDATA
+    end
+
+    def startTagBaseLinkMeta(name, attributes)
+      element = @tree.createElement(name, attributes)
+      appendToHead(element)
+    end
+
+    def startTagOther(name, attributes)
+      anythingElse
+      @parser.phase.processStartTag(name, attributes)
+    end
+
+    def endTagHead(name)
+      if @tree.openElements[-1].name == 'head'
+        @tree.openElements.pop
+      else
+        @parser.parseError(_("Unexpected end tag (head). Ignored."))
+      end
+      @parser.phase = @parser.phases[:afterHead]
+    end
+
+    def endTagHtml(name)
+      anythingElse
+      @parser.phase.processEndTag(name)
+    end
+
+    def endTagTitleStyleScript(name)
+      if @tree.openElements[-1].name == name
+        @tree.openElements.pop
+      else
+        @parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
+      end
+    end
+
+    def endTagOther(name)
+      @parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
+    end
+
+    def anythingElse
+      if @tree.openElements[-1].name == 'head'
+        endTagHead('head')
+      else
+        @parser.phase = @parser.phases[:afterHead]
+      end
+    end
+
+    protected
+
+    def appendToHead(element)
+      if @tree.headPointer.nil?
+        assert @parser.innerHTML
+        @tree.openElements[-1].appendChild(element)
+      else
+        @tree.headPointer.appendChild(element)
+      end
+    end
+
+  end
+end
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_row_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_row_phase.rb
@ -0,0 +1,87 @@
+require 'html5lib/html5parser/phase'
+
+module HTML5lib
+  class InRowPhase < Phase
+
+    # http://www.whatwg.org/specs/web-apps/current-work/#in-row
+
+    handle_start 'html', %w( td th ) => 'TableCell', %w( caption col colgroup tbody tfoot thead tr ) => 'TableOther'
+
+    handle_end 'tr', 'table', %w( tbody tfoot thead ) => 'TableRowGroup', %w( body caption col colgroup html td th ) => 'Ignore'
+
+    def processCharacters(data)
+      @parser.phases[:inTable].processCharacters(data)
+    end
+
+    def startTagTableCell(name, attributes)
+      clearStackToTableRowContext
+      @tree.insertElement(name, attributes)
+      @parser.phase = @parser.phases[:inCell]
+      @tree.activeFormattingElements.push(Marker)
+    end
+
+    def startTagTableOther(name, attributes)
+      ignoreEndTag = ignoreEndTagTr
+      endTagTr('tr')
+      # XXX how are we sure it's always ignored in the innerHTML case?
+      @parser.phase.processStartTag(name, attributes) unless ignoreEndTag
+    end
+
+    def startTagOther(name, attributes)
+      @parser.phases[:inTable].processStartTag(name, attributes)
+    end
+
+    def endTagTr(name)
+      if ignoreEndTagTr
+        # innerHTML case
+        assert @parser.innerHTML
+        @parser.parseError
+      else
+        clearStackToTableRowContext
+        @tree.openElements.pop
+        @parser.phase = @parser.phases[:inTableBody]
+      end
+    end
+
+    def endTagTable(name)
+      ignoreEndTag = ignoreEndTagTr
+      endTagTr('tr')
+      # Reprocess the current tag if the tr end tag was not ignored
+      # XXX how are we sure it's always ignored in the innerHTML case?
+      @parser.phase.processEndTag(name) unless ignoreEndTag
+    end
+
+    def endTagTableRowGroup(name)
+      if in_scope?(name, true)
+        endTagTr('tr')
+        @parser.phase.processEndTag(name)
+      else
+        # innerHTML case
+        @parser.parseError
+      end
+    end
+
+    def endTagIgnore(name)
+      @parser.parseError(_("Unexpected end tag (#{name}) in the row phase. Ignored."))
+    end
+
+    def endTagOther(name)
+      @parser.phases[:inTable].processEndTag(name)
+    end
+
+    protected
+
+    # XXX unify this with other table helper methods
+    def clearStackToTableRowContext
+      until ['tr', 'html'].include?(name = @tree.openElements[-1].name)
+        @parser.parseError(_("Unexpected implied end tag (#{name}) in the row phase."))
+        @tree.openElements.pop
+      end
+    end
+
+    def ignoreEndTagTr
+      not in_scope?('tr', :tableVariant => true)
+    end
+
+  end
+end
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_select_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_select_phase.rb
@ -0,0 +1,84 @@
+require 'html5lib/html5parser/phase'
+
+module HTML5lib
+  class InSelectPhase < Phase
+
+    # http://www.whatwg.org/specs/web-apps/current-work/#in-select
+
+    handle_start 'html', 'option', 'optgroup', 'select'
+
+    handle_end 'option', 'optgroup', 'select', %w( caption table tbody tfoot thead tr td th ) => 'TableElements'
+
+    def processCharacters(data)
+      @tree.insertText(data)
+    end
+
+    def startTagOption(name, attributes)
+      # We need to imply </option> if <option> is the current node.
+      @tree.openElements.pop if @tree.openElements[-1].name == 'option'
+      @tree.insertElement(name, attributes)
+    end
+
+    def startTagOptgroup(name, attributes)
+      @tree.openElements.pop if @tree.openElements[-1].name == 'option'
+      @tree.openElements.pop if @tree.openElements[-1].name == 'optgroup'
+      @tree.insertElement(name, attributes)
+    end
+
+    def startTagSelect(name, attributes)
+      @parser.parseError(_('Unexpected start tag (select) in the select phase implies select start tag.'))
+      endTagSelect('select')
+    end
+
+    def startTagOther(name, attributes)
+      @parser.parseError(_('Unexpected start tag token (#{name}) in the select phase. Ignored.'))
+    end
+
+    def endTagOption(name)
+      if @tree.openElements[-1].name == 'option'
+        @tree.openElements.pop
+      else
+        @parser.parseError(_('Unexpected end tag (option) in the select phase. Ignored.'))
+      end
+    end
+
+    def endTagOptgroup(name)
+      # </optgroup> implicitly closes <option>
+      if @tree.openElements[-1].name == 'option' and @tree.openElements[-2].name == 'optgroup'
+        @tree.openElements.pop
+      end
+      # It also closes </optgroup>
+      if @tree.openElements[-1].name == 'optgroup'
+        @tree.openElements.pop
+      # But nothing else
+      else
+        @parser.parseError(_('Unexpected end tag (optgroup) in the select phase. Ignored.'))
+      end
+    end
+
+    def endTagSelect(name)
+      if in_scope?('select', true)
+        remove_open_elements_until('select')
+
+        @parser.resetInsertionMode
+      else
+        # innerHTML case
+        @parser.parseError
+      end
+    end
+
+    def endTagTableElements(name)
+      @parser.parseError(_("Unexpected table end tag (#{name}) in the select phase."))
+
+      if in_scope?(name, true)
+        endTagSelect('select')
+        @parser.phase.processEndTag(name)
+      end
+    end
+
+    def endTagOther(name)
+      @parser.parseError(_("Unexpected end tag token (#{name}) in the select phase. Ignored."))
+    end
+
+  end
+end
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_table_body_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_table_body_phase.rb
@ -0,0 +1,83 @@
+require 'html5lib/html5parser/phase'
+
+module HTML5lib
+  class InTableBodyPhase < Phase
+
+    # http://www.whatwg.org/specs/web-apps/current-work/#in-table0
+
+    handle_start 'html', 'tr', %w( td th ) => 'TableCell', %w( caption col colgroup tbody tfoot thead ) => 'TableOther'
+
+    handle_end 'table', %w( tbody tfoot thead ) => 'TableRowGroup', %w( body caption col colgroup html td th tr ) => 'Ingore'
+
+    def processCharacters(data)
+      @parser.phases[:inTable].processCharacters(data)
+    end
+
+    def startTagTr(name, attributes)
+      clearStackToTableBodyContext
+      @tree.insertElement(name, attributes)
+      @parser.phase = @parser.phases[:inRow]
+    end
+
+    def startTagTableCell(name, attributes)
+      @parser.parseError(_("Unexpected table cell start tag (#{name}) in the table body phase."))
+      startTagTr('tr', {})
+      @parser.phase.processStartTag(name, attributes)
+    end
+
+    def startTagTableOther(name, attributes)
+      # XXX AT Any ideas on how to share this with endTagTable?
+      if in_scope?('tbody', true) or in_scope?('thead', true) or in_scope?('tfoot', true)
+        clearStackToTableBodyContext
+        endTagTableRowGroup(@tree.openElements[-1].name)
+        @parser.phase.processStartTag(name, attributes)
+      else
+        # innerHTML case
+        @parser.parseError
+      end
+    end
+
+    def startTagOther(name, attributes)
+      @parser.phases[:inTable].processStartTag(name, attributes)
+    end
+
+    def endTagTableRowGroup(name)
+      if in_scope?(name, true)
+        clearStackToTableBodyContext
+        @tree.openElements.pop
+        @parser.phase = @parser.phases[:inTable]
+      else
+        @parser.parseError(_("Unexpected end tag (#{name}) in the table body phase. Ignored."))
+      end
+    end
+
+    def endTagTable(name)
+      if in_scope?('tbody', true) or in_scope?('thead', true) or in_scope?('tfoot', true)
+        clearStackToTableBodyContext
+        endTagTableRowGroup(@tree.openElements[-1].name)
+        @parser.phase.processEndTag(name)
+      else
+        # innerHTML case
+        @parser.parseError
+      end
+    end
+
+    def endTagIgnore(name)
+      @parser.parseError(_("Unexpected end tag (#{name}) in the table body phase. Ignored."))
+    end
+
+    def endTagOther(name)
+      @parser.phases[:inTable].processEndTag(name)
+    end
+
+    protected
+
+    def clearStackToTableBodyContext
+      until ['tbody', 'tfoot', 'thead', 'html'].include?(name = @tree.openElements[-1].name)
+        @parser.parseError(_("Unexpected implied end tag (#{name}) in the table body phase."))
+        @tree.openElements.pop
+      end
+    end
+
+  end
+end
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_table_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_table_phase.rb
@ -0,0 +1,110 @@
+require 'html5lib/html5parser/phase'
+
+module HTML5lib
+  class InTablePhase < Phase
+
+    # http://www.whatwg.org/specs/web-apps/current-work/#in-table
+
+    handle_start 'html', 'caption', 'colgroup', 'col', 'table'
+
+    handle_start %w( tbody tfoot thead ) => 'RowGroup', %w( td th tr ) => 'ImplyTbody'
+
+    handle_end 'table', %w( body caption col colgroup html tbody td tfoot th thead tr ) => 'Ignore'
+
+    def processCharacters(data)
+      @parser.parseError(_("Unexpected non-space characters in table context caused voodoo mode."))
+      # Make all the special element rearranging voodoo kick in
+      @tree.insertFromTable = true
+      # Process the character in the "in body" mode
+      @parser.phases[:inBody].processCharacters(data)
+      @tree.insertFromTable = false
+    end
+
+    def startTagCaption(name, attributes)
+      clearStackToTableContext
+      @tree.activeFormattingElements.push(Marker)
+      @tree.insertElement(name, attributes)
+      @parser.phase = @parser.phases[:inCaption]
+    end
+
+    def startTagColgroup(name, attributes)
+      clearStackToTableContext
+      @tree.insertElement(name, attributes)
+      @parser.phase = @parser.phases[:inColumnGroup]
+    end
+
+    def startTagCol(name, attributes)
+      startTagColgroup('colgroup', {})
+      @parser.phase.processStartTag(name, attributes)
+    end
+
+    def startTagRowGroup(name, attributes)
+      clearStackToTableContext
+      @tree.insertElement(name, attributes)
+      @parser.phase = @parser.phases[:inTableBody]
+    end
+
+    def startTagImplyTbody(name, attributes)
+      startTagRowGroup('tbody', {})
+      @parser.phase.processStartTag(name, attributes)
+    end
+
+    def startTagTable(name, attributes)
+      @parser.parseError(_("Unexpected start tag (table) in table phase. Implies end tag (table)."))
+      @parser.phase.processEndTag('table')
+      @parser.phase.processStartTag(name, attributes) unless @parser.innerHTML
+    end
+
+    def startTagOther(name, attributes)
+      @parser.parseError(_("Unexpected start tag (#{name}) in table context caused voodoo mode."))
+      # Make all the special element rearranging voodoo kick in
+      @tree.insertFromTable = true
+      # Process the start tag in the "in body" mode
+      @parser.phases[:inBody].processStartTag(name, attributes)
+      @tree.insertFromTable = false
+    end
+
+    def endTagTable(name)
+      if in_scope?('table', true)
+        @tree.generateImpliedEndTags
+      
+        unless @tree.openElements[-1].name == 'table'
+          @parser.parseError(_("Unexpected end tag (table). Expected end tag (#{@tree.openElements[-1].name})."))
+        end
+      
+        remove_open_elements_until('table')
+
+        @parser.resetInsertionMode
+      else
+        # innerHTML case
+        assert @parser.innerHTML
+        @parser.parseError
+      end
+    end
+
+    def endTagIgnore(name)
+      @parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
+    end
+
+    def endTagOther(name)
+      @parser.parseError(_("Unexpected end tag (#{name}) in table context caused voodoo mode."))
+      # Make all the special element rearranging voodoo kick in
+      @parser.insertFromTable = true
+      # Process the end tag in the "in body" mode
+      @parser.phases[:inBody].processEndTag(name)
+      @parser.insertFromTable = false
+    end
+
+    protected
+
+    def clearStackToTableContext
+      # "clear the stack back to a table context"
+      until ['table', 'html'].include?(name = @tree.openElements[-1].name)
+        @parser.parseError(_("Unexpected implied end tag (#{name}) in the table phase."))
+        @tree.openElements.pop
+      end
+      # When the current node is <html> it's an innerHTML case
+    end
+
+  end
+end
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/initial_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/initial_phase.rb
@ -0,0 +1,49 @@
+require 'html5lib/html5parser/phase'
+
+module HTML5lib
+  class InitialPhase < Phase
+
+    # This phase deals with error handling as well which is currently not
+    # covered in the specification. The error handling is typically known as
+    # "quirks mode". It is expected that a future version of HTML5 will define this.
+
+    def processEOF
+      @parser.parseError(_('Unexpected End of file. Expected DOCTYPE.'))
+      @parser.phase = @parser.phases[:rootElement]
+      @parser.phase.processEOF
+    end
+
+    def processComment(data)
+      @tree.insertComment(data, @tree.document)
+    end
+
+    def processDoctype(name, error)
+      @parser.parseError(_('Erroneous DOCTYPE.')) if error
+      @tree.insertDoctype(name)
+      @parser.phase = @parser.phases[:rootElement]
+    end
+
+    def processSpaceCharacters(data)
+      @tree.insertText(data, @tree.document)
+    end
+
+    def processCharacters(data)
+      @parser.parseError(_('Unexpected non-space characters. Expected DOCTYPE.'))
+      @parser.phase = @parser.phases[:rootElement]
+      @parser.phase.processCharacters(data)
+    end
+
+    def processStartTag(name, attributes)
+      @parser.parseError(_("Unexpected start tag (#{name}). Expected DOCTYPE."))
+      @parser.phase = @parser.phases[:rootElement]
+      @parser.phase.processStartTag(name, attributes)
+    end
+
+    def processEndTag(name)
+      @parser.parseError(_("Unexpected end tag (#{name}). Expected DOCTYPE."))
+      @parser.phase = @parser.phases[:rootElement]
+      @parser.phase.processEndTag(name)
+    end
+
+  end
+end
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/phase.rb
@ -0,0 +1,156 @@
+module HTML5lib
+  # Base class for helper objects that implement each phase of processing.
+  #
+  # Handler methods should be in the following order (they can be omitted):
+  #
+  #   * EOF
+  #   * Comment
+  #   * Doctype
+  #   * SpaceCharacters
+  #   * Characters
+  #   * StartTag
+  #     - startTag* methods
+  #   * EndTag
+  #     - endTag* methods
+  #
+  class Phase
+
+    # The following example call:
+    #
+    #   tag_handlers('startTag', 'html', %( base link meta ), %( li dt dd ) => 'ListItem')
+    #
+    # ...would return a hash equal to this:
+    #
+    #   { 'html' => 'startTagHtml',
+    #     'base' => 'startTagBaseLinkMeta',
+    #     'link' => 'startTagBaseLinkMeta',
+    #     'meta' => 'startTagBaseLinkMeta',
+    #     'li'   => 'startTagListItem',
+    #     'dt'   => 'startTagListItem',
+    #     'dd'   => 'startTagListItem'  }
+    #
+    def self.tag_handlers(prefix, *tags)
+      mapping = {}
+      if tags.last.is_a?(Hash)
+        tags.pop.each do |names, handler_method_suffix|
+          handler_method = prefix + handler_method_suffix
+          Array(names).each { |name| mapping[name] = handler_method }
+        end
+      end
+      tags.each do |names|
+        names = Array(names)
+        handler_method = prefix + names.map { |name| name.capitalize }.join
+        names.each { |name| mapping[name] = handler_method }
+      end
+      return mapping
+    end
+
+    def self.start_tag_handlers
+      @start_tag_handlers ||= Hash.new('startTagOther')
+    end
+
+    # Declare what start tags this Phase handles. Can be called more than once.
+    #
+    # Example usage:
+    #
+    #   handle_start 'html'
+    #   # html start tags will be handled by a method named 'startTagHtml'
+    #
+    #   handle_start %( base link meta )
+    #   # base, link and meta start tags will be handled by a method named 'startTagBaseLinkMeta'
+    #
+    #   handle_start %( li dt dd ) => 'ListItem'
+    #   # li, dt, and dd start tags will be handled by a method named 'startTagListItem'
+    #
+    def self.handle_start(*tags)
+      start_tag_handlers.update tag_handlers('startTag', *tags)
+    end
+
+    def self.end_tag_handlers
+      @end_tag_handlers ||= Hash.new('endTagOther')
+    end
+
+    # Declare what end tags this Phase handles. Behaves like handle_start.
+    #
+    def self.handle_end(*tags)
+      end_tag_handlers.update tag_handlers('endTag', *tags)
+    end
+
+    def initialize(parser, tree)
+      @parser, @tree = parser, tree
+    end
+
+    def processEOF
+      @tree.generateImpliedEndTags
+
+      if @tree.openElements.length > 2
+        @parser.parseError(_('Unexpected end of file. Missing closing tags.'))
+      elsif @tree.openElements.length == 2 and @tree.openElements[1].name != 'body'
+        # This happens for framesets or something?
+        @parser.parseError(_("Unexpected end of file. Expected end tag (#{@tree.openElements[1].name}) first."))
+      elsif @parser.innerHTML and @tree.openElements.length > 1 
+        # XXX This is not what the specification says. Not sure what to do here.
+        @parser.parseError(_('XXX innerHTML EOF'))
+      end
+      # Betting ends.
+    end
+
+    def processComment(data)
+      # For most phases the following is correct. Where it's not it will be
+      # overridden.
+      @tree.insertComment(data, @tree.openElements[-1])
+    end
+
+    def processDoctype(name, error)
+      @parser.parseError(_('Unexpected DOCTYPE. Ignored.'))
+    end
+
+    def processSpaceCharacters(data)
+      @tree.insertText(data)
+    end
+
+    def processStartTag(name, attributes)
+      send self.class.start_tag_handlers[name], name, attributes
+    end
+
+    def startTagHtml(name, attributes)
+      if @parser.firstStartTag == false and name == 'html'
+         @parser.parseError(_('html needs to be the first start tag.'))
+      end
+      # XXX Need a check here to see if the first start tag token emitted is
+      # this token... If it's not, invoke @parser.parseError.
+      attributes.each do |attr, value|
+        unless @tree.openElements[0].attributes.has_key?(attr)
+          @tree.openElements[0].attributes[attr] = value
+        end
+      end
+      @parser.firstStartTag = false
+    end
+
+    def processEndTag(name)
+      send self.class.end_tag_handlers[name], name
+    end
+
+    def _(string)
+      string
+    end
+
+    def assert(value)
+      throw AssertionError.new unless value
+    end
+
+    def in_scope?(*args)
+      @tree.elementInScope(*args)
+    end
+
+    def remove_open_elements_until(name=nil)
+      finished = false
+      until finished
+        element = @tree.openElements.pop
+        finished = name.nil?? yield(element) : element.name == name
+      end
+      return element
+    end
+
+  end
+end
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/root_element_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/root_element_phase.rb
@ -0,0 +1,43 @@
+require 'html5lib/html5parser/phase'
+
+module HTML5lib
+  class RootElementPhase < Phase
+
+    def processEOF
+      insertHtmlElement
+      @parser.phase.processEOF
+    end
+
+    def processComment(data)
+      @tree.insertComment(data, @tree.document)
+    end
+
+    def processSpaceCharacters(data)
+      @tree.insertText(data, @tree.document)
+    end
+
+    def processCharacters(data)
+      insertHtmlElement
+      @parser.phase.processCharacters(data)
+    end
+
+    def processStartTag(name, attributes)
+      @parser.firstStartTag = true if name == 'html'
+      insertHtmlElement
+      @parser.phase.processStartTag(name, attributes)
+    end
+
+    def processEndTag(name)
+      insertHtmlElement
+      @parser.phase.processEndTag(name)
+    end
+
+    def insertHtmlElement
+      element = @tree.createElement('html', {})
+      @tree.openElements.push(element)
+      @tree.document.appendChild(element)
+      @parser.phase = @parser.phases[:beforeHead]
+    end
+
+  end
+end
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/trailing_end_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/html5parser/trailing_end_phase.rb
@ -0,0 +1,36 @@
+require 'html5lib/html5parser/phase'
+
+module HTML5lib
+  class TrailingEndPhase < Phase
+
+    def processEOF
+    end
+
+    def processComment(data)
+      @tree.insertComment(data, @tree.document)
+    end
+
+    def processSpaceCharacters(data)
+      @parser.lastPhase.processSpaceCharacters(data)
+    end
+
+    def processCharacters(data)
+      @parser.parseError(_('Unexpected non-space characters. Expected end of file.'))
+      @parser.phase = @parser.lastPhase
+      @parser.phase.processCharacters(data)
+    end
+
+    def processStartTag(name, attributes)
+      @parser.parseError(_('Unexpected start tag (#{name}). Expected end of file.'))
+      @parser.phase = @parser.lastPhase
+      @parser.phase.processStartTag(name, attributes)
+    end
+
+    def processEndTag(name)
+      @parser.parseError(_('Unexpected end tag (#{name}). Expected end of file.'))
+      @parser.phase = @parser.lastPhase
+      @parser.phase.processEndTag(name)
+    end
+
+  end
+end
--- a/vendor/plugins/HTML5lib/lib/html5lib/inputstream.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/inputstream.rb
@ -10,7 +10,7 @@ module HTML5lib

  class HTMLInputStream

-    attr_accessor :queue, :charEncoding
+    attr_accessor :queue, :char_encoding

    # Initialises the HTMLInputStream.
    # 
@ -28,16 +28,16 @@ class HTMLInputStream

    def initialize(source, options = {})
      @encoding = nil
-        @parseMeta = true
+      @parse_meta = true
      @chardet = true

      options.each { |name, value| instance_variable_set("@#{name}", value) }

      # List of where new lines occur
-        @newLines = []
+      @new_lines = []

      # Raw Stream
-        @rawStream = openStream(source)
+      @raw_stream = open_stream(source)

      # Encoding Information
      #Number of bytes to use when looking for a meta element with
@ -47,15 +47,15 @@ class HTMLInputStream
      @DEFAULT_ENCODING = 'windows-1252'
    
      #Detect encoding iff no explicit "transport level" encoding is supplied
-        if @encoding.nil? or not HTML5lib.isValidEncoding(@encoding)
-            @charEncoding = detectEncoding
+      if @encoding.nil? or not HTML5lib.is_valid_encoding(@encoding)
+        @char_encoding = detect_encoding
      else
-            @charEncoding = @encoding
+        @char_encoding = @encoding
      end

      # Read bytes from stream decoding them into Unicode
-        uString = @rawStream.read
-        unless @charEncoding == 'utf-8'
+      uString = @raw_stream.read
+      unless @char_encoding == 'utf-8'
        begin
          require 'iconv'
          uString = Iconv.iconv('utf-8', @encoding, uString)[0]
@ -68,7 +68,7 @@ class HTMLInputStream
      uString.gsub!("\x00", [0xFFFD].pack('U'))

      # Convert the unicode string into a list to be used as the data stream
-        @dataStream = uString
+      @data_stream = uString

      @queue = []

@ -79,7 +79,7 @@ class HTMLInputStream
    # Produces a file object from source.
    #
    # source can be either a file object, local filename or a string.
-    def openStream(source)
+    def open_stream(source)
      # Already an IO like object
      if source.respond_to?(:read)
        @stream = source
@ -90,24 +90,24 @@ class HTMLInputStream
      return @stream
    end

-    def detectEncoding
+    def detect_encoding

      #First look for a BOM
      #This will also read past the BOM if present
-        encoding = detectBOM
+      encoding = detect_bom
      #If there is no BOM need to look for meta elements with encoding 
      #information
-        if encoding.nil? and @parseMeta
-            encoding = detectEncodingMeta
+      if encoding.nil? and @parse_meta
+        encoding = detect_encoding_meta
      end
      #Guess with chardet, if avaliable
      if encoding.nil? and @chardet
        begin
          require 'rubygems'
          require 'UniversalDetector' # gem install chardet
-                buffer = @rawStream.read
+          buffer = @raw_stream.read
          encoding = UniversalDetector::chardet(buffer)['encoding']
-                @rawStream = openStream(buffer)
+          @raw_stream = open_stream(buffer)
        rescue LoadError
        end
      end
@ -117,10 +117,10 @@ class HTMLInputStream
      end
    
      #Substitute for equivalent encodings:
-        encodingSub = {'ascii' => 'windows-1252', 'iso-8859-1' => 'windows-1252'}
+      encoding_sub = {'ascii' => 'windows-1252', 'iso-8859-1' => 'windows-1252'}

-        if encodingSub.has_key?(encoding.downcase)
-            encoding = encodingSub[encoding.downcase]
+      if encoding_sub.has_key?(encoding.downcase)
+        encoding = encoding_sub[encoding.downcase]
      end

      return encoding
@ -129,8 +129,8 @@ class HTMLInputStream
    # Attempts to detect at BOM at the start of the stream. If
    # an encoding can be determined from the BOM return the name of the
    # encoding otherwise return nil
-    def detectBOM
-        bomDict = {
+    def detect_bom
+      bom_dict = {
        "\xef\xbb\xbf" => 'utf-8',
        "\xff\xfe" => 'utf-16-le',
        "\xfe\xff" => 'utf-16-be',
@ -139,19 +139,19 @@ class HTMLInputStream
      }

      # Go to beginning of file and read in 4 bytes
-        @rawStream.seek(0)
-        string = @rawStream.read(4)
+      @raw_stream.seek(0)
+      string = @raw_stream.read(4)
      return nil unless string

      # Try detecting the BOM using bytes from the string
-        encoding = bomDict[string[0...3]]          # UTF-8
+      encoding = bom_dict[string[0...3]]      # UTF-8
      seek = 3
      unless encoding
        # Need to detect UTF-32 before UTF-16
-            encoding = bomDict[string]             # UTF-32
+        encoding = bom_dict[string]       # UTF-32
        seek = 4
        unless encoding
-                encoding = bomDict[string[0...2]]  # UTF-16
+          encoding = bom_dict[string[0...2]]  # UTF-16
          seek = 2
        end
      end
@ -159,36 +159,36 @@ class HTMLInputStream
      #AT - move this to the caller?
      # Set the read position past the BOM if one was found, otherwise
      # set it to the start of the stream
-        @rawStream.seek(encoding ? seek : 0)
+      @raw_stream.seek(encoding ? seek : 0)

      return encoding
    end

    # Report the encoding declared by the meta element
-    def detectEncodingMeta
-        parser = EncodingParser.new(@rawStream.read(@NUM_BYTES_META))
-        @rawStream.seek(0)
-        return parser.getEncoding
+    def detect_encoding_meta
+      parser = EncodingParser.new(@raw_stream.read(@NUM_BYTES_META))
+      @raw_stream.seek(0)
+      return parser.get_encoding
    end

-    def determineNewLines
+    def determine_new_lines
      # Looks through the stream to find where new lines occur so
      # the position method can tell where it is.
-        @newLines.push(0)
-        (0...@dataStream.length).each { |i| @newLines.push(i) if @dataStream[i] == ?\n }
+      @new_lines.push(0)
+      (0...@data_stream.length).each { |i| @new_lines.push(i) if @data_stream[i] == ?\n }
    end

    # Returns (line, col) of the current position in the stream.
    def position
      # Generate list of new lines first time around
-        determineNewLines if @newLines.empty?
+      determine_new_lines if @new_lines.empty?
      line = 0
      tell = @tell
-        @newLines.each do |pos|
+      @new_lines.each do |pos|
        break unless pos < tell
        line += 1
      end
-        col = tell - @newLines[line-1] - 1
+      col = tell - @new_lines[line-1] - 1
      return [line, col]
    end

@ -205,7 +205,7 @@ class HTMLInputStream
      else
        begin
          @tell += 1
-                return @dataStream[@tell - 1].chr
+          return @data_stream[@tell - 1].chr
        rescue
          return :EOF
        end
@ -215,22 +215,22 @@ class HTMLInputStream
    # Returns a string of characters from the stream up to but not
    # including any character in characters or EOF. characters can be
    # any container that supports the in method being called on it.
-    def charsUntil(characters, opposite = false)
-        charStack = [char]
+    def chars_until(characters, opposite=false)
+      char_stack = [char]

-        unless charStack[0] == :EOF
-            while (characters.include? charStack[-1]) == opposite
+      unless char_stack[0] == :EOF
+        while (characters.include? char_stack[-1]) == opposite
          unless @queue.empty?
            # First from the queue
-                    charStack.push(@queue.shift)
-                    break if charStack[-1] == :EOF
+            char_stack.push(@queue.shift)
+            break if char_stack[-1] == :EOF
          else
            # Then the rest
            begin
-                        charStack.push(@dataStream[@tell].chr)
+              char_stack.push(@data_stream[@tell].chr)
              @tell += 1
            rescue
-                        charStack.push(:EOF)
+              char_stack.push(:EOF)
              break
            end
          end
@ -239,8 +239,8 @@ class HTMLInputStream

      # Put the character stopped on back to the front of the queue
      # from where it came.
-        @queue.insert(0, charStack.pop)
-        return charStack.join('')
+      @queue.insert(0, char_stack.pop)
+      return char_stack.join('')
    end
  end

@ -263,14 +263,14 @@ class EncodingBytes < String
    rescue EOF
    end
  
-    def currentByte
+    def current_byte
      raise EOF if @position >= length
      return self[@position].chr
    end
  
    # Skip past a list of characters
    def skip(chars=SPACE_CHARACTERS)
-        while chars.include?(currentByte)
+      while chars.include?(current_byte)
        @position += 1
      end
    end
@ -278,7 +278,7 @@ class EncodingBytes < String
    # Look for a sequence of bytes at the start of a string. If the bytes 
    # are found return true and advance the position to the byte after the 
    # match. Otherwise return false and leave the position alone
-    def matchBytes(bytes, lower = false)
+    def match_bytes(bytes, lower=false)
      data = self[position ... position+bytes.length]
      data.downcase! if lower
      rv = (data == bytes)
@ -288,10 +288,10 @@ class EncodingBytes < String
  
    # Look for the next sequence of bytes matching a given sequence. If
    # a match is found advance the position to the last byte of the match
-    def jumpTo(bytes)
-        newPosition = self[position .. -1].index(bytes)
-        if newPosition
-            @position += (newPosition + bytes.length-1)
+    def jump_to(bytes)
+      new_position = self[position .. -1].index(bytes)
+      if new_position
+        @position += (new_position + bytes.length-1)
        return true
      else
        raise EOF
@ -300,8 +300,8 @@ class EncodingBytes < String
  
    # Move the pointer so it points to the next byte in a set of possible
    # bytes
-    def findNext(byteList)
-        until byteList.include?(currentByte)
+    def find_next(byte_list)
+      until byte_list.include?(current_byte)
        @position += 1
      end
    end
@ -317,139 +317,139 @@ class EncodingParser
    end

    @@method_dispatch = [
-        ['<!--', :handleComment],
-        ['<meta', :handleMeta],
-        ['</', :handlePossibleEndTag],
-        ['<!', :handleOther],
-        ['<?', :handleOther],
-        ['<', :handlePossibleStartTag]
+      ['<!--', :handle_comment],
+      ['<meta', :handle_meta],
+      ['</', :handle_possible_end_tag],
+      ['<!', :handle_other],
+      ['<?', :handle_other],
+      ['<', :handle_possible_start_tag]
    ]

-    def getEncoding
+    def get_encoding
      @data.each do |byte|
-            keepParsing = true
+        keep_parsing = true
        @@method_dispatch.each do |(key, method)|
-                if @data.matchBytes(key, lower = true)
-                    keepParsing = send(method)    
+          if @data.match_bytes(key, lower = true)
+            keep_parsing = send(method)
            break
          end
        end
-            break unless keepParsing
+        break unless keep_parsing
      end
      @encoding = @encoding.strip unless @encoding.nil?
      return @encoding
    end

    # Skip over comments
-    def handleComment
-        return @data.jumpTo('-->')
+    def handle_comment
+      return @data.jump_to('-->')
    end

-    def handleMeta
+    def handle_meta
      # if we have <meta not followed by a space so just keep going
-        return true unless SPACE_CHARACTERS.include?(@data.currentByte)
+      return true unless SPACE_CHARACTERS.include?(@data.current_byte)

      #We have a valid meta element we want to search for attributes
      while true
        #Try to find the next attribute after the current position
-            attr = getAttribute
+        attr = get_attribute

        return true if attr.nil?
        
        if attr[0] == 'charset'
-                tentativeEncoding = attr[1]
-                if HTML5lib.isValidEncoding(tentativeEncoding)
-                    @encoding = tentativeEncoding    
+          tentative_encoding = attr[1]
+          if HTML5lib.is_valid_encoding(tentative_encoding)
+            @encoding = tentative_encoding  
            return false
          end
        elsif attr[0] == 'content'
-                contentParser = ContentAttrParser.new(EncodingBytes.new(attr[1]))
-                tentativeEncoding = contentParser.parse
-                if HTML5lib.isValidEncoding(tentativeEncoding)
-                    @encoding = tentativeEncoding    
+          content_parser = ContentAttrParser.new(EncodingBytes.new(attr[1]))
+          tentative_encoding = content_parser.parse
+          if HTML5lib.is_valid_encoding(tentative_encoding)
+            @encoding = tentative_encoding
            return false
          end
        end
      end
    end

-    def handlePossibleStartTag
-        return handlePossibleTag(false)
+    def handle_possible_start_tag
+      return handle_possible_tag(false)
    end

-    def handlePossibleEndTag
+    def handle_possible_end_tag
      @data.position += 1
-        return handlePossibleTag(true)
+      return handle_possible_tag(true)
    end

-    def handlePossibleTag(endTag)
-        unless ASCII_LETTERS.include?(@data.currentByte)
+    def handle_possible_tag(end_tag)
+      unless ASCII_LETTERS.include?(@data.current_byte)
        #If the next byte is not an ascii letter either ignore this
        #fragment (possible start tag case) or treat it according to 
        #handleOther
-            if endTag
+        if end_tag
          @data.position -= 1
-                handleOther
+          handle_other
        end
        return true
      end
    
-        @data.findNext(SPACE_CHARACTERS + ['<', '>'])
+      @data.find_next(SPACE_CHARACTERS + ['<', '>'])

-        if @data.currentByte == '<'
+      if @data.current_byte == '<'
        #return to the first step in the overall "two step" algorithm
        #reprocessing the < byte
        @data.position -= 1  
      else
        #Read all attributes
-            {} until getAttribute.nil?
+        {} until get_attribute.nil?
      end
      return true
    end

-    def handleOther
-        return @data.jumpTo('>')
+    def handle_other
+      return @data.jump_to('>')
    end

    # Return a name,value pair for the next attribute in the stream,
    # if one is found, or nil
-    def getAttribute
+    def get_attribute
      @data.skip(SPACE_CHARACTERS + ['/'])

-        if @data.currentByte == '<'
+      if @data.current_byte == '<'
        @data.position -= 1
        return nil
-        elsif @data.currentByte == '>'
+      elsif @data.current_byte == '>'
        return nil
      end

-        attrName = []
-        attrValue = []
-        spaceFound = false
+      attr_name = []
+      attr_value = []
+      space_found = false
      #Step 5 attribute name
      while true
-            if @data.currentByte == '=' and attrName:   
+        if @data.current_byte == '=' and attr_name:
          break
-            elsif SPACE_CHARACTERS.include?(@data.currentByte)
-                spaceFound = true
+        elsif SPACE_CHARACTERS.include?(@data.current_byte)
+          space_found = true
          break
-            elsif ['/', '<', '>'].include?(@data.currentByte)
-                return [attrName.join(''), '']
-            elsif ASCII_UPPERCASE.include?(@data.currentByte)
-                attrName.push(@data.currentByte.downcase)
+        elsif ['/', '<', '>'].include?(@data.current_byte)
+          return [attr_name.join(''), '']
+        elsif ASCII_UPPERCASE.include?(@data.current_byte)
+          attr_name.push(@data.current_byte.downcase)
        else
-                attrName.push(@data.currentByte)
+          attr_name.push(@data.current_byte)
        end
        #Step 6
        @data.position += 1
      end
      #Step 7
-        if spaceFound
+      if space_found
        @data.skip
        #Step 8
-            unless @data.currentByte == '='
+        unless @data.current_byte == '='
          @data.position -= 1
-                return [attrName.join(''), '']
+          return [attr_name.join(''), '']
        end
      end
      #XXX need to advance position in both spaces and value case
@ -458,38 +458,38 @@ class EncodingParser
      #Step 10
      @data.skip
      #Step 11
-        if ["'", '"'].include?(@data.currentByte)
+      if ["'", '"'].include?(@data.current_byte)
        #11.1
-            quoteChar = @data.currentByte
+        quote_char = @data.current_byte
        while true
          @data.position+=1
          #11.3
-                if @data.currentByte == quoteChar
+          if @data.current_byte == quote_char
            @data.position += 1
-                    return [attrName.join(''), attrValue.join('')]
+            return [attr_name.join(''), attr_value.join('')]
          #11.4
-                elsif ASCII_UPPERCASE.include?(@data.currentByte)
-                    attrValue.push(@data.currentByte.downcase)
+          elsif ASCII_UPPERCASE.include?(@data.current_byte)
+            attr_value.push(@data.current_byte.downcase)
          #11.5
          else
-                    attrValue.push(@data.currentByte)
+            attr_value.push(@data.current_byte)
          end
        end
-        elsif ['>', '<'].include?(@data.currentByte)
-            return [attrName.join(''), '']
-        elsif ASCII_UPPERCASE.include?(@data.currentByte)
-            attrValue.push(@data.currentByte.downcase)
+      elsif ['>', '<'].include?(@data.current_byte)
+        return [attr_name.join(''), '']
+      elsif ASCII_UPPERCASE.include?(@data.current_byte)
+        attr_value.push(@data.current_byte.downcase)
      else
-            attrValue.push(@data.currentByte)
+        attr_value.push(@data.current_byte)
      end
      while true
        @data.position += 1
-            if (SPACE_CHARACTERS + ['>', '<']).include?(@data.currentByte)
-                return [attrName.join(''), attrValue.join('')]
-            elsif ASCII_UPPERCASE.include?(@data.currentByte)
-                attrValue.push(@data.currentByte.downcase)
+        if (SPACE_CHARACTERS + ['>', '<']).include?(@data.current_byte)
+          return [attr_name.join(''), attr_value.join('')]
+        elsif ASCII_UPPERCASE.include?(@data.current_byte)
+          attr_value.push(@data.current_byte.downcase)
        else
-                attrValue.push(@data.currentByte)
+          attr_value.push(@data.current_byte)
        end
      end
    end
@ -499,40 +499,41 @@ class ContentAttrParser
    def initialize(data)
      @data = data
    end
+
    def parse
      begin
        #Skip to the first ";"
        @data.position = 0
-            @data.jumpTo(';')
+        @data.jump_to(';')
        @data.position += 1
        @data.skip
        #Check if the attr name is charset 
        #otherwise return
-            @data.jumpTo('charset')
+        @data.jump_to('charset')
        @data.position += 1
        @data.skip
-            unless @data.currentByte == '='
+        unless @data.current_byte == '='
          #If there is no = sign keep looking for attrs
          return nil
        end
        @data.position += 1
        @data.skip
        #Look for an encoding between matching quote marks
-            if ['"', "'"].include?(@data.currentByte)
-                quoteMark = @data.currentByte
+        if ['"', "'"].include?(@data.current_byte)
+          quote_mark = @data.current_byte
          @data.position += 1
-                oldPosition = @data.position
-                @data.jumpTo(quoteMark)
-                return @data[oldPosition ... @data.position]
+          old_position = @data.position
+          @data.jump_to(quote_mark)
+          return @data[old_position ... @data.position]
        else
          #Unquoted value
-                oldPosition = @data.position
+          old_position = @data.position
          begin
-                    @data.findNext(SPACE_CHARACTERS)
-                    return @data[oldPosition ... @data.position]
+            @data.find_next(SPACE_CHARACTERS)
+            return @data[old_position ... @data.position]
          rescue EOF
            #Return the whole remaining value
-                    return @data[oldPosition .. -1]
+            return @data[old_position .. -1]
          end
        end
      rescue EOF
@ -542,7 +543,7 @@ class ContentAttrParser
  end

  # Determine if a string is a supported encoding
-def self.isValidEncoding(encoding)
+  def self.is_valid_encoding(encoding)
    (not encoding.nil? and encoding.kind_of?(String) and ENCODINGS.include?(encoding.downcase.strip))
  end

--- a/vendor/plugins/HTML5lib/lib/html5lib/sanitizer.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/sanitizer.rb
@ -144,7 +144,6 @@ class HTMLSanitizer < HTMLTokenizer
        else
          yield token
        end
-
      end
    end

--- a/vendor/plugins/HTML5lib/lib/html5lib/tokenizer.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/tokenizer.rb
@ -147,7 +147,7 @@ class HTMLTokenizer
        charAsInt = 65533
      end

-        if charAsInt <= 0x10FFF
+      if charAsInt <= 0x10FFFF
        char = [charAsInt].pack('U')
      else
        @tokenQueue.push({:type => :ParseError, :data =>
@ -261,13 +261,11 @@ class HTMLTokenizer
      @state = @states[:data]
    end

-
    # Below are the various tokenizer states worked out.

    # XXX AT Perhaps we should have Hixie run some evaluation on billions of
    # documents to figure out what the order of the various if and elsif
    # statements should be.
-
    def dataState
      data = @stream.char
      if data == "&" and (@contentModelFlag == :PCDATA or
@ -285,10 +283,10 @@ class HTMLTokenizer
        # XXX need to check if we don't need a special "spaces" flag on
        # characters.
        @tokenQueue.push({:type => :SpaceCharacters, :data =>
-              data + @stream.charsUntil(SPACE_CHARACTERS, true)})
+          data + @stream.chars_until(SPACE_CHARACTERS, true)})
      else
        @tokenQueue.push({:type => :Characters, :data => 
-              data + @stream.charsUntil(["&", "<"])})
+          data + @stream.chars_until(["&", "<"])})
      end
      return true
    end
@ -430,7 +428,7 @@ class HTMLTokenizer
        emitCurrentToken
      elsif ASCII_LETTERS.include? data
        @currentToken[:name] += data +\
-              @stream.charsUntil(ASCII_LETTERS, true)
+          @stream.chars_until(ASCII_LETTERS, true)
      elsif data == ">"
        emitCurrentToken
      elsif data == "<"
@ -450,7 +448,7 @@ class HTMLTokenizer
    def beforeAttributeNameState
      data = @stream.char
      if SPACE_CHARACTERS.include? data
-            @stream.charsUntil(SPACE_CHARACTERS, true)
+        @stream.chars_until(SPACE_CHARACTERS, true)
      elsif data == :EOF
        @tokenQueue.push({:type => :ParseError, :data =>
          _("Unexpected end of file. Expected attribute name instead.")})
@ -486,7 +484,7 @@ class HTMLTokenizer
        leavingThisState = false
      elsif ASCII_LETTERS.include? data
        @currentToken[:data][-1][0] += data +\
-              @stream.charsUntil(ASCII_LETTERS, true)
+          @stream.chars_until(ASCII_LETTERS, true)
        leavingThisState = false
      elsif data == ">"
        # XXX If we emit here the attributes are converted to a dict
@ -529,7 +527,7 @@ class HTMLTokenizer
    def afterAttributeNameState
      data = @stream.char
      if SPACE_CHARACTERS.include? data
-            @stream.charsUntil(SPACE_CHARACTERS, true)
+        @stream.chars_until(SPACE_CHARACTERS, true)
      elsif data == "="
        @state = @states[:beforeAttributeValue]
      elsif data == ">"
@ -559,7 +557,7 @@ class HTMLTokenizer
    def beforeAttributeValueState
      data = @stream.char
      if SPACE_CHARACTERS.include? data
-            @stream.charsUntil(SPACE_CHARACTERS, true)
+        @stream.chars_until(SPACE_CHARACTERS, true)
      elsif data == "\""
        @state = @states[:attributeValueDoubleQuoted]
      elsif data == "&"
@ -597,7 +595,7 @@ class HTMLTokenizer
        emitCurrentToken
      else
        @currentToken[:data][-1][1] += data +\
-              @stream.charsUntil(["\"", "&"])
+          @stream.chars_until(["\"", "&"])
      end
      return true
    end
@ -614,7 +612,7 @@ class HTMLTokenizer
        emitCurrentToken
      else
        @currentToken[:data][-1][1] += data +\
-              @stream.charsUntil(["'", "&"])
+          @stream.chars_until(["'", "&"])
      end
      return true
    end
@ -638,17 +636,17 @@ class HTMLTokenizer
        emitCurrentToken
      else
        @currentToken[:data][-1][1] += data + 
-              @stream.charsUntil(["&", ">","<"] + SPACE_CHARACTERS)
+          @stream.chars_until(["&", ">","<"] + SPACE_CHARACTERS)
      end
      return true
    end

    def bogusCommentState
      # Make a new comment token and give it as value all the characters
-        # until the first > or :EOF (charsUntil checks for :EOF automatically)
+      # until the first > or :EOF (chars_until checks for :EOF automatically)
      # and emit it.
      @tokenQueue.push(
-          {:type => :Comment, :data => @stream.charsUntil((">"))})
+        {:type => :Comment, :data => @stream.chars_until((">"))})

      # Eat the character directly after the bogus comment which is either a
      # ">" or an :EOF.
@ -690,7 +688,7 @@ class HTMLTokenizer
        @tokenQueue.push(@currentToken)
        @state = @states[:data]
      else
-            @currentToken[:data] += data + @stream.charsUntil("-")
+        @currentToken[:data] += data + @stream.chars_until("-")
      end
      return true
    end
@ -706,7 +704,7 @@ class HTMLTokenizer
        @state = @states[:data]
      else
        @currentToken[:data] += "-" + data +\
-              @stream.charsUntil("-")
+          @stream.chars_until("-")
        # Consume the next character which is either a "-" or an :EOF as
        # well so if there's a "-" directly after the "-" we go nicely to
        # the "comment end state" without emitting a ParseError there.
--- a/vendor/plugins/HTML5lib/lib/html5lib/treebuilders/hpricot.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/treebuilders/hpricot.rb
@ -89,13 +89,16 @@ class Element < Node
          def initialize(hpricot)
            @hpricot = hpricot
          end
+
          def []=(k, v)
            @hpricot.stag.send(stag_attributes_method)[k] = v
          end
+
          def stag_attributes_method
            # STag#attributes changed to STag#raw_attributes after Hpricot 0.5
            @hpricot.stag.respond_to?(:raw_attributes) ? :raw_attributes : :attributes
          end
+
          def method_missing(*a, &b)
            @hpricot.attributes.send(*a, &b)
          end
--- a/vendor/plugins/HTML5lib/tests/preamble.rb
+++ b/vendor/plugins/HTML5lib/tests/preamble.rb
@ -9,3 +9,15 @@ $:.unshift File.dirname(__FILE__)
 def html5lib_test_files(subdirectory)
  Dir[File.join(HTML5LIB_BASE, 'tests', subdirectory, '*.*')]
 end
+
+begin
+  require 'jsonx'
+rescue LoadError
+  class JSON
+    def self.parse json
+      json.gsub! /"\s*:/, '"=>'
+      json.gsub!(/\\u[0-9a-fA-F]{4}/) {|x| [x[2..-1].to_i(16)].pack('U')}
+      eval json
+    end
+  end
+end
--- a/vendor/plugins/HTML5lib/tests/test_encoding.rb
+++ b/vendor/plugins/HTML5lib/tests/test_encoding.rb
@ -11,7 +11,7 @@ begin
    def test_chardet
        File.open(File.join(HTML5LIB_BASE, 'tests', 'encoding', 'chardet', 'test_big5.txt')) do |file|
            stream = HTML5lib::HTMLInputStream.new(file, :chardet => true)
-            assert_equal 'big5', stream.charEncoding.downcase
+            assert_equal 'big5', stream.char_encoding.downcase
        end
    end
 rescue LoadError
@ -28,7 +28,7 @@ end

            define_method 'test_%s_%d' % [ test_name, index + 1 ] do
                stream = HTML5lib::HTMLInputStream.new(input, :chardet => false)
-                assert_equal encoding.downcase, stream.charEncoding.downcase, input
+                assert_equal encoding.downcase, stream.char_encoding.downcase, input
            end
        end
    end
--- a/vendor/plugins/HTML5lib/tests/test_sanitizer.rb
+++ b/vendor/plugins/HTML5lib/tests/test_sanitizer.rb
@ -203,4 +203,8 @@ class SanitizeTest < Test::Unit::TestCase
       sanitize_html(%(<img src='vbscript:msgbox("XSS")' />))
  end

+  def test_should_handle_astral_plane_characters
+    assert_equal "<p>\360\235\222\265 \360\235\224\270</p>",
+      sanitize_html("<p>&#x1d4b5; &#x1d538;</p>")
+  end
 end
--- a/vendor/plugins/HTML5lib/tests/test_tokenizer.rb
+++ b/vendor/plugins/HTML5lib/tests/test_tokenizer.rb
@ -4,18 +4,6 @@ require 'html5lib/tokenizer'

 require 'tokenizer_test_parser'

-begin
-  require 'jsonx'
-rescue LoadError
-  class JSON
-    def self.parse json
-      json.gsub! /"\s*:/, '"=>'
-      json.gsub!(/\\u[0-9a-fA-F]{4}/) {|x| [x[2..-1].to_i(16)].pack('U')}
-      eval json
-    end
-  end
-end 
-
 class Html5TokenizerTestCase < Test::Unit::TestCase

  def type_of?(token_name, token)