diff --git a/vendor/plugins/HTML5lib/bin/html5 b/vendor/plugins/HTML5lib/bin/html5 index bc0514ad..870e5117 100755 --- a/vendor/plugins/HTML5lib/bin/html5 +++ b/vendor/plugins/HTML5lib/bin/html5 @@ -1,5 +1,6 @@ #!/usr/bin/env ruby +require 'core_ext/string' $:.unshift File.dirname(__FILE__), 'lib' def parse(opts, args) @@ -82,7 +83,7 @@ def print_output(parser, document, opts) if opts.error errList=[] for pos, errorcode, datavars in parser.errors - errList << "Line %i Col %i"%pos + " " + constants.E.get(errorcode, 'Unknown error "%s"' % errorcode) % datavars + errList << "Line #{pos[0]} Col #{pos[1]} " + (HTML5::E[errorcode] || "Unknown error \"#{errorcode}\"") % datavars end $stdout.write("\nParse errors:\n" + errList.join("\n")+"\n") end diff --git a/vendor/plugins/HTML5lib/lib/core_ext/string.rb b/vendor/plugins/HTML5lib/lib/core_ext/string.rb new file mode 100644 index 00000000..dec1fa4f --- /dev/null +++ b/vendor/plugins/HTML5lib/lib/core_ext/string.rb @@ -0,0 +1,17 @@ +class String + alias old_format % + define_method("%") do |data| + unless data.kind_of?(Hash) + $VERBOSE = false + r = old_format(data) + $VERBOSE = true + r + else + ret = self.clone + data.each do |k,v| + ret.gsub!(/\%\(#{k}\)/, v) + end + ret + end + end +end \ No newline at end of file diff --git a/vendor/plugins/HTML5lib/lib/html5/html5parser.rb b/vendor/plugins/HTML5lib/lib/html5/html5parser.rb index 8a7e05f3..e6c2a8a2 100644 --- a/vendor/plugins/HTML5lib/lib/html5/html5parser.rb +++ b/vendor/plugins/HTML5lib/lib/html5/html5parser.rb @@ -69,15 +69,15 @@ module HTML5 if inner_html case @inner_html = container.downcase - when 'title', 'textarea' - @tokenizer.content_model_flag = :RCDATA - when 'style', 'script', 'xmp', 'iframe', 'noembed', 'noframes', 'noscript' - @tokenizer.content_model_flag = :CDATA - when 'plaintext' - @tokenizer.content_model_flag = :PLAINTEXT - else + when 'title', 'textarea' + @tokenizer.content_model_flag = :RCDATA + when 'style', 'script', 'xmp', 'iframe', 'noembed', 'noframes', 'noscript' + @tokenizer.content_model_flag = :CDATA + when 'plaintext' + @tokenizer.content_model_flag = :PLAINTEXT + else # content_model_flag already is PCDATA - #@tokenizer.content_model_flag = :PCDATA + @tokenizer.content_model_flag = :PCDATA end @phase = @phases[:rootElement] diff --git a/vendor/plugins/HTML5lib/lib/html5/html5parser/in_body_phase.rb b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_body_phase.rb index e7c30203..f0254a08 100644 --- a/vendor/plugins/HTML5lib/lib/html5/html5parser/in_body_phase.rb +++ b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_body_phase.rb @@ -6,45 +6,45 @@ module HTML5 # http://www.whatwg.org/specs/web-apps/current-work/#in-body handle_start 'html' - handle_start %w( base link meta script style ) => 'ProcessInHead' + handle_start %w(base link meta script style) => 'ProcessInHead' handle_start 'title' handle_start 'body', 'form', 'plaintext', 'a', 'button', 'xmp', 'table', 'hr', 'image' - handle_start 'input', 'textarea', 'select', 'isindex', %w( marquee object ) + handle_start 'input', 'textarea', 'select', 'isindex', %w(marquee object) - handle_start %w( li dd dt ) => 'ListItem' - - handle_start %w( address blockquote center dir div dl fieldset listing menu ol p pre ul ) => 'CloseP' + handle_start %w(li dd dt) => 'ListItem' - handle_start %w( b big em font i s small strike strong tt u ) => 'Formatting' + handle_start %w(address blockquote center dir div dl fieldset listing menu ol p pre ul) => 'CloseP' + + handle_start %w(b big em font i s small strike strong tt u) => 'Formatting' handle_start 'nobr' - handle_start %w( area basefont bgsound br embed img param spacer wbr ) => 'VoidFormatting' + handle_start %w(area basefont bgsound br embed img param spacer wbr) => 'VoidFormatting' - handle_start %w( iframe noembed noframes noscript ) => 'Cdata', HEADING_ELEMENTS => 'Heading' + handle_start %w(iframe noembed noframes noscript) => 'Cdata', HEADING_ELEMENTS => 'Heading' - handle_start %w( caption col colgroup frame frameset head option optgroup tbody td tfoot th thead tr ) => 'Misplaced' + handle_start %w(caption col colgroup frame frameset head option optgroup tbody td tfoot th thead tr) => 'Misplaced' - handle_start %w( event-source section nav article aside header footer datagrid command ) => 'New' + handle_start %w(event-source section nav article aside header footer datagrid command) => 'New' - handle_end 'p', 'body', 'html', 'form', %w( button marquee object ), %w( dd dt li ) => 'ListItem' + handle_end 'p', 'body', 'html', 'form', %w(button marquee object), %w(dd dt li) => 'ListItem' - handle_end %w( address blockquote center div dl fieldset listing menu ol pre ul ) => 'Block' + handle_end %w(address blockquote center div dl fieldset listing menu ol pre ul) => 'Block' handle_end HEADING_ELEMENTS => 'Heading' - handle_end %w( a b big em font i nobr s small strike strong tt u ) => 'Formatting' + handle_end %w(a b big em font i nobr s small strike strong tt u) => 'Formatting' - handle_end %w( head frameset select optgroup option table caption colgroup col thead tfoot tbody tr td th ) => 'Misplaced' + handle_end %w(head frameset select optgroup option table caption colgroup col thead tfoot tbody tr td th) => 'Misplaced' handle_end 'br' - handle_end %w( area basefont bgsound embed hr image img input isindex param spacer wbr frame ) => 'None' + handle_end %w(area basefont bgsound embed hr image img input isindex param spacer wbr frame) => 'None' - handle_end %w( noframes noscript noembed textarea xmp iframe ) => 'CdataTextAreaXmp' + handle_end %w(noframes noscript noembed textarea xmp iframe ) => 'CdataTextAreaXmp' - handle_end %w( event-source section nav article aside header footer datagrid command ) => 'New' + handle_end %w(event-source section nav article aside header footer datagrid command) => 'New' def initialize(parser, tree) super(parser, tree) @@ -107,7 +107,7 @@ module HTML5 def startTagBody(name, attributes) parse_error("unexpected-start-tag", {"name" => "body"}) - if (@tree.open_elements.length == 1 || @tree.open_elements[1].name != 'body') + if @tree.open_elements.length == 1 || @tree.open_elements[1].name != 'body' assert @parser.inner_html else attributes.each do |attr, value| @@ -126,11 +126,11 @@ module HTML5 def startTagForm(name, attributes) if @tree.formPointer - parse_error("Unexpected start tag (form). Ignored.") + parse_error("unexpected-start-tag", {"name" => name}) else endTagP('p') if in_scope?('p') @tree.insert_element(name, attributes) - @tree.formPointer = @tree.open_elements[-1] + @tree.formPointer = @tree.open_elements.last end end diff --git a/vendor/plugins/HTML5lib/lib/html5/html5parser/in_select_phase.rb b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_select_phase.rb index 619a4503..0e16e51b 100644 --- a/vendor/plugins/HTML5lib/lib/html5/html5parser/in_select_phase.rb +++ b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_select_phase.rb @@ -69,8 +69,7 @@ module HTML5 end def endTagTableElements(name) - parse_error("unexpected-end-tag-in-select", - {"name" => name}) + parse_error("unexpected-end-tag-in-select", {"name" => name}) if in_scope?(name, true) endTagSelect('select') @@ -79,7 +78,7 @@ module HTML5 end def endTagOther(name) - parse_error(_("Unexpected end tag token (#{name}) in the select phase. Ignored.")) + parse_error("unexpected-end-tag-in-select", {"name" => name}) end end diff --git a/vendor/plugins/HTML5lib/lib/html5/html5parser/in_table_body_phase.rb b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_table_body_phase.rb index b9adfa40..0685d34e 100644 --- a/vendor/plugins/HTML5lib/lib/html5/html5parser/in_table_body_phase.rb +++ b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_table_body_phase.rb @@ -7,7 +7,7 @@ module HTML5 handle_start 'html', 'tr', %w( td th ) => 'TableCell', %w( caption col colgroup tbody tfoot thead ) => 'TableOther' - handle_end 'table', %w( tbody tfoot thead ) => 'TableRowGroup', %w( body caption col colgroup html td th tr ) => 'Ingore' + handle_end 'table', %w( tbody tfoot thead ) => 'TableRowGroup', %w( body caption col colgroup html td th tr ) => 'Ignore' def processCharacters(data) @parser.phases[:inTable].processCharacters(data) diff --git a/vendor/plugins/HTML5lib/lib/html5/html5parser/root_element_phase.rb b/vendor/plugins/HTML5lib/lib/html5/html5parser/root_element_phase.rb index 33990aae..22db451b 100644 --- a/vendor/plugins/HTML5lib/lib/html5/html5parser/root_element_phase.rb +++ b/vendor/plugins/HTML5lib/lib/html5/html5parser/root_element_phase.rb @@ -33,10 +33,9 @@ module HTML5 def insert_html_element element = @tree.createElement('html', {}) - @tree.open_elements.push(element) + @tree.open_elements << element @tree.document.appendChild(element) @parser.phase = @parser.phases[:beforeHead] end - end end \ No newline at end of file diff --git a/vendor/plugins/HTML5lib/lib/html5/inputstream.rb b/vendor/plugins/HTML5lib/lib/html5/inputstream.rb index fe87bf96..f763bf69 100755 --- a/vendor/plugins/HTML5lib/lib/html5/inputstream.rb +++ b/vendor/plugins/HTML5lib/lib/html5/inputstream.rb @@ -60,15 +60,11 @@ module HTML5 if @char_encoding == 'windows-1252' @win1252 = true elsif @char_encoding != 'utf-8' + require 'iconv' begin - require 'iconv' - begin - @buffer << @raw_stream.read unless @raw_stream.eof? - @buffer = Iconv.iconv('utf-8', @char_encoding, @buffer).first - rescue - @win1252 = true - end - rescue LoadError + @buffer << @raw_stream.read unless @raw_stream.eof? + @buffer = Iconv.iconv('utf-8', @char_encoding, @buffer).first + rescue @win1252 = true end end @@ -88,12 +84,11 @@ module HTML5 def open_stream(source) # Already an IO like object if source.respond_to?(:read) - @stream = source + source else # Treat source as a string and wrap in StringIO - @stream = StringIO.new(source) + StringIO.new(source) end - return @stream end def detect_encoding @@ -138,14 +133,12 @@ module HTML5 encoding = @DEFAULT_ENCODING end - #Substitute for equivalent encodings - encoding_sub = {'iso-8859-1' => 'windows-1252'} - - if encoding_sub.has_key?(encoding.downcase) - encoding = encoding_sub[encoding.downcase] + #Substitute for equivalent encoding + if 'iso-8859-1' == encoding.downcase + encoding = 'windows-1252' end - return encoding + encoding end # Attempts to detect at BOM at the start of the stream. If @@ -153,9 +146,9 @@ module HTML5 # encoding otherwise return nil def detect_bom bom_dict = { - "\xef\xbb\xbf" => 'utf-8', - "\xff\xfe" => 'utf-16le', - "\xfe\xff" => 'utf-16be', + "\xef\xbb\xbf" => 'utf-8', + "\xff\xfe" => 'utf-16le', + "\xfe\xff" => 'utf-16be', "\xff\xfe\x00\x00" => 'utf-32le', "\x00\x00\xfe\xff" => 'utf-32be' } @@ -200,7 +193,7 @@ module HTML5 #TODO: huh? require 'delegate' - # @raw_stream = SimpleDelegator.new(@raw_stream) + @raw_stream = SimpleDelegator.new(@raw_stream) class << @raw_stream def read(chars=-1) @@ -251,7 +244,7 @@ module HTML5 col -= 1 end end - return [line+1, col] + return [line + 1, col] end # Read one character from the stream or queue if available. Return @@ -260,9 +253,9 @@ module HTML5 unless @queue.empty? return @queue.shift else - if @tell + 3 > @buffer.length and !@raw_stream.eof? + if @tell + 3 > @buffer.length && !@raw_stream.eof? # read next block - @buffer = @buffer[@tell .. -1] + @raw_stream.read(@NUM_BYTES_BUFFER) + @buffer = @buffer[@tell..-1] + @raw_stream.read(@NUM_BYTES_BUFFER) @tell = 0 end @@ -270,7 +263,7 @@ module HTML5 @tell += 1 case c - when 0x01 .. 0x7F + when 0x01..0x7F if c == 0x0D # normalize newlines @tell += 1 if @buffer[@tell] == 0x0A @@ -288,7 +281,7 @@ module HTML5 c.chr - when 0x80 .. 0xBF + when 0x80..0xBF if !@win1252 [0xFFFD].pack('U') # invalid utf-8 elsif c <= 0x9f @@ -297,10 +290,11 @@ module HTML5 "\xC2" + c.chr # convert to utf-8 end - when 0xC0 .. 0xFF + when 0xC0..0xFF if instance_variables.include?("@win1252") && @win1252 - "\xC3" + (c-64).chr # convert to utf-8 - elsif @buffer[@tell-1 .. @tell+3] =~ /^ + "\xC3" + (c - 64).chr # convert to utf-8 + # from http://www.w3.org/International/questions/qa-forms-utf-8.en.php + elsif @buffer[@tell - 1..@tell + 3] =~ /^ ( [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte diff --git a/vendor/plugins/HTML5lib/lib/html5/sanitizer.rb b/vendor/plugins/HTML5lib/lib/html5/sanitizer.rb index d1a6c430..a8bf088a 100644 --- a/vendor/plugins/HTML5lib/lib/html5/sanitizer.rb +++ b/vendor/plugins/HTML5lib/lib/html5/sanitizer.rb @@ -110,13 +110,13 @@ module HTML5 def sanitize_token(token) case token[:type] when :StartTag, :EndTag, :EmptyTag - if ALLOWED_ELEMENTS.include?(token[:name]) + if self.class.const_get("ALLOWED_ELEMENTS").include?(token[:name]) if token.has_key? :data attrs = Hash[*token[:data].flatten] - attrs.delete_if { |attr,v| !ALLOWED_ATTRIBUTES.include?(attr) } + attrs.delete_if { |attr,v| !self.class.const_get("ALLOWED_ATTRIBUTES").include?(attr) } ATTR_VAL_IS_URI.each do |attr| val_unescaped = CGI.unescapeHTML(attrs[attr].to_s).gsub(/`|[\000-\040\177\s]+|\302[\200-\240]/,'').downcase - if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ and !ALLOWED_PROTOCOLS.include?(val_unescaped.split(':')[0]) + if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ and !self.class.const_get("ALLOWED_PROTOCOLS").include?(val_unescaped.split(':')[0]) attrs.delete attr end end @@ -160,14 +160,14 @@ module HTML5 style.scan(/([-\w]+)\s*:\s*([^:;]*)/) do |prop, val| next if val.empty? prop.downcase! - if ALLOWED_CSS_PROPERTIES.include?(prop) + if self.class.const_get("ALLOWED_CSS_PROPERTIES").include?(prop) clean << "#{prop}: #{val};" elsif %w[background border margin padding].include?(prop.split('-')[0]) clean << "#{prop}: #{val};" unless val.split().any? do |keyword| - !ALLOWED_CSS_KEYWORDS.include?(keyword) and + !self.class.const_get("ALLOWED_CSS_KEYWORDS").include?(keyword) and keyword !~ /^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$/ end - elsif ALLOWED_SVG_PROPERTIES.include?(prop) + elsif self.class.const_get("ALLOWED_SVG_PROPERTIES").include?(prop) clean << "#{prop}: #{val};" end end diff --git a/vendor/plugins/HTML5lib/lib/html5/serializer/htmlserializer.rb b/vendor/plugins/HTML5lib/lib/html5/serializer/htmlserializer.rb index d8920b2c..467c5e95 100644 --- a/vendor/plugins/HTML5lib/lib/html5/serializer/htmlserializer.rb +++ b/vendor/plugins/HTML5lib/lib/html5/serializer/htmlserializer.rb @@ -73,7 +73,7 @@ module HTML5 elsif [:Characters, :SpaceCharacters].include? type if type == :SpaceCharacters or in_cdata if in_cdata and token[:data].include?("") - serialize_error(_("Unexpected in CDATA")) + serialize_error("Unexpected in CDATA") end result << token[:data] else diff --git a/vendor/plugins/HTML5lib/lib/html5/treebuilders/rexml.rb b/vendor/plugins/HTML5lib/lib/html5/treebuilders/rexml.rb index 2a937978..3494c70c 100644 --- a/vendor/plugins/HTML5lib/lib/html5/treebuilders/rexml.rb +++ b/vendor/plugins/HTML5lib/lib/html5/treebuilders/rexml.rb @@ -99,12 +99,13 @@ module HTML5 super nil end - def appendChild node - if node.kind_of? Element and node.name == 'html' - node.rxobj.add_namespace('http://www.w3.org/1999/xhtml') - end - super node - end + # ryansking: not sure why this was here. removing it doesn't cause any tests to fail + # def appendChild node + # if node.kind_of? Element and node.name == 'html' + # node.rxobj.add_namespace('http://www.w3.org/1999/xhtml') + # end + # super node + # end def printTree indent=0 tree = "#document" diff --git a/vendor/plugins/HTML5lib/lib/html5/treebuilders/simpletree.rb b/vendor/plugins/HTML5lib/lib/html5/treebuilders/simpletree.rb index 4e824139..e5cddfad 100644 --- a/vendor/plugins/HTML5lib/lib/html5/treebuilders/simpletree.rb +++ b/vendor/plugins/HTML5lib/lib/html5/treebuilders/simpletree.rb @@ -176,7 +176,7 @@ module HTML5 def get_fragment @document = super - @document.childNodes + @document end end diff --git a/vendor/plugins/HTML5lib/lib/html5/treewalkers/base.rb b/vendor/plugins/HTML5lib/lib/html5/treewalkers/base.rb index 3f1cc7d3..9824cf27 100644 --- a/vendor/plugins/HTML5lib/lib/html5/treewalkers/base.rb +++ b/vendor/plugins/HTML5lib/lib/html5/treewalkers/base.rb @@ -68,6 +68,14 @@ class Base end alias walk each + + def to_ary + a = [] + each do |i| + a << i + end + a + end end class NonRecursiveTreeWalker < TreeWalkers::Base diff --git a/vendor/plugins/HTML5lib/testdata/tree-construction/tests6.dat b/vendor/plugins/HTML5lib/testdata/tree-construction/tests6.dat index 29ff8040..6edef40c 100644 --- a/vendor/plugins/HTML5lib/testdata/tree-construction/tests6.dat +++ b/vendor/plugins/HTML5lib/testdata/tree-construction/tests6.dat @@ -91,3 +91,106 @@ End of file before doctype | |
| + +#data + +