Sync with latest HTML5lib

This commit is contained in:
Jacques Distler 2007-10-06 11:55:58 -05:00
parent 3a3cfeaa9b
commit 55fdc9fff4
18 changed files with 266 additions and 124 deletions

View file

@ -1,5 +1,6 @@
#!/usr/bin/env ruby #!/usr/bin/env ruby
require 'core_ext/string'
$:.unshift File.dirname(__FILE__), 'lib' $:.unshift File.dirname(__FILE__), 'lib'
def parse(opts, args) def parse(opts, args)
@ -82,7 +83,7 @@ def print_output(parser, document, opts)
if opts.error if opts.error
errList=[] errList=[]
for pos, errorcode, datavars in parser.errors for pos, errorcode, datavars in parser.errors
errList << "Line %i Col %i"%pos + " " + constants.E.get(errorcode, 'Unknown error "%s"' % errorcode) % datavars errList << "Line #{pos[0]} Col #{pos[1]} " + (HTML5::E[errorcode] || "Unknown error \"#{errorcode}\"") % datavars
end end
$stdout.write("\nParse errors:\n" + errList.join("\n")+"\n") $stdout.write("\nParse errors:\n" + errList.join("\n")+"\n")
end end

View file

@ -0,0 +1,17 @@
# Extends String#% so that a Hash argument performs named substitution:
# every "%(key)" in the receiver is replaced by the matching hash value.
# Any other argument is passed to the built-in formatter unchanged.
#
#   "Unexpected start tag (%(name))." % {"name" => "div"}
#   # => "Unexpected start tag (div)."
class String
  # Keep a handle on the built-in formatter. Guarded so that loading this
  # file a second time does not alias the override to itself, which would
  # make old_format recurse forever.
  alias_method :old_format, :% unless method_defined?(:old_format)

  # data - a Hash of substitutions, or any argument accepted by the
  #        built-in String#% (single value or Array).
  #
  # Returns a new String; the receiver is never modified.
  define_method("%") do |data|
    if data.kind_of?(Hash)
      # Start from a copy so frozen receivers work and the result is always
      # a distinct object. Matching is literal (no regex metacharacters in
      # keys) and the block form inserts the value verbatim, so backslashes
      # in a value are not treated as backreferences. Values are converted
      # with to_s so non-String values do not raise.
      data.inject(dup) do |text, (key, value)|
        text.gsub("%(#{key})") { value.to_s }
      end
    else
      # Warnings are silenced around the built-in formatter (presumably to
      # hide "too many arguments for format string"). The caller's setting
      # is restored afterwards, even when formatting raises.
      saved_verbose = $VERBOSE
      begin
        $VERBOSE = false
        old_format(data)
      ensure
        $VERBOSE = saved_verbose
      end
    end
  end
end

View file

@ -69,15 +69,15 @@ module HTML5
if inner_html if inner_html
case @inner_html = container.downcase case @inner_html = container.downcase
when 'title', 'textarea' when 'title', 'textarea'
@tokenizer.content_model_flag = :RCDATA @tokenizer.content_model_flag = :RCDATA
when 'style', 'script', 'xmp', 'iframe', 'noembed', 'noframes', 'noscript' when 'style', 'script', 'xmp', 'iframe', 'noembed', 'noframes', 'noscript'
@tokenizer.content_model_flag = :CDATA @tokenizer.content_model_flag = :CDATA
when 'plaintext' when 'plaintext'
@tokenizer.content_model_flag = :PLAINTEXT @tokenizer.content_model_flag = :PLAINTEXT
else else
# content_model_flag already is PCDATA # content_model_flag already is PCDATA
#@tokenizer.content_model_flag = :PCDATA @tokenizer.content_model_flag = :PCDATA
end end
@phase = @phases[:rootElement] @phase = @phases[:rootElement]

View file

@ -6,45 +6,45 @@ module HTML5
# http://www.whatwg.org/specs/web-apps/current-work/#in-body # http://www.whatwg.org/specs/web-apps/current-work/#in-body
handle_start 'html' handle_start 'html'
handle_start %w( base link meta script style ) => 'ProcessInHead' handle_start %w(base link meta script style) => 'ProcessInHead'
handle_start 'title' handle_start 'title'
handle_start 'body', 'form', 'plaintext', 'a', 'button', 'xmp', 'table', 'hr', 'image' handle_start 'body', 'form', 'plaintext', 'a', 'button', 'xmp', 'table', 'hr', 'image'
handle_start 'input', 'textarea', 'select', 'isindex', %w( marquee object ) handle_start 'input', 'textarea', 'select', 'isindex', %w(marquee object)
handle_start %w( li dd dt ) => 'ListItem' handle_start %w(li dd dt) => 'ListItem'
handle_start %w( address blockquote center dir div dl fieldset listing menu ol p pre ul ) => 'CloseP'
handle_start %w( b big em font i s small strike strong tt u ) => 'Formatting' handle_start %w(address blockquote center dir div dl fieldset listing menu ol p pre ul) => 'CloseP'
handle_start %w(b big em font i s small strike strong tt u) => 'Formatting'
handle_start 'nobr' handle_start 'nobr'
handle_start %w( area basefont bgsound br embed img param spacer wbr ) => 'VoidFormatting' handle_start %w(area basefont bgsound br embed img param spacer wbr) => 'VoidFormatting'
handle_start %w( iframe noembed noframes noscript ) => 'Cdata', HEADING_ELEMENTS => 'Heading' handle_start %w(iframe noembed noframes noscript) => 'Cdata', HEADING_ELEMENTS => 'Heading'
handle_start %w( caption col colgroup frame frameset head option optgroup tbody td tfoot th thead tr ) => 'Misplaced' handle_start %w(caption col colgroup frame frameset head option optgroup tbody td tfoot th thead tr) => 'Misplaced'
handle_start %w( event-source section nav article aside header footer datagrid command ) => 'New' handle_start %w(event-source section nav article aside header footer datagrid command) => 'New'
handle_end 'p', 'body', 'html', 'form', %w( button marquee object ), %w( dd dt li ) => 'ListItem' handle_end 'p', 'body', 'html', 'form', %w(button marquee object), %w(dd dt li) => 'ListItem'
handle_end %w( address blockquote center div dl fieldset listing menu ol pre ul ) => 'Block' handle_end %w(address blockquote center div dl fieldset listing menu ol pre ul) => 'Block'
handle_end HEADING_ELEMENTS => 'Heading' handle_end HEADING_ELEMENTS => 'Heading'
handle_end %w( a b big em font i nobr s small strike strong tt u ) => 'Formatting' handle_end %w(a b big em font i nobr s small strike strong tt u) => 'Formatting'
handle_end %w( head frameset select optgroup option table caption colgroup col thead tfoot tbody tr td th ) => 'Misplaced' handle_end %w(head frameset select optgroup option table caption colgroup col thead tfoot tbody tr td th) => 'Misplaced'
handle_end 'br' handle_end 'br'
handle_end %w( area basefont bgsound embed hr image img input isindex param spacer wbr frame ) => 'None' handle_end %w(area basefont bgsound embed hr image img input isindex param spacer wbr frame) => 'None'
handle_end %w( noframes noscript noembed textarea xmp iframe ) => 'CdataTextAreaXmp' handle_end %w(noframes noscript noembed textarea xmp iframe ) => 'CdataTextAreaXmp'
handle_end %w( event-source section nav article aside header footer datagrid command ) => 'New' handle_end %w(event-source section nav article aside header footer datagrid command) => 'New'
def initialize(parser, tree) def initialize(parser, tree)
super(parser, tree) super(parser, tree)
@ -107,7 +107,7 @@ module HTML5
def startTagBody(name, attributes) def startTagBody(name, attributes)
parse_error("unexpected-start-tag", {"name" => "body"}) parse_error("unexpected-start-tag", {"name" => "body"})
if (@tree.open_elements.length == 1 || @tree.open_elements[1].name != 'body') if @tree.open_elements.length == 1 || @tree.open_elements[1].name != 'body'
assert @parser.inner_html assert @parser.inner_html
else else
attributes.each do |attr, value| attributes.each do |attr, value|
@ -126,11 +126,11 @@ module HTML5
def startTagForm(name, attributes) def startTagForm(name, attributes)
if @tree.formPointer if @tree.formPointer
parse_error("Unexpected start tag (form). Ignored.") parse_error("unexpected-start-tag", {"name" => name})
else else
endTagP('p') if in_scope?('p') endTagP('p') if in_scope?('p')
@tree.insert_element(name, attributes) @tree.insert_element(name, attributes)
@tree.formPointer = @tree.open_elements[-1] @tree.formPointer = @tree.open_elements.last
end end
end end

View file

@ -69,8 +69,7 @@ module HTML5
end end
def endTagTableElements(name) def endTagTableElements(name)
parse_error("unexpected-end-tag-in-select", parse_error("unexpected-end-tag-in-select", {"name" => name})
{"name" => name})
if in_scope?(name, true) if in_scope?(name, true)
endTagSelect('select') endTagSelect('select')
@ -79,7 +78,7 @@ module HTML5
end end
def endTagOther(name) def endTagOther(name)
parse_error(_("Unexpected end tag token (#{name}) in the select phase. Ignored.")) parse_error("unexpected-end-tag-in-select", {"name" => name})
end end
end end

View file

@ -7,7 +7,7 @@ module HTML5
handle_start 'html', 'tr', %w( td th ) => 'TableCell', %w( caption col colgroup tbody tfoot thead ) => 'TableOther' handle_start 'html', 'tr', %w( td th ) => 'TableCell', %w( caption col colgroup tbody tfoot thead ) => 'TableOther'
handle_end 'table', %w( tbody tfoot thead ) => 'TableRowGroup', %w( body caption col colgroup html td th tr ) => 'Ingore' handle_end 'table', %w( tbody tfoot thead ) => 'TableRowGroup', %w( body caption col colgroup html td th tr ) => 'Ignore'
def processCharacters(data) def processCharacters(data)
@parser.phases[:inTable].processCharacters(data) @parser.phases[:inTable].processCharacters(data)

View file

@ -33,10 +33,9 @@ module HTML5
def insert_html_element def insert_html_element
element = @tree.createElement('html', {}) element = @tree.createElement('html', {})
@tree.open_elements.push(element) @tree.open_elements << element
@tree.document.appendChild(element) @tree.document.appendChild(element)
@parser.phase = @parser.phases[:beforeHead] @parser.phase = @parser.phases[:beforeHead]
end end
end end
end end

View file

@ -60,15 +60,11 @@ module HTML5
if @char_encoding == 'windows-1252' if @char_encoding == 'windows-1252'
@win1252 = true @win1252 = true
elsif @char_encoding != 'utf-8' elsif @char_encoding != 'utf-8'
require 'iconv'
begin begin
require 'iconv' @buffer << @raw_stream.read unless @raw_stream.eof?
begin @buffer = Iconv.iconv('utf-8', @char_encoding, @buffer).first
@buffer << @raw_stream.read unless @raw_stream.eof? rescue
@buffer = Iconv.iconv('utf-8', @char_encoding, @buffer).first
rescue
@win1252 = true
end
rescue LoadError
@win1252 = true @win1252 = true
end end
end end
@ -88,12 +84,11 @@ module HTML5
def open_stream(source) def open_stream(source)
# Already an IO like object # Already an IO like object
if source.respond_to?(:read) if source.respond_to?(:read)
@stream = source source
else else
# Treat source as a string and wrap in StringIO # Treat source as a string and wrap in StringIO
@stream = StringIO.new(source) StringIO.new(source)
end end
return @stream
end end
def detect_encoding def detect_encoding
@ -138,14 +133,12 @@ module HTML5
encoding = @DEFAULT_ENCODING encoding = @DEFAULT_ENCODING
end end
#Substitute for equivalent encodings #Substitute for equivalent encoding
encoding_sub = {'iso-8859-1' => 'windows-1252'} if 'iso-8859-1' == encoding.downcase
encoding = 'windows-1252'
if encoding_sub.has_key?(encoding.downcase)
encoding = encoding_sub[encoding.downcase]
end end
return encoding encoding
end end
# Attempts to detect at BOM at the start of the stream. If # Attempts to detect at BOM at the start of the stream. If
@ -153,9 +146,9 @@ module HTML5
# encoding otherwise return nil # encoding otherwise return nil
def detect_bom def detect_bom
bom_dict = { bom_dict = {
"\xef\xbb\xbf" => 'utf-8', "\xef\xbb\xbf" => 'utf-8',
"\xff\xfe" => 'utf-16le', "\xff\xfe" => 'utf-16le',
"\xfe\xff" => 'utf-16be', "\xfe\xff" => 'utf-16be',
"\xff\xfe\x00\x00" => 'utf-32le', "\xff\xfe\x00\x00" => 'utf-32le',
"\x00\x00\xfe\xff" => 'utf-32be' "\x00\x00\xfe\xff" => 'utf-32be'
} }
@ -200,7 +193,7 @@ module HTML5
#TODO: huh? #TODO: huh?
require 'delegate' require 'delegate'
# @raw_stream = SimpleDelegator.new(@raw_stream) @raw_stream = SimpleDelegator.new(@raw_stream)
class << @raw_stream class << @raw_stream
def read(chars=-1) def read(chars=-1)
@ -251,7 +244,7 @@ module HTML5
col -= 1 col -= 1
end end
end end
return [line+1, col] return [line + 1, col]
end end
# Read one character from the stream or queue if available. Return # Read one character from the stream or queue if available. Return
@ -260,9 +253,9 @@ module HTML5
unless @queue.empty? unless @queue.empty?
return @queue.shift return @queue.shift
else else
if @tell + 3 > @buffer.length and !@raw_stream.eof? if @tell + 3 > @buffer.length && !@raw_stream.eof?
# read next block # read next block
@buffer = @buffer[@tell .. -1] + @raw_stream.read(@NUM_BYTES_BUFFER) @buffer = @buffer[@tell..-1] + @raw_stream.read(@NUM_BYTES_BUFFER)
@tell = 0 @tell = 0
end end
@ -270,7 +263,7 @@ module HTML5
@tell += 1 @tell += 1
case c case c
when 0x01 .. 0x7F when 0x01..0x7F
if c == 0x0D if c == 0x0D
# normalize newlines # normalize newlines
@tell += 1 if @buffer[@tell] == 0x0A @tell += 1 if @buffer[@tell] == 0x0A
@ -288,7 +281,7 @@ module HTML5
c.chr c.chr
when 0x80 .. 0xBF when 0x80..0xBF
if !@win1252 if !@win1252
[0xFFFD].pack('U') # invalid utf-8 [0xFFFD].pack('U') # invalid utf-8
elsif c <= 0x9f elsif c <= 0x9f
@ -297,10 +290,11 @@ module HTML5
"\xC2" + c.chr # convert to utf-8 "\xC2" + c.chr # convert to utf-8
end end
when 0xC0 .. 0xFF when 0xC0..0xFF
if instance_variables.include?("@win1252") && @win1252 if instance_variables.include?("@win1252") && @win1252
"\xC3" + (c-64).chr # convert to utf-8 "\xC3" + (c - 64).chr # convert to utf-8
elsif @buffer[@tell-1 .. @tell+3] =~ /^ # from http://www.w3.org/International/questions/qa-forms-utf-8.en.php
elsif @buffer[@tell - 1..@tell + 3] =~ /^
( [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte ( [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
| \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
| [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte

View file

@ -110,13 +110,13 @@ module HTML5
def sanitize_token(token) def sanitize_token(token)
case token[:type] case token[:type]
when :StartTag, :EndTag, :EmptyTag when :StartTag, :EndTag, :EmptyTag
if ALLOWED_ELEMENTS.include?(token[:name]) if self.class.const_get("ALLOWED_ELEMENTS").include?(token[:name])
if token.has_key? :data if token.has_key? :data
attrs = Hash[*token[:data].flatten] attrs = Hash[*token[:data].flatten]
attrs.delete_if { |attr,v| !ALLOWED_ATTRIBUTES.include?(attr) } attrs.delete_if { |attr,v| !self.class.const_get("ALLOWED_ATTRIBUTES").include?(attr) }
ATTR_VAL_IS_URI.each do |attr| ATTR_VAL_IS_URI.each do |attr|
val_unescaped = CGI.unescapeHTML(attrs[attr].to_s).gsub(/`|[\000-\040\177\s]+|\302[\200-\240]/,'').downcase val_unescaped = CGI.unescapeHTML(attrs[attr].to_s).gsub(/`|[\000-\040\177\s]+|\302[\200-\240]/,'').downcase
if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ and !ALLOWED_PROTOCOLS.include?(val_unescaped.split(':')[0]) if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ and !self.class.const_get("ALLOWED_PROTOCOLS").include?(val_unescaped.split(':')[0])
attrs.delete attr attrs.delete attr
end end
end end
@ -160,14 +160,14 @@ module HTML5
style.scan(/([-\w]+)\s*:\s*([^:;]*)/) do |prop, val| style.scan(/([-\w]+)\s*:\s*([^:;]*)/) do |prop, val|
next if val.empty? next if val.empty?
prop.downcase! prop.downcase!
if ALLOWED_CSS_PROPERTIES.include?(prop) if self.class.const_get("ALLOWED_CSS_PROPERTIES").include?(prop)
clean << "#{prop}: #{val};" clean << "#{prop}: #{val};"
elsif %w[background border margin padding].include?(prop.split('-')[0]) elsif %w[background border margin padding].include?(prop.split('-')[0])
clean << "#{prop}: #{val};" unless val.split().any? do |keyword| clean << "#{prop}: #{val};" unless val.split().any? do |keyword|
!ALLOWED_CSS_KEYWORDS.include?(keyword) and !self.class.const_get("ALLOWED_CSS_KEYWORDS").include?(keyword) and
keyword !~ /^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$/ keyword !~ /^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$/
end end
elsif ALLOWED_SVG_PROPERTIES.include?(prop) elsif self.class.const_get("ALLOWED_SVG_PROPERTIES").include?(prop)
clean << "#{prop}: #{val};" clean << "#{prop}: #{val};"
end end
end end

View file

@ -73,7 +73,7 @@ module HTML5
elsif [:Characters, :SpaceCharacters].include? type elsif [:Characters, :SpaceCharacters].include? type
if type == :SpaceCharacters or in_cdata if type == :SpaceCharacters or in_cdata
if in_cdata and token[:data].include?("</") if in_cdata and token[:data].include?("</")
serialize_error(_("Unexpected </ in CDATA")) serialize_error("Unexpected </ in CDATA")
end end
result << token[:data] result << token[:data]
else else

View file

@ -99,12 +99,13 @@ module HTML5
super nil super nil
end end
def appendChild node # ryansking: not sure why this was here. removing it doesn't cause any tests to fail
if node.kind_of? Element and node.name == 'html' # def appendChild node
node.rxobj.add_namespace('http://www.w3.org/1999/xhtml') # if node.kind_of? Element and node.name == 'html'
end # node.rxobj.add_namespace('http://www.w3.org/1999/xhtml')
super node # end
end # super node
# end
def printTree indent=0 def printTree indent=0
tree = "#document" tree = "#document"

View file

@ -176,7 +176,7 @@ module HTML5
def get_fragment def get_fragment
@document = super @document = super
@document.childNodes @document
end end
end end

View file

@ -68,6 +68,14 @@ class Base
end end
alias walk each alias walk each
# Collects every item yielded by #each into a new Array, in iteration
# order, and returns it.
def to_ary
  collected = []
  each { |item| collected.push(item) }
  collected
end
end end
class NonRecursiveTreeWalker < TreeWalkers::Base class NonRecursiveTreeWalker < TreeWalkers::Base

View file

@ -91,3 +91,106 @@ End of file before doctype
| <html> | <html>
| <head> | <head>
| <body> | <body>
#data
<body>
<div>
#errors
Unexpected start tag (body)
Expected closing tag. Unexpected end of file
#document-fragment
div
#document
| "
"
| <div>
#data
<frameset></frameset>
foo
#errors
Unexpected start tag (frameset). Expected DOCTYPE.
Unexpected non-space characters in the after frameset phase. Ignored.
#document
| <html>
| <head>
| <frameset>
| "
"
#data
<frameset></frameset>
<noframes>
#errors
Unexpected start tag (frameset). Expected DOCTYPE.
Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <frameset>
| "
"
| <noframes>
#data
<frameset></frameset>
<div>
#errors
Unexpected start tag (frameset). Expected DOCTYPE.
Unexpected start tag (div) in the after frameset phase. Ignored.
#document
| <html>
| <head>
| <frameset>
| "
"
#data
<frameset></frameset>
</html>
#errors
Unexpected start tag (frameset). Expected DOCTYPE.
#document
| <html>
| <head>
| <frameset>
| "
"
#data
<frameset></frameset>
</div>
#errors
Unexpected start tag (frameset). Expected DOCTYPE.
Unexpected end tag (div) in the after frameset phase. Ignored.
#document
| <html>
| <head>
| <frameset>
| "
"
#data
<form><form>
#errors
Unexpected start tag (form). Expected DOCTYPE.
Unexpected start tag (form).
Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <form>
#data
<button><button>
#errors
Unexpected start tag (button). Expected DOCTYPE.
Unexpected start tag (button) implies end tag (button).
Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <button>
| <button>

View file

@ -714,15 +714,15 @@
{"description": "allowed 'a' attribute on <datalist>", {"description": "allowed 'a' attribute on <datalist>",
"input": "<datalist a>", "input": "<datalist a>",
"fail-if": "unknown-attribute"}, "fail-unless": "unknown-attribute"},
{"description": "allowed 'd' attribute on <datalist>", {"description": "allowed 'd' attribute on <datalist>",
"input": "<datalist d>", "input": "<datalist d>",
"fail-if": "unknown-attribute"}, "fail-unless": "unknown-attribute"},
{"description": "allowed 't' attribute on <datalist>", {"description": "allowed 't' attribute on <datalist>",
"input": "<datalist t>", "input": "<datalist t>",
"fail-if": "unknown-attribute"}, "fail-unless": "unknown-attribute"},
{"description": "allowed 'action' attribute on <button>", {"description": "allowed 'action' attribute on <button>",
"input": "<button action>", "input": "<button action>",

View file

@ -8,9 +8,11 @@ else
TESTDATA_DIR = File.join(File.dirname(File.dirname(File.expand_path(__FILE__))), 'testdata') TESTDATA_DIR = File.join(File.dirname(File.dirname(File.expand_path(__FILE__))), 'testdata')
end end
$:.unshift File.join(File.dirname(File.dirname(__FILE__)),'lib') # $:.unshift File.join(File.dirname(File.dirname(__FILE__)), 'lib')
$:.unshift File.dirname(__FILE__) # $:.unshift File.dirname(__FILE__)
require 'core_ext/string'
def html5_test_files(subdirectory) def html5_test_files(subdirectory)
Dir[File.join(TESTDATA_DIR, subdirectory, '*.*')] Dir[File.join(TESTDATA_DIR, subdirectory, '*.*')]
@ -42,7 +44,7 @@ module HTML5
def each def each
data = {} data = {}
key=nil key = nil
@f.each_line do |line| @f.each_line do |line|
if line[0] == ?# and @sections.include?(line[1..-2]) if line[0] == ?# and @sections.include?(line[1..-2])
heading = line[1..-2] heading = line[1..-2]
@ -68,21 +70,3 @@ module HTML5
end end
end end
end end
# Extends String#% so that a Hash argument performs named substitution:
# every "%(key)" in the receiver is replaced by the matching hash value.
# Any other argument is passed to the built-in formatter unchanged.
#
#   "Unexpected start tag (%(name))." % {"name" => "div"}
#   # => "Unexpected start tag (div)."
class String
  # Keep a handle on the built-in formatter. Guarded so that loading this
  # definition a second time does not alias the override to itself, which
  # would make old_format recurse forever.
  alias_method :old_format, :% unless method_defined?(:old_format)

  # data - a Hash of substitutions, or any argument accepted by the
  #        built-in String#% (single value or Array).
  #
  # Returns a new String; the receiver is never modified.
  define_method("%") do |data|
    if data.kind_of?(Hash)
      # Start from a copy so frozen receivers work and the result is always
      # a distinct object. Matching is literal (no regex metacharacters in
      # keys) and the block form inserts the value verbatim, so backslashes
      # in a value are not treated as backreferences. Values are converted
      # with to_s so non-String values do not raise.
      data.inject(dup) do |text, (key, value)|
        text.gsub("%(#{key})") { value.to_s }
      end
    else
      # Warnings are silenced around the built-in formatter (presumably to
      # hide "too many arguments for format string"). The caller's setting
      # is restored afterwards, even when formatting raises.
      saved_verbose = $VERBOSE
      begin
        $VERBOSE = false
        old_format(data)
      ensure
        $VERBOSE = saved_verbose
      end
    end
  end
end

View file

@ -0,0 +1,17 @@
require File.join(File.dirname(__FILE__), 'preamble')
require "test/unit"
require "html5/inputstream"
# Unit tests for HTML5::HTMLInputStream covering position reporting after
# an unget and reading across a long input.
class TestHtml5Inputstream < Test::Unit::TestCase
  # Reads the leading newline, pushes it back with unget, and expects the
  # reported position to be [1, 0].
  def test_newline_in_queue
    input = HTML5::HTMLInputStream.new("\nfoo")
    consumed = input.char
    input.unget(consumed)
    assert_equal([1, 0], input.position)
  end

  # Consumes 1022 characters of a repeated alphabet and checks the next
  # one. 1022 mod 26 is 8, so the expected character is "i".
  # NOTE(review): presumably 1022 was chosen to straddle the stream's
  # internal read-buffer size -- confirm against HTMLInputStream.
  def test_buffer_boundary
    alphabet = "abcdefghijklmnopqrstuvwxyz"
    input = HTML5::HTMLInputStream.new(alphabet * 50, :encoding => 'windows-1252')
    1022.times { input.char }
    assert_equal("i", input.char)
  end
end

View file

@ -47,31 +47,29 @@ class TestTreeWalkers < Test::Unit::TestCase
indent = 0 indent = 0
concatenateCharacterTokens(tokens) do |token| concatenateCharacterTokens(tokens) do |token|
case token[:type] case token[:type]
when :StartTag, :EmptyTag when :StartTag, :EmptyTag
output << "#{' '*indent}<#{token[:name]}>" output << "#{' '*indent}<#{token[:name]}>"
indent += 2 indent += 2
for name, value in token[:data].to_a.sort for name, value in token[:data].to_a.sort
next if name=='xmlns' next if name=='xmlns'
output << "#{' '*indent}#{name}=\"#{value}\"" output << "#{' '*indent}#{name}=\"#{value}\""
end end
indent -= 2 if token[:type] == :EmptyTag indent -= 2 if token[:type] == :EmptyTag
when :EndTag when :EndTag
indent -= 2 indent -= 2
when :Comment when :Comment
output << "#{' '*indent}<!-- #{token[:data]} -->" output << "#{' '*indent}<!-- #{token[:data]} -->"
when :Doctype when :Doctype
if token[:name] and token[:name].any? if token[:name] and token[:name].any?
output << "#{' '*indent}<!DOCTYPE #{token[:name]}>" output << "#{' '*indent}<!DOCTYPE #{token[:name]}>"
else
output << "#{' '*indent}<!DOCTYPE >"
end
when :Characters, :SpaceCharacters
output << "#{' '*indent}\"#{token[:data]}\""
else else
# TODO: what to do with errors? output << "#{' '*indent}<!DOCTYPE >"
end
when :Characters, :SpaceCharacters
output << "#{' '*indent}\"#{token[:data]}\""
end end
end end
return output.join("\n") output.join("\n")
end end
html5_test_files('tree-construction').each do |test_file| html5_test_files('tree-construction').each do |test_file|
@ -113,4 +111,25 @@ class TestTreeWalkers < Test::Unit::TestCase
end end
end end
end end
# Parses "<html></html>" with every entry in $tree_types_to_test and checks
# that the entry's tree walker yields the html/head/body start and end tag
# tokens, in order. Each expected token is compared with the walker token
# at the same index.
def test_all_tokens
  tag = lambda { |type, name| {:data => [], :type => type, :name => name} }
  expected = [
    tag.call(:StartTag, 'html'),
    tag.call(:StartTag, 'head'),
    tag.call(:EndTag, 'head'),
    tag.call(:StartTag, 'body'),
    tag.call(:EndTag, 'body'),
    tag.call(:EndTag, 'html')
  ]

  $tree_types_to_test.each do |_tree_name, tree_class|
    parser = HTML5::HTMLParser.new(:tree => tree_class[:builder])
    document = parser.parse("<html></html>")
    walker = tree_class[:walker].new(document)
    expected.zip(walker) do |wanted, actual|
      assert_equal(wanted, actual)
    end
  end
end
end end