Sync with latest HTML5lib

This commit is contained in:
Jacques Distler 2007-06-22 03:12:08 -05:00
parent bf572e295f
commit 8e92e4a3ab
41 changed files with 1334 additions and 564 deletions

View file

@ -20,20 +20,43 @@ module HTML5lib
when :EmptyTag
if token[:name].downcase == "meta"
if token[:data].any? {|name,value| name=='charset'}
# replace charset with actual encoding
attrs=Hash[*token[:data].flatten]
attrs['charset'] = @encoding
token[:data] = attrs.to_a.sort
meta_found = true
# replace charset with actual encoding
token[:data].each_with_index do |(name,value),index|
if name == 'charset'
token[:data][index][1]=@encoding
meta_found = true
end
end
# replace charset with actual encoding
has_http_equiv_content_type = false
content_index = -1
token[:data].each_with_index do |(name,value),i|
if name.downcase == 'charset'
token[:data][i] = ['charset', @encoding]
meta_found = true
break
elsif name == 'http-equiv' and value.downcase == 'content-type'
has_http_equiv_content_type = true
elsif name == 'content'
content_index = i
end
end
if not meta_found
if has_http_equiv_content_type and content_index >= 0
token[:data][content_index][1] =
'text/html; charset=%s' % @encoding
meta_found = true
end
end
elsif token[:name].downcase == "head" and not meta_found
# insert meta into empty head
yield({:type => :StartTag, :name => "head", :data => {}})
yield({:type => :EmptyTag, :name => "meta",
:data => {"charset" => @encoding}})
yield({:type => :EndTag, :name => "head"})
yield(:type => :StartTag, :name => "head", :data => token[:data])
yield(:type => :EmptyTag, :name => "meta",
:data => [["charset", @encoding]])
yield(:type => :EndTag, :name => "head")
meta_found = true
next
end
@ -42,8 +65,8 @@ module HTML5lib
if token[:name].downcase == "head" and pending.any?
# insert meta into head (if necessary) and flush pending queue
yield pending.shift
yield({:type => :EmptyTag, :name => "meta",
:data => {"charset" => @encoding}}) if not meta_found
yield(:type => :EmptyTag, :name => "meta",
:data => [["charset", @encoding]]) if not meta_found
yield pending.shift while pending.any?
meta_found = true
state = :post_head

View file

@ -62,7 +62,8 @@ module HTML5lib
@errors = []
@tokenizer = @tokenizer.class unless Class === @tokenizer
@tokenizer = @tokenizer.new(stream, :encoding => encoding, :parseMeta => innerHTML)
@tokenizer = @tokenizer.new(stream, :encoding => encoding,
:parseMeta => !innerHTML)
if innerHTML
case @innerHTML = container.downcase
@ -99,10 +100,13 @@ module HTML5lib
case token[:type]
when :Characters, :SpaceCharacters, :Comment
@phase.send method, token[:data]
when :StartTag, :Doctype
when :StartTag
@phase.send method, token[:name], token[:data]
when :EndTag
@phase.send method, token[:name]
when :Doctype
@phase.send method, token[:name], token[:publicId],
token[:systemId], token[:correct]
else
parseError(token[:data])
end
@ -147,10 +151,6 @@ module HTML5lib
raise ParseError if @strict
end
# This error is not an error
def atheistParseError
end
# HTML5 specific normalizations to the token stream
def normalizeToken(token)
@ -160,9 +160,7 @@ module HTML5lib
# element. If it matches a void element atheists did the wrong
# thing and if it doesn't it's wrong for everyone.
if VOID_ELEMENTS.include?(token[:name])
atheistParseError
else
unless VOID_ELEMENTS.include?(token[:name])
parseError(_('Solidus (/) incorrectly placed in tag.'))
end

View file

@ -5,7 +5,7 @@ module HTML5lib
handle_start 'html', 'head'
handle_end 'html'
handle_end %w( html head body br ) => 'ImplyHead'
def processEOF
startTagHead('head', {})
@ -28,7 +28,7 @@ module HTML5lib
@parser.phase.processStartTag(name, attributes)
end
def endTagHtml(name)
# End tag handler that first runs startTagHead('head', {}) and then passes
# the same end tag to whichever phase the parser is in afterwards.
# NOTE(review): presumably bound to the html/head/body/br end tags through
# the 'ImplyHead' handle_end mapping -- confirm against the dispatch helper.
#
# name - name of the end tag being processed
def endTagImplyHead(name)
startTagHead('head', {})
# Re-read @parser.phase rather than using self, so the tag goes to the
# parser's current phase after the call above.
@parser.phase.processEndTag(name)
end
@ -38,4 +38,4 @@ module HTML5lib
end
end
end
end

View file

@ -5,15 +5,20 @@ module HTML5lib
# http://www.whatwg.org/specs/web-apps/current-work/#in-body
handle_start 'html', 'body', 'form', 'plaintext', 'a', 'button', 'xmp', 'table', 'hr', 'image'
handle_start 'html'
handle_start %w( base link meta script style ) => 'ProcessInHead'
handle_start 'title'
handle_start 'input', 'textarea', 'select', 'isindex', %w( script style ), %w( marquee object )
handle_start 'body', 'form', 'plaintext', 'a', 'button', 'xmp', 'table', 'hr', 'image'
handle_start %w( li dd dt ) => 'ListItem', %w( base link meta title ) => 'FromHead'
handle_start 'input', 'textarea', 'select', 'isindex', %w( marquee object )
handle_start %w( li dd dt ) => 'ListItem'
handle_start %w( address blockquote center dir div dl fieldset listing menu ol p pre ul ) => 'CloseP'
handle_start %w( b big em font i nobr s small strike strong tt u ) => 'Formatting'
handle_start %w( b big em font i s small strike strong tt u ) => 'Formatting'
handle_start 'nobr'
handle_start %w( area basefont bgsound br embed img param spacer wbr ) => 'VoidFormatting'
@ -33,7 +38,9 @@ module HTML5lib
handle_end %w( head frameset select optgroup option table caption colgroup col thead tfoot tbody tr td th ) => 'Misplaced'
handle_end %w( area basefont bgsound br embed hr image img input isindex param spacer wbr frame ) => 'None'
handle_end 'br'
handle_end %w( area basefont bgsound embed hr image img input isindex param spacer wbr frame ) => 'None'
handle_end %w( noframes noscript noembed textarea xmp iframe ) => 'CdataTextAreaXmp'
@ -73,11 +80,11 @@ module HTML5lib
@tree.insertText(data)
end
def startTagScriptStyle(name, attributes)
# Start tag handler that hands the tag, unchanged, to the parser's :inHead
# phase instead of processing it in this phase.
#
# name       - name of the start tag
# attributes - attributes of the start tag, passed through untouched
def startTagProcessInHead(name, attributes)
@parser.phases[:inHead].processStartTag(name, attributes)
end
def startTagFromHead(name, attributes)
# Start tag handler for a tag that belongs in the head: records a parse
# error naming the tag, then lets the parser's :inHead phase process it.
#
# name       - name of the start tag (interpolated into the error message)
# attributes - attributes of the start tag, passed through untouched
def startTagTitle(name, attributes)
@parser.parseError(_("Unexpected start tag (#{name}) that belongs in the head. Moved."))
@parser.phases[:inHead].processStartTag(name, attributes)
end
@ -120,7 +127,12 @@ module HTML5lib
@tree.openElements.reverse.each_with_index do |node, i|
if stopName.include?(node.name)
(i + 1).times { @tree.openElements.pop }
poppedNodes = (0..i).collect { @tree.openElements.pop }
if i >= 1
@parser.parseError("Missing end tag%s (%s)" % [
(i>1 ? 's' : ''),
poppedNodes.reverse.map {|item| item.name}.join(', ')])
end
break
end
@ -142,15 +154,19 @@ module HTML5lib
def startTagHeading(name, attributes)
endTagP('p') if in_scope?('p')
HEADING_ELEMENTS.each do |element|
if in_scope?(element)
@parser.parseError(_("Unexpected start tag (#{name})."))
remove_open_elements_until { |element| HEADING_ELEMENTS.include?(element.name) }
break
end
end
# Uncomment the following for IE7 behavior:
# HEADING_ELEMENTS.each do |element|
# if in_scope?(element)
# @parser.parseError(_("Unexpected start tag (#{name})."))
#
# remove_open_elements_until do |element|
# HEADING_ELEMENTS.include?(element.name)
# end
#
# break
# end
# end
@tree.insertElement(name, attributes)
end
@ -170,6 +186,12 @@ module HTML5lib
addFormattingElement(name, attributes)
end
# Start tag handler for <nobr>.
#
# Reconstructs the active formatting elements, closes a <nobr> that is
# already in scope by processing an end tag for it, and then registers the
# new element as a formatting element.
#
# name       - tag name to hand to addFormattingElement
# attributes - attributes to hand to addFormattingElement
def startTagNobr(name, attributes)
  @tree.reconstructActiveFormattingElements
  if in_scope?('nobr')
    processEndTag('nobr')
  end
  addFormattingElement(name, attributes)
end
def startTagButton(name, attributes)
if in_scope?('button')
@parser.parseError(_('Unexpected start tag (button) implied end tag (button).'))
@ -497,6 +519,13 @@ module HTML5lib
@parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
end
# End tag handler for </br>: the stray end tag is treated as a <br> element.
#
# Records a parse error, reconstructs the active formatting elements,
# inserts an element with the given name and no attributes, and immediately
# pops the top of the open-elements stack again.
#
# name - tag name used for the inserted element
#
# Returns whatever was popped from @tree.openElements.
def endTagBr(name)
  message = _("Unexpected end tag (br). Treated as br element.")
  @parser.parseError(message)
  @tree.reconstructActiveFormattingElements
  @tree.insertElement(name, Hash.new)
  @tree.openElements.pop
end
def endTagNone(name)
# This handles elements with no end tag.
@parser.parseError(_("This tag (#{name}) has no end tag"))

View file

@ -5,7 +5,9 @@ module HTML5lib
handle_start 'html', 'head', 'title', 'style', 'script', %w( base link meta )
handle_end 'head', 'html', %w( title style script )
handle_end 'head'
handle_end %w( html body br ) => 'ImplyAfterHead'
handle_end %w( title style script )
def processEOF
if ['title', 'style', 'script'].include?(name = @tree.openElements[-1].name)
@ -63,7 +65,11 @@ module HTML5lib
def startTagBaseLinkMeta(name, attributes)
element = @tree.createElement(name, attributes)
appendToHead(element)
if @tree.headPointer != nil and @parser.phase == @parser.phases[:inHead]
appendToHead(element)
else
@tree.openElements[-1].appendChild(element)
end
end
def startTagOther(name, attributes)
@ -80,7 +86,7 @@ module HTML5lib
@parser.phase = @parser.phases[:afterHead]
end
def endTagHtml(name)
# End tag handler that first runs this phase's anythingElse step and then
# passes the same end tag to whichever phase the parser is in afterwards.
# NOTE(review): presumably bound to the html/body/br end tags through the
# 'ImplyAfterHead' handle_end mapping -- confirm against the dispatch helper.
#
# name - name of the end tag being processed
def endTagImplyAfterHead(name)
anythingElse
# Re-read @parser.phase rather than using self, so the tag goes to the
# parser's current phase after the call above.
@parser.phase.processEndTag(name)
end
@ -117,4 +123,4 @@ module HTML5lib
end
end
end
end

View file

@ -89,10 +89,10 @@ module HTML5lib
def endTagOther(name)
@parser.parseError(_("Unexpected end tag (#{name}) in table context caused voodoo mode."))
# Make all the special element rearranging voodoo kick in
@parser.insertFromTable = true
@tree.insertFromTable = true
# Process the end tag in the "in body" mode
@parser.phases[:inBody].processEndTag(name)
@parser.insertFromTable = false
@tree.insertFromTable = false
end
protected

View file

@ -17,9 +17,95 @@ module HTML5lib
@tree.insertComment(data, @tree.document)
end
def processDoctype(name, error)
@parser.parseError(_('Erroneous DOCTYPE.')) if error
# Handles a DOCTYPE token in this phase.
#
# name     - DOCTYPE name as it appeared in the source
# publicId - public identifier, or nil when none was given
# systemId - system identifier, or nil when none was given
# correct  - flag supplied with the token; not consulted here
#
# A parse error is recorded unless the DOCTYPE is a bare "html" one (no
# public or system identifier). The doctype is then inserted into the tree
# and the parser moves on to the :rootElement phase. The identifier checks
# below only select where quirks mode would be enabled; those branches are
# still placeholders.
def processDoctype(name, publicId, systemId, correct)
  if name.downcase != 'html' or publicId or systemId
    @parser.parseError(_('Erroneous DOCTYPE.'))
  end
  # XXX need to update DOCTYPE tokens
  @tree.insertDoctype(name)

  # Every identifier in the lists below is written in lowercase, so the
  # public identifier has to be folded to lowercase for the comparison to be
  # able to match at all (folding to uppercase could never match).
  publicId = publicId.to_s.downcase

  if name.downcase != 'html'
    # XXX quirks mode
  else
    if ["+//silmaril//dtd html pro v0r11 19970101//en",
        "-//advasoft ltd//dtd html 3.0 aswedit + extensions//en",
        "-//as//dtd html 3.0 aswedit + extensions//en",
        "-//ietf//dtd html 2.0 level 1//en",
        "-//ietf//dtd html 2.0 level 2//en",
        "-//ietf//dtd html 2.0 strict level 1//en",
        "-//ietf//dtd html 2.0 strict level 2//en",
        "-//ietf//dtd html 2.0 strict//en",
        "-//ietf//dtd html 2.0//en",
        "-//ietf//dtd html 2.1e//en",
        "-//ietf//dtd html 3.0//en",
        "-//ietf//dtd html 3.0//en//",
        "-//ietf//dtd html 3.2 final//en",
        "-//ietf//dtd html 3.2//en",
        "-//ietf//dtd html 3//en",
        "-//ietf//dtd html level 0//en",
        "-//ietf//dtd html level 0//en//2.0",
        "-//ietf//dtd html level 1//en",
        "-//ietf//dtd html level 1//en//2.0",
        "-//ietf//dtd html level 2//en",
        "-//ietf//dtd html level 2//en//2.0",
        "-//ietf//dtd html level 3//en",
        "-//ietf//dtd html level 3//en//3.0",
        "-//ietf//dtd html strict level 0//en",
        "-//ietf//dtd html strict level 0//en//2.0",
        "-//ietf//dtd html strict level 1//en",
        "-//ietf//dtd html strict level 1//en//2.0",
        "-//ietf//dtd html strict level 2//en",
        "-//ietf//dtd html strict level 2//en//2.0",
        "-//ietf//dtd html strict level 3//en",
        "-//ietf//dtd html strict level 3//en//3.0",
        "-//ietf//dtd html strict//en",
        "-//ietf//dtd html strict//en//2.0",
        "-//ietf//dtd html strict//en//3.0",
        "-//ietf//dtd html//en",
        "-//ietf//dtd html//en//2.0",
        "-//ietf//dtd html//en//3.0",
        "-//metrius//dtd metrius presentational//en",
        "-//microsoft//dtd internet explorer 2.0 html strict//en",
        "-//microsoft//dtd internet explorer 2.0 html//en",
        "-//microsoft//dtd internet explorer 2.0 tables//en",
        "-//microsoft//dtd internet explorer 3.0 html strict//en",
        "-//microsoft//dtd internet explorer 3.0 html//en",
        "-//microsoft//dtd internet explorer 3.0 tables//en",
        "-//netscape comm. corp.//dtd html//en",
        "-//netscape comm. corp.//dtd strict html//en",
        "-//o'reilly and associates//dtd html 2.0//en",
        "-//o'reilly and associates//dtd html extended 1.0//en",
        "-//spyglass//dtd html 2.0 extended//en",
        "-//sq//dtd html 2.0 hotmetal + extensions//en",
        "-//sun microsystems corp.//dtd hotjava html//en",
        "-//sun microsystems corp.//dtd hotjava strict html//en",
        "-//w3c//dtd html 3 1995-03-24//en",
        "-//w3c//dtd html 3.2 draft//en",
        "-//w3c//dtd html 3.2 final//en",
        "-//w3c//dtd html 3.2//en",
        "-//w3c//dtd html 3.2s draft//en",
        "-//w3c//dtd html 4.0 frameset//en",
        "-//w3c//dtd html 4.0 transitional//en",
        "-//w3c//dtd html experimental 19960712//en",
        "-//w3c//dtd html experimental 970421//en",
        "-//w3c//dtd w3 html//en",
        "-//w3o//dtd w3 html 3.0//en",
        "-//w3o//dtd w3 html 3.0//en//",
        "-//w3o//dtd w3 html strict 3.0//en//",
        "-//webtechs//dtd mozilla html 2.0//en",
        "-//webtechs//dtd mozilla html//en",
        "-/w3c/dtd html 4.0 transitional/en",
        "html"].include?(publicId) or
        # These two only count when no system identifier was supplied.
        (systemId == nil and
         ["-//w3c//dtd html 4.01 frameset//en",
          "-//w3c//dtd html 4.01 transitional//en"].include?(publicId)) or
        (systemId == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")
      #XXX quirks mode
    end
  end

  @parser.phase = @parser.phases[:rootElement]
end
@ -46,4 +132,4 @@ module HTML5lib
end
end
end
end

View file

@ -101,7 +101,7 @@ module HTML5lib
@tree.insertComment(data, @tree.openElements[-1])
end
def processDoctype(name, error)
# DOCTYPE handler for this phase: the token is not acted upon. The only
# effect is an 'Unexpected DOCTYPE. Ignored.' parse error; none of the four
# arguments is read.
def processDoctype(name, publicId, systemId, correct)
@parser.parseError(_('Unexpected DOCTYPE. Ignored.'))
end

View file

@ -33,9 +33,6 @@ module HTML5lib
options.each { |name, value| instance_variable_set("@#{name}", value) }
# List of where new lines occur
@new_lines = [0]
# Raw Stream
@raw_stream = open_stream(source)
@ -77,6 +74,8 @@ module HTML5lib
# Reset position in the list to read from
@tell = 0
@line = @col = 0
@line_lengths = []
end
# Produces a file object from source.
@ -112,7 +111,7 @@ module HTML5lib
require 'UniversalDetector' # gem install chardet
buffer = @raw_stream.read
encoding = UniversalDetector::chardet(buffer)['encoding']
@raw_stream = open_stream(buffer)
seek(buffer, 0)
rescue LoadError
end
end
@ -122,7 +121,7 @@ module HTML5lib
encoding = @DEFAULT_ENCODING
end
#Substitute for equivalent encodings:
#Substitute for equivalent encodings
encoding_sub = {'iso-8859-1' => 'windows-1252'}
if encoding_sub.has_key?(encoding.downcase)
@ -145,7 +144,6 @@ module HTML5lib
}
# Go to beginning of file and read in 4 bytes
@raw_stream.seek(0)
string = @raw_stream.read(4)
return nil unless string
@ -162,30 +160,80 @@ module HTML5lib
end
end
#AT - move this to the caller?
# Set the read position past the BOM if one was found, otherwise
# set it to the start of the stream
@raw_stream.seek(encoding ? seek : 0)
seek(string, encoding ? seek : 0)
return encoding
end
# Repositions @raw_stream so that the next read starts at offset +n+ of the
# data that was just read into +buffer+.
#
# buffer - String previously read from @raw_stream
# n      - offset into +buffer+ at which reading should resume
#
# Three strategies are tried in order:
#   1. the stream already supports #unget: push buffer[n..-1] back;
#   2. the stream supports #seek: seek to +n+ (Errno::ESPIPE, raised by
#      non-seekable streams, falls through to 3);
#   3. wrap the stream in a SimpleDelegator that gains #unget plus a #read
#      that serves pushed-back data before touching the real stream.
def seek(buffer, n)
  if @raw_stream.respond_to?(:unget)
    @raw_stream.unget(buffer[n..-1])
    return
  end

  if @raw_stream.respond_to?(:seek)
    begin
      @raw_stream.seek(n)
      return
    rescue Errno::ESPIPE
      # Not seekable: fall through to the push-back wrapper.
    end
  end

  require 'delegate'
  @raw_stream = SimpleDelegator.new(@raw_stream)

  class << @raw_stream
    # Reads +chars+ characters (everything when -1), draining pushed-back
    # data first and topping up from the wrapped stream when needed.
    def read(chars=-1)
      if chars == -1 or chars > @data.length
        result = @data
        @data = ''
        return result if __getobj__.eof?
        return result + __getobj__.read if chars == -1
        return result + __getobj__.read(chars-result.length)
      elsif @data.empty?
        return __getobj__.read(chars)
      else
        # Take the FIRST +chars+ characters; starting the slice at 1 would
        # silently lose the leading character of the pushed-back data.
        result = @data[0...chars]
        @data = @data[chars..-1]
        return result
      end
    end

    # Pushes +data+ back so that it is returned by the next read.
    def unget(data)
      if !@data or @data.empty?
        @data = data
      else
        # Pushed-back data was read before whatever is still buffered, so
        # it must go in front of it to keep the stream in order.
        @data = data + @data
      end
    end
  end

  @raw_stream.unget(buffer[n .. -1])
end
# Report the encoding declared by the meta element
def detect_encoding_meta
parser = EncodingParser.new(@raw_stream.read(@NUM_BYTES_META))
@raw_stream.seek(0)
buffer = @raw_stream.read(@NUM_BYTES_META)
parser = EncodingParser.new(buffer)
seek(buffer, 0)
return parser.get_encoding
end
# Returns (line, col) of the current position in the stream.
def position
line = 0
@new_lines.each do |pos|
break unless pos < @tell
line += 1
line, col = @line, @col
@queue.reverse.each do |c|
if c == "\n"
line -= 1
raise RuntimeError.new("col=#{col}") unless col == 0
col = @line_lengths[line]
else
col -= 1
end
end
col = @tell - @new_lines[line-1] - 1
return [line, col]
return [line+1, col]
end
# Read one character from the stream or queue if available. Return
@ -205,9 +253,14 @@ module HTML5lib
c = 0x0A
end
# record where newlines occur so that the position method
# can tell where it is
@new_lines << @tell-1 if c == 0x0A
# update position in stream
if c == 0x0a
@line_lengths << @col
@line += 1
@col = 0
else
@col += 1
end
c.chr
@ -261,11 +314,7 @@ module HTML5lib
# Put the character stopped on back to the front of the queue
# from where it came.
c = char_stack.pop
if c == :EOF or @data_stream[@tell-1] == c[0]
@tell -= 1
else
@queue.insert(0, c)
end
@queue.insert(0, c) unless c == :EOF
return char_stack.join('')
end
end
@ -454,7 +503,7 @@ module HTML5lib
space_found = false
#Step 5 attribute name
while true
if @data.current_byte == '=' and attr_name:
if @data.current_byte == '=' and attr_name
break
elsif SPACE_CHARACTERS.include?(@data.current_byte)
space_found = true

View file

@ -69,15 +69,22 @@ module HTML5lib
# ensure that non-void XHTML elements have content so that separate
# open and close tags are emitted
if token[:type] == :EndTag and \
not VOID_ELEMENTS.include? token[:name] and \
token[:name] == @tree.openElements[-1].name and \
not @tree.openElements[-1].hasContent
@tree.insertText('') unless
@tree.openElements.any? {|e|
e.attributes.keys.include? 'xmlns' and
e.attributes['xmlns'] != 'http://www.w3.org/1999/xhtml'
}
if token[:type] == :EndTag
  if VOID_ELEMENTS.include? token[:name]
    # An end tag for a void element that is not the current open element is
    # turned into an EmptyTag token. Tokens are keyed by symbols, so the
    # name and data must be accessed as :name / :data.
    if @tree.openElements[-1].name != token[:name]
      token[:type] = :EmptyTag
      token[:data] ||= {}
    end
  else
    # Non-void element that is being closed without any content: insert an
    # empty text node, unless some open element declares an xmlns other than
    # the XHTML namespace.
    if token[:name] == @tree.openElements[-1].name and \
        not @tree.openElements[-1].hasContent
      @tree.insertText('') unless
        @tree.openElements.any? {|e|
          e.attributes.keys.include? 'xmlns' and
          e.attributes['xmlns'] != 'http://www.w3.org/1999/xhtml'
        }
    end
  end
end
return token

View file

@ -1,178 +1,2 @@
require 'html5lib/constants'
module HTML5lib
class HTMLSerializer
CDATA_ELEMENTS = %w[style script xmp iframe noembed noframes noscript]
def self.serialize(stream, options = {})
new(options).serialize(stream, options[:encoding])
end
def initialize(options={})
@quote_attr_values = false
@quote_char = '"'
@use_best_quote_char = true
@minimize_boolean_attributes = true
@use_trailing_solidus = false
@space_before_trailing_solidus = true
@omit_optional_tags = true
@sanitize = false
@strip_whitespace = false
@inject_meta_charset = true
options.each do |name, value|
next unless %w(quote_attr_values quote_char use_best_quote_char
minimize_boolean_attributes use_trailing_solidus
space_before_trailing_solidus omit_optional_tags sanitize
strip_whitespace inject_meta_charset).include? name.to_s
@use_best_quote_char = false if name.to_s == 'quote_char'
instance_variable_set("@#{name}", value)
end
@errors = []
end
def serialize(treewalker, encoding=nil)
in_cdata = false
@errors = []
if encoding and @inject_meta_charset
require 'html5lib/filters/inject_meta_charset'
treewalker = Filters::InjectMetaCharset.new(treewalker, encoding)
end
if @strip_whitespace
require 'html5lib/filters/whitespace'
treewalker = Filters::WhitespaceFilter.new(treewalker)
end
if @sanitize
require 'html5lib/filters/sanitizer'
treewalker = Filters::HTMLSanitizeFilter.new(treewalker)
end
if @omit_optional_tags
require 'html5lib/filters/optionaltags'
treewalker = Filters::OptionalTagFilter.new(treewalker)
end
result = []
treewalker.each do |token|
type = token[:type]
if type == :Doctype
doctype = "<!DOCTYPE %s>" % token[:name]
result << doctype
elsif [:Characters, :SpaceCharacters].include? type
if type == :SpaceCharacters or in_cdata
if in_cdata and token[:data].include?("</")
serializeError(_("Unexpected </ in CDATA"))
end
result << token[:data]
else
result << token[:data].
gsub("&", "&amp;").
gsub("<", "&lt;").
gsub(">", "&gt;")
end
elsif [:StartTag, :EmptyTag].include? type
name = token[:name]
if CDATA_ELEMENTS.include?(name)
in_cdata = true
elsif in_cdata
serializeError(_("Unexpected child element of a CDATA element"))
end
attributes = []
for k,v in attrs = token[:data].to_a.sort
attributes << ' '
attributes << k
if not @minimize_boolean_attributes or \
(!(BOOLEAN_ATTRIBUTES[name]||[]).include?(k) \
and !BOOLEAN_ATTRIBUTES[:global].include?(k))
attributes << "="
if @quote_attr_values or v.empty?
quote_attr = true
else
quote_attr = (SPACE_CHARACTERS + %w(< > " ')).any? {|c| v.include?(c)}
end
v = v.gsub("&", "&amp;")
if quote_attr
quote_char = @quote_char
if @use_best_quote_char
if v.index("'") and !v.index('"')
quote_char = '"'
elsif v.index('"') and !v.index("'")
quote_char = "'"
end
end
if quote_char == "'"
v = v.gsub("'", "&#39;")
else
v = v.gsub('"', "&quot;")
end
attributes << quote_char << v << quote_char
else
attributes << v
end
end
end
if VOID_ELEMENTS.include?(name) and @use_trailing_solidus
if @space_before_trailing_solidus
attributes << " /"
else
attributes << "/"
end
end
result << "<%s%s>" % [name, attributes.join('')]
elsif type == :EndTag
name = token[:name]
if CDATA_ELEMENTS.include?(name)
in_cdata = false
elsif in_cdata
serializeError(_("Unexpected child element of a CDATA element"))
end
end_tag = "</#{name}>"
result << end_tag
elsif type == :Comment
data = token[:data]
serializeError(_("Comment contains --")) if data.index("--")
comment = "<!--%s-->" % token[:data]
result << comment
else
serializeError(token[:data])
end
end
if encoding and encoding != 'utf-8'
require 'iconv'
Iconv.iconv(encoding, 'utf-8', result.join('')).first
else
result.join('')
end
end
alias :render :serialize
def serializeError(data="XXX ERROR MESSAGE NEEDED")
# XXX The idea is to make data mandatory.
@errors.push(data)
if @strict
raise SerializeError
end
end
end
# Error in serialized tree
class SerializeError < Exception
end
end
require 'html5lib/serializer/htmlserializer'
require 'html5lib/serializer/xhtmlserializer'

View file

@ -0,0 +1,177 @@
require 'html5lib/constants'
module HTML5lib
# Turns a stream of tree-walker tokens (hashes with a :type of :Doctype,
# :Characters, :SpaceCharacters, :StartTag, :EmptyTag, :EndTag or :Comment)
# into an HTML string.
#
# Behaviour is controlled by the options listed in #initialize. Problems
# found while serializing are collected in @errors.
class HTMLSerializer

  # One-shot helper: builds a serializer from +options+ and serializes
  # +stream+, using options[:encoding] as the output encoding.
  def self.serialize(stream, options = {})
    new(options).serialize(stream, options[:encoding])
  end

  # Escapes &, < and > in +string+ for use as text content.
  def escape(string)
    string.gsub("&", "&amp;").gsub("<", "&lt;").gsub(">", "&gt;")
  end

  # options - Hash of settings (String or Symbol keys). Only keys that match
  #           one of the defaults assigned below are honoured; anything else
  #           is ignored. Supplying quote_char turns use_best_quote_char off.
  def initialize(options={})
    @quote_attr_values = false
    @quote_char = '"'
    @use_best_quote_char = true
    @minimize_boolean_attributes = true
    @use_trailing_solidus = false
    @space_before_trailing_solidus = true
    @escape_lt_in_attrs = false

    @omit_optional_tags = true
    @sanitize = false

    @strip_whitespace = false

    @inject_meta_charset = true

    # instance_variables returns Strings on Ruby 1.8 but Symbols on 1.9+;
    # normalise to Strings so the membership test works on both (a plain
    # include?("@name") rejects every option on newer Rubies).
    known_settings = instance_variables.map { |ivar| ivar.to_s }

    options.each do |name, value|
      next unless known_settings.include?("@#{name}")
      @use_best_quote_char = false if name.to_s == 'quote_char'
      instance_variable_set("@#{name}", value)
    end

    @errors = []
  end

  # Serializes the tokens yielded by +treewalker+.
  #
  # treewalker - object whose #each yields token hashes
  # encoding   - optional output encoding; when given and not 'utf-8' the
  #              result is converted from utf-8 with Iconv
  #
  # Returns the serialized String.
  def serialize(treewalker, encoding=nil)
    in_cdata = false
    @errors = []

    # Filters are loaded lazily and stacked in this order.
    if encoding and @inject_meta_charset
      require 'html5lib/filters/inject_meta_charset'
      treewalker = Filters::InjectMetaCharset.new(treewalker, encoding)
    end

    if @strip_whitespace
      require 'html5lib/filters/whitespace'
      treewalker = Filters::WhitespaceFilter.new(treewalker)
    end

    if @sanitize
      require 'html5lib/filters/sanitizer'
      treewalker = Filters::HTMLSanitizeFilter.new(treewalker)
    end

    if @omit_optional_tags
      require 'html5lib/filters/optionaltags'
      treewalker = Filters::OptionalTagFilter.new(treewalker)
    end

    result = []
    treewalker.each do |token|
      type = token[:type]
      if type == :Doctype
        doctype = "<!DOCTYPE %s>" % token[:name]
        result << doctype

      elsif [:Characters, :SpaceCharacters].include? type
        if type == :SpaceCharacters or in_cdata
          # Whitespace and (R)CDATA content are emitted verbatim.
          if in_cdata and token[:data].include?("</")
            serializeError(_("Unexpected </ in CDATA"))
          end
          result << token[:data]
        else
          result << escape(token[:data])
        end

      elsif [:StartTag, :EmptyTag].include? type
        name = token[:name]
        if RCDATA_ELEMENTS.include?(name)
          in_cdata = true
        elsif in_cdata
          serializeError(_("Unexpected child element of a CDATA element"))
        end
        attributes = []
        # Attributes are written in sorted order.
        token[:data].to_a.sort.each do |k, v|
          attributes << ' '
          attributes << k
          # A boolean attribute is written without a value when
          # minimisation is enabled.
          if not @minimize_boolean_attributes or \
              (!(BOOLEAN_ATTRIBUTES[name]||[]).include?(k) \
               and !BOOLEAN_ATTRIBUTES[:global].include?(k))
            attributes << "="
            if @quote_attr_values or v.empty?
              quote_attr = true
            else
              quote_attr = (SPACE_CHARACTERS + %w(< > " ')).any? {|c| v.include?(c)}
            end
            v = v.gsub("&", "&amp;")
            v = v.gsub("<", "&lt;") if @escape_lt_in_attrs
            if quote_attr
              quote_char = @quote_char
              if @use_best_quote_char
                # Prefer the quote character that does not occur in the value.
                if v.index("'") and !v.index('"')
                  quote_char = '"'
                elsif v.index('"') and !v.index("'")
                  quote_char = "'"
                end
              end
              if quote_char == "'"
                v = v.gsub("'", "&#39;")
              else
                v = v.gsub('"', "&quot;")
              end
              attributes << quote_char << v << quote_char
            else
              attributes << v
            end
          end
        end
        if VOID_ELEMENTS.include?(name) and @use_trailing_solidus
          if @space_before_trailing_solidus
            attributes << " /"
          else
            attributes << "/"
          end
        end
        result << "<%s%s>" % [name, attributes.join('')]

      elsif type == :EndTag
        name = token[:name]
        if RCDATA_ELEMENTS.include?(name)
          in_cdata = false
        elsif in_cdata
          serializeError(_("Unexpected child element of a CDATA element"))
        end
        end_tag = "</#{name}>"
        result << end_tag

      elsif type == :Comment
        data = token[:data]
        serializeError(_("Comment contains --")) if data.index("--")
        comment = "<!--%s-->" % token[:data]
        result << comment

      else
        serializeError(token[:data])
      end
    end

    if encoding and encoding != 'utf-8'
      require 'iconv'
      Iconv.iconv(encoding, 'utf-8', result.join('')).first
    else
      result.join('')
    end
  end

  alias :render :serialize

  # Records +data+ in @errors and raises SerializeError when @strict is set.
  def serializeError(data="XXX ERROR MESSAGE NEEDED")
    # XXX The idea is to make data mandatory.
    @errors.push(data)
    if @strict
      raise SerializeError
    end
  end
end
# Error in serialized tree. Raised by HTMLSerializer#serializeError when the
# serializer's @strict flag is set.
# NOTE(review): this derives from Exception rather than StandardError, so a
# bare `rescue` will not catch it -- confirm that this is intended.
class SerializeError < Exception
end
end

View file

@ -0,0 +1,19 @@
require 'html5lib/serializer/htmlserializer'
module HTML5lib
# HTMLSerializer variant that starts from the DEFAULTS option set below
# instead of HTMLSerializer's own defaults.
class XHTMLSerializer < HTMLSerializer
  # Option values applied unless the caller supplies the same key.
  DEFAULTS = {
    :quote_attr_values => true,
    :minimize_boolean_attributes => false,
    :use_trailing_solidus => true,
    :escape_lt_in_attrs => true,
    :omit_optional_tags => false
  }

  # options - Hash of serializer options; entries override DEFAULTS.
  def initialize(options={})
    super(DEFAULTS.merge(options))
  end
end
end

View file

@ -41,19 +41,31 @@ module HTML5lib
:attributeValueUnQuoted => :attributeValueUnQuotedState,
:bogusComment => :bogusCommentState,
:markupDeclarationOpen => :markupDeclarationOpenState,
:commentStart => :commentStartState,
:commentStartDash => :commentStartDashState,
:comment => :commentState,
:commentDash => :commentDashState,
:commentEndDash => :commentEndDashState,
:commentEnd => :commentEndState,
:doctype => :doctypeState,
:beforeDoctypeName => :beforeDoctypeNameState,
:doctypeName => :doctypeNameState,
:afterDoctypeName => :afterDoctypeNameState,
:beforeDoctypePublicIdentifier => :beforeDoctypePublicIdentifierState,
:doctypePublicIdentifierDoubleQuoted => :doctypePublicIdentifierDoubleQuotedState,
:doctypePublicIdentifierSingleQuoted => :doctypePublicIdentifierSingleQuotedState,
:afterDoctypePublicIdentifier => :afterDoctypePublicIdentifierState,
:beforeDoctypeSystemIdentifier => :beforeDoctypeSystemIdentifierState,
:doctypeSystemIdentifierDoubleQuoted => :doctypeSystemIdentifierDoubleQuotedState,
:doctypeSystemIdentifierSingleQuoted => :doctypeSystemIdentifierSingleQuotedState,
:afterDoctypeSystemIdentifier => :afterDoctypeSystemIdentifierState,
:bogusDoctype => :bogusDoctypeState
}
# Setup the initial tokenizer state
@contentModelFlag = :PCDATA
@state = @states[:data]
@escapeFlag = false
@lastFourChars = []
# The current token being created
@currentToken = nil
@ -133,24 +145,14 @@ module HTML5lib
# If the integer is between 127 and 160 (so 128 and bigger and 159 and
# smaller) we need to do the "windows trick".
if (127...160).include? charAsInt
#XXX - removed parse error from windows 1252 entity for now
#we may want to reenable this later
#@tokenQueue.push({:type => :ParseError, :data =>
# _("Entity used with illegal number (windows-1252 reference).")})
@tokenQueue.push({:type => :ParseError, :data =>
_("Entity used with illegal number (windows-1252 reference).")})
charAsInt = ENTITIES_WINDOWS1252[charAsInt - 128]
end
# 0 is not a good number.
if charAsInt == 0
charAsInt = 65533
end
if charAsInt <= 0x10FFFF
if charAsInt > 0 and charAsInt <= 1114111
char = [charAsInt].pack('U')
else
@tokenQueue.push({:type => :ParseError, :data =>
_("Numeric entity couldn't be converted to character.")})
end
# Discard the ; if present. Otherwise, put it back on the queue and
@ -167,7 +169,10 @@ module HTML5lib
def consumeEntity
char = nil
charStack = [@stream.char]
if charStack[0] == "#"
if SPACE_CHARACTERS.include?(charStack[0]) or
[:EOF, '<', '&'].include?(charStack[0])
@stream.queue+= charStack
elsif charStack[0] == "#"
# We might have a number entity here.
charStack += [@stream.char, @stream.char]
if charStack.include? :EOF
@ -194,10 +199,6 @@ module HTML5lib
_("Numeric entity expected but none found.")})
end
end
# Break out if we reach the end of the file
elsif charStack[0] == :EOF
@tokenQueue.push({:type => :ParseError, :data =>
_("Entity expected. Got end of file instead.")})
else
# At this point in the process might have named entity. Entities
# are stored in the global variable "entities".
@ -267,14 +268,33 @@ module HTML5lib
# statements should be.
def dataState
data = @stream.char
if data == "&" and (@contentModelFlag == :PCDATA or
@contentModelFlag == :RCDATA)
if @contentModelFlag == :CDATA or @contentModelFlag == :RCDATA
@lastFourChars << data
@lastFourChars.shift if @lastFourChars.length > 4
end
if data == "&" and [:PCDATA,:RCDATA].include?(@contentModelFlag)
@state = @states[:entityData]
elsif data == "<" and @contentModelFlag != :PLAINTEXT
@state = @states[:tagOpen]
elsif data == "-" and [:CDATA,:RCDATA].include?(@contentModelFlag) and
@escapeFlag == false and @lastFourChars.join('') == "<!--"
@escapeFlag = true
@tokenQueue.push({:type => :Characters, :data => data})
elsif data == "<" and @escapeFlag == false and
[:PCDATA,:CDATA,:RCDATA].include?(@contentModelFlag)
@state = @states[:tagOpen]
elsif data == ">" and [:CDATA,:RCDATA].include?(@contentModelFlag) and
@escapeFlag == true and @lastFourChars[1..-1].join('') == "-->"
@escapeFlag = false
@tokenQueue.push({:type => :Characters, :data => data})
elsif data == :EOF
# Tokenization ends.
return false
elsif SPACE_CHARACTERS.include? data
# Directly after emitting a token you switch back to the "data
# state". At that point SPACE_CHARACTERS are important so they are
@ -285,7 +305,7 @@ module HTML5lib
data + @stream.chars_until(SPACE_CHARACTERS, true)})
else
@tokenQueue.push({:type => :Characters, :data =>
data + @stream.chars_until(["&", "<"])})
data + @stream.chars_until(%w[& < > -])})
end
return true
end
@ -380,8 +400,6 @@ module HTML5lib
# emitting the end tag token.
@contentModelFlag = :PCDATA
else
@tokenQueue.push({:type => :ParseError, :data =>
_("Expected closing tag after seeing '</'. None found.")})
@tokenQueue.push({:type => :Characters, :data => "</"})
@state = @states[:data]
@ -391,29 +409,27 @@ module HTML5lib
end
end
if @contentModelFlag == :PCDATA
data = @stream.char
if data == :EOF
@tokenQueue.push({:type => :ParseError, :data =>
_("Expected closing tag. Unexpected end of file.")})
@tokenQueue.push({:type => :Characters, :data => "</"})
@state = @states[:data]
elsif ASCII_LETTERS.include? data
@currentToken =\
{:type => :EndTag, :name => data, :data => []}
@state = @states[:tagName]
elsif data == ">"
@tokenQueue.push({:type => :ParseError, :data =>
_("Expected closing tag. Got '>' instead. Ignoring '</>'.")})
@state = @states[:data]
else
# XXX data can be _'_...
@tokenQueue.push({:type => :ParseError, :data =>
_("Expected closing tag. Unexpected character '" + data + "' found.")})
@stream.queue.push(data)
@state = @states[:bogusComment]
end
data = @stream.char
if data == :EOF
@tokenQueue.push({:type => :ParseError, :data =>
_("Expected closing tag. Unexpected end of file.")})
@tokenQueue.push({:type => :Characters, :data => "</"})
@state = @states[:data]
elsif ASCII_LETTERS.include? data
@currentToken = {:type => :EndTag, :name => data, :data => []}
@state = @states[:tagName]
elsif data == ">"
@tokenQueue.push({:type => :ParseError, :data =>
_("Expected closing tag. Got '>' instead. Ignoring '</>'.")})
@state = @states[:data]
else
# XXX data can be _'_...
@tokenQueue.push({:type => :ParseError, :data =>
_("Expected closing tag. Unexpected character '#{data}' found.")})
@stream.queue.push(data)
@state = @states[:bogusComment]
end
return true
end
@ -430,11 +446,6 @@ module HTML5lib
@stream.chars_until(ASCII_LETTERS, true)
elsif data == ">"
emitCurrentToken
elsif data == "<"
@stream.queue.push(data)
@tokenQueue.push({:type => :ParseError, :data =>
_("Unexpected < character when getting the tag name.")})
emitCurrentToken
elsif data == "/"
processSolidusInTag
@state = @states[:beforeAttributeName]
@ -459,11 +470,6 @@ module HTML5lib
emitCurrentToken
elsif data == "/"
processSolidusInTag
elsif data == "<"
@stream.queue.push(data)
@tokenQueue.push({:type => :ParseError, :data =>
_("Unexpected < character. Expected attribute name instead.")})
emitCurrentToken
else
@currentToken[:data].push([data, ""])
@state = @states[:attributeName]
@ -494,12 +500,6 @@ module HTML5lib
elsif data == "/"
processSolidusInTag
@state = @states[:beforeAttributeName]
elsif data == "<"
@stream.queue.push(data)
@tokenQueue.push({:type => :ParseError, :data =>
_("Unexpected < character in attribute name.")})
emitCurrentToken
leavingThisState = false
else
@currentToken[:data][-1][0] += data
leavingThisState = false
@ -537,11 +537,6 @@ module HTML5lib
elsif data == "/"
processSolidusInTag
@state = @states[:beforeAttributeName]
elsif data == "<"
@stream.queue.push(data)
@tokenQueue.push({:type => :ParseError, :data =>
_("Unexpected < character. Expected = or end of tag.")})
emitCurrentToken
elsif data == :EOF
@tokenQueue.push({:type => :ParseError, :data =>
_("Unexpected end of file. Expected = or end of tag.")})
@ -566,11 +561,6 @@ module HTML5lib
@state = @states[:attributeValueSingleQuoted]
elsif data == ">"
emitCurrentToken
elsif data == "<"
@stream.queue.push(data)
@tokenQueue.push({:type => :ParseError, :data =>
_("Unexpected < character. Expected attribute value.")})
emitCurrentToken
elsif data == :EOF
@tokenQueue.push({:type => :ParseError, :data =>
_("Unexpected end of file. Expected attribute value.")})
@ -624,11 +614,6 @@ module HTML5lib
processEntityInAttribute
elsif data == ">"
emitCurrentToken
elsif data == "<"
@stream.queue.push(data)
@tokenQueue.push({:type => :ParseError, :data =>
_("Unexpected < character in attribute value.")})
emitCurrentToken
elsif data == :EOF
@tokenQueue.push({:type => :ParseError, :data =>
_("Unexpected end of file in attribute value.")})
@ -658,14 +643,15 @@ module HTML5lib
charStack = [@stream.char, @stream.char]
if charStack == ["-", "-"]
@currentToken = {:type => :Comment, :data => ""}
@state = @states[:comment]
@state = @states[:commentStart]
else
5.times { charStack.push(@stream.char) }
# Put in explicit :EOF check
if ((not charStack.include? :EOF) and
charStack.join("").upcase == "DOCTYPE")
@currentToken =\
{:type => :Doctype, :name => "", :data => true}
{:type => :Doctype, :name => "",
:publicId => nil, :systemId => nil, :correct => true}
@state = @states[:doctype]
else
@tokenQueue.push({:type => :ParseError, :data =>
@ -677,10 +663,52 @@ module HTML5lib
return true
end
# Tokenizer state entered once "<!--" has been seen and an empty Comment
# token has been created.
#
# Consumes one character from @stream and acts on it:
# * "-"   -> switch to the commentStartDash state;
# * ">"   -> queue an "Incorrect comment." parse error, queue the current
#            token and switch back to the data state;
# * :EOF  -> queue an end-of-file parse error, queue the current token and
#            switch back to the data state;
# * other -> append the character, followed by whatever
#            @stream.chars_until("-") returns, to the comment data and
#            switch to the comment state.
#
# Always returns true.
def commentStartState
  data = @stream.char
  if data == "-"
    @state = @states[:commentStartDash]
  elsif data == ">"
    @tokenQueue.push({:type => :ParseError, :data =>
      _("Incorrect comment.")})
    @tokenQueue.push(@currentToken)
    @state = @states[:data]
  elsif data == :EOF
    # End of input is compared against the :EOF symbol, as in the other
    # tokenizer states; a bare EOF constant would not match it.
    @tokenQueue.push({:type => :ParseError, :data =>
      _("Unexpected end of file in comment.")})
    @tokenQueue.push(@currentToken)
    @state = @states[:data]
  else
    @currentToken[:data] += data + @stream.chars_until("-")
    @state = @states[:comment]
  end
  return true
end
# Tokenizer state entered from the comment start state after a "-" was
# consumed there.
#
# Consumes one character from @stream and acts on it:
# * "-"   -> switch to the commentEnd state;
# * ">"   -> queue an "Incorrect comment." parse error, queue the current
#            token and switch back to the data state;
# * :EOF  -> queue an end-of-file parse error, queue the current token and
#            switch back to the data state;
# * other -> append the character, followed by whatever
#            @stream.chars_until("-") returns, to the comment data and
#            switch to the comment state.
#
# Always returns true.
def commentStartDashState
  data = @stream.char
  if data == "-"
    @state = @states[:commentEnd]
  elsif data == ">"
    @tokenQueue.push({:type => :ParseError, :data =>
      _("Incorrect comment.")})
    @tokenQueue.push(@currentToken)
    @state = @states[:data]
  elsif data == :EOF
    # End of input is compared against the :EOF symbol, as in the other
    # tokenizer states; a bare EOF constant would not match it.
    @tokenQueue.push({:type => :ParseError, :data =>
      _("Unexpected end of file in comment.")})
    @tokenQueue.push(@currentToken)
    @state = @states[:data]
  else
    @currentToken[:data] += data + @stream.chars_until("-")
    @state = @states[:comment]
  end
  return true
end
def commentState
data = @stream.char
if data == "-"
@state = @states[:commentDash]
@state = @states[:commentEndDash]
elsif data == :EOF
@tokenQueue.push({:type => :ParseError, :data =>
_("Unexpected end of file in comment.")})
@ -692,7 +720,7 @@ module HTML5lib
return true
end
def commentDashState
def commentEndDashState
data = @stream.char
if data == "-"
@state = @states[:commentEnd]
@ -752,19 +780,16 @@ module HTML5lib
def beforeDoctypeNameState
data = @stream.char
if SPACE_CHARACTERS.include? data
elsif ASCII_LOWERCASE.include? data
@currentToken[:name] = data.upcase
@state = @states[:doctypeName]
elsif data == ">"
# Character needs to be consumed per the specification so don't
# invoke emitCurrentTokenWithParseError with :data as argument.
@tokenQueue.push({:type => :ParseError, :data =>
_("Unexpected > character. Expected DOCTYPE name.")})
@currentToken[:correct] = false
@tokenQueue.push(@currentToken)
@state = @states[:data]
elsif data == :EOF
@tokenQueue.push({:type => :ParseError, :data =>
_("Unexpected end of file. Expected DOCTYPE name.")})
@currentToken[:correct] = false
@tokenQueue.push(@currentToken)
@state = @states[:data]
else
@ -776,33 +801,21 @@ module HTML5lib
# Tokenizer state that extends the DOCTYPE name by one character per call.
#
# Consumes one character from @stream:
# * a space character -> switch to the afterDoctypeName state;
# * ">"               -> queue the current token and switch to the data state;
# * :EOF              -> queue a parse error, mark the token as not correct,
#                        queue it and switch to the data state;
# * anything else     -> append it to the token name (ASCII lowercase letters
#                        are upcased first).
#
# Always returns true.
#
# NOTE(review): the EOF branch flags the token through :correct while the
# final check writes :data -- confirm which key the token consumers read.
def doctypeNameState
data = @stream.char
# Set on the branches after which the name is compared with "HTML" below.
needsDoctypeCheck = false
if SPACE_CHARACTERS.include? data
@state = @states[:afterDoctypeName]
needsDoctypeCheck = true
elsif data == ">"
@tokenQueue.push(@currentToken)
@state = @states[:data]
elsif data == :EOF
@tokenQueue.push({:type => :ParseError, :data =>
_("Unexpected end of file in DOCTYPE name.")})
@currentToken[:correct] = false
@tokenQueue.push(@currentToken)
@state = @states[:data]
else
# Only characters listed in ASCII_LOWERCASE are upcased; everything else
# (for instance non-ASCII characters) is appended unchanged.
if ASCII_LOWERCASE.include? data
data = data.upcase
end
@currentToken[:name] += data
needsDoctypeCheck = true
end
# Once the accumulated name reads exactly "HTML", the token's :data entry
# is set to false; for any other name it is left untouched.
if needsDoctypeCheck and @currentToken[:name] == "HTML"
@currentToken[:data] = false
end
return true
end
@ -814,16 +827,195 @@ module HTML5lib
@state = @states[:data]
elsif data == :EOF
@currentToken[:data] = true
# XXX EMIT
@stream.queue.push(data)
@tokenQueue.push({:type => :ParseError, :data =>
_("Unexpected end of file in DOCTYPE.")})
@currentToken[:correct] = false
@tokenQueue.push(@currentToken)
@state = @states[:data]
else
charStack = [data]
5.times { charStack << stream.char }
token = charStack.join('').tr(ASCII_UPPERCASE,ASCII_LOWERCASE)
if token == "public"
@state = @states[:beforeDoctypePublicIdentifier]
elsif token == "system"
@state = @states[:beforeDoctypeSystemIdentifier]
else
@stream.queue += charStack
@tokenQueue.push({:type => :ParseError, :data =>
_("Expected 'public' or 'system'. Got '#{charStack.join('')}'")})
@state = @states[:bogusDoctype]
end
end
return true
end
# Tokenizer state entered after the "public" keyword of a DOCTYPE has been
# matched; it looks for the opening quote of the public identifier.
#
# Consumes one character from @stream:
# * a space character -> ignored, the state is unchanged;
# * '"' or "'"        -> start an empty :publicId on the current token and
#                        switch to the matching quoted-identifier state;
# * ">"               -> queue a parse error, mark the token as not correct,
#                        queue it and switch to the data state;
# * :EOF              -> same handling as ">", with an end-of-file message;
# * anything else     -> queue a parse error and switch to the bogusDoctype
#                        state.
#
# Always returns true.
def beforeDoctypePublicIdentifierState
  data = @stream.char
  if SPACE_CHARACTERS.include?(data)
    # Whitespace before the identifier is skipped.
  elsif data == "\""
    @currentToken[:publicId] = ""
    @state = @states[:doctypePublicIdentifierDoubleQuoted]
  elsif data == "'"
    @currentToken[:publicId] = ""
    @state = @states[:doctypePublicIdentifierSingleQuoted]
  elsif data == ">"
    @tokenQueue.push({:type => :ParseError, :data =>
      _("Unexpected end of DOCTYPE.")})
    @currentToken[:correct] = false
    @tokenQueue.push(@currentToken)
    @state = @states[:data]
  elsif data == :EOF
    @tokenQueue.push({:type => :ParseError, :data =>
      _("Unexpected end of file in DOCTYPE.")})
    @currentToken[:correct] = false
    @tokenQueue.push(@currentToken)
    @state = @states[:data]
  else
    # The token itself is left untouched here; only the error is reported
    # before handing over to the bogusDoctype state.
    @tokenQueue.push({:type => :ParseError, :data =>
      _("Unexpected character in DOCTYPE.")})
    @state = @states[:bogusDoctype]
  end
  return true
end
# Tokenizer state that reads the body of a double-quoted DOCTYPE public
# identifier, one character per call.
#
# * '"'   -> the identifier is closed; switch to afterDoctypePublicIdentifier.
# * :EOF  -> queue a parse error, mark the token as not correct, queue the
#            token and switch to the data state.
# * other -> append the character to the token's :publicId.
#
# Always returns true.
def doctypePublicIdentifierDoubleQuotedState
  case (ch = @stream.char)
  when "\""
    @state = @states[:afterDoctypePublicIdentifier]
  when :EOF
    eof_error = {:type => :ParseError,
                 :data => _("Unexpected end of file in DOCTYPE.")}
    @tokenQueue.push(eof_error)
    @currentToken[:correct] = false
    @tokenQueue.push(@currentToken)
    @state = @states[:data]
  else
    @currentToken[:publicId] += ch
  end
  true
end
# Tokenizer state that reads the body of a single-quoted DOCTYPE public
# identifier, one character per call.
#
# * "'"   -> the identifier is closed; switch to afterDoctypePublicIdentifier.
# * :EOF  -> queue a parse error, mark the token as not correct, queue the
#            token and switch to the data state.
# * other -> append the character to the token's :publicId.
#
# Always returns true.
def doctypePublicIdentifierSingleQuotedState
  case (ch = @stream.char)
  when "'"
    @state = @states[:afterDoctypePublicIdentifier]
  when :EOF
    eof_error = {:type => :ParseError,
                 :data => _("Unexpected end of file in DOCTYPE.")}
    @tokenQueue.push(eof_error)
    @currentToken[:correct] = false
    @tokenQueue.push(@currentToken)
    @state = @states[:data]
  else
    @currentToken[:publicId] += ch
  end
  true
end
# Tokenizer state entered once a DOCTYPE public identifier's closing quote
# has been consumed.
#
# Consumes one character from @stream:
# * a space character -> ignored, the state is unchanged;
# * '"' or "'"        -> start an empty :systemId on the current token and
#                        switch to the matching quoted system-identifier state;
# * ">"               -> queue the current token and switch to the data state;
# * :EOF              -> queue a parse error, mark the token as not correct,
#                        queue it and switch to the data state;
# * anything else     -> queue a parse error and switch to bogusDoctype.
#
# Always returns true.
def afterDoctypePublicIdentifierState
  ch = @stream.char
  return true if SPACE_CHARACTERS.include?(ch)

  # Opening quote character => state that reads the system identifier.
  system_id_states = {
    "\"" => :doctypeSystemIdentifierDoubleQuoted,
    "'"  => :doctypeSystemIdentifierSingleQuoted
  }

  if (quoted_state = system_id_states[ch])
    @currentToken[:systemId] = ""
    @state = @states[quoted_state]
    return true
  end

  case ch
  when ">"
    @tokenQueue.push(@currentToken)
    @state = @states[:data]
  when :EOF
    eof_error = {:type => :ParseError,
                 :data => _("Unexpected end of file in DOCTYPE.")}
    @tokenQueue.push(eof_error)
    @currentToken[:correct] = false
    @tokenQueue.push(@currentToken)
    @state = @states[:data]
  else
    stray_error = {:type => :ParseError,
                   :data => _("Unexpected character in DOCTYPE.")}
    @tokenQueue.push(stray_error)
    @state = @states[:bogusDoctype]
  end
  true
end
# Tokenizer state entered after the "system" keyword of a DOCTYPE has been
# matched; it looks for the opening quote of the system identifier.
#
# Consumes one character from @stream:
# * a space character -> ignored, the state is unchanged;
# * '"' or "'"        -> start an empty :systemId on the current token and
#                        switch to the matching quoted system-identifier state;
# * ">" or :EOF       -> queue a parse error (the message differs per case),
#                        mark the token as not correct, queue it and switch
#                        to the data state;
# * anything else     -> queue a parse error and switch to bogusDoctype.
#
# Always returns true.
def beforeDoctypeSystemIdentifierState
  ch = @stream.char
  return true if SPACE_CHARACTERS.include?(ch)

  case ch
  when "\""
    @currentToken[:systemId] = ""
    @state = @states[:doctypeSystemIdentifierDoubleQuoted]
  when "'"
    @currentToken[:systemId] = ""
    @state = @states[:doctypeSystemIdentifierSingleQuoted]
  when ">", :EOF
    # Both cases abort the DOCTYPE the same way; only the message differs.
    message = if ch == ">"
                _("Unexpected character in DOCTYPE.")
              else
                _("Unexpected end of file in DOCTYPE.")
              end
    @tokenQueue.push({:type => :ParseError, :data => message})
    @currentToken[:correct] = false
    @tokenQueue.push(@currentToken)
    @state = @states[:data]
  else
    stray_error = {:type => :ParseError,
                   :data => _("Unexpected character in DOCTYPE.")}
    @tokenQueue.push(stray_error)
    @state = @states[:bogusDoctype]
  end
  true
end
# Tokenizer state that reads the body of a double-quoted DOCTYPE system
# identifier, one character per call.
#
# * '"'   -> the identifier is closed; switch to afterDoctypeSystemIdentifier.
# * :EOF  -> queue a parse error, mark the token as not correct, queue the
#            token and switch to the data state.
# * other -> append the character to the token's :systemId.
#
# Always returns true.
def doctypeSystemIdentifierDoubleQuotedState
  case (ch = @stream.char)
  when "\""
    @state = @states[:afterDoctypeSystemIdentifier]
  when :EOF
    eof_error = {:type => :ParseError,
                 :data => _("Unexpected end of file in DOCTYPE.")}
    @tokenQueue.push(eof_error)
    @currentToken[:correct] = false
    @tokenQueue.push(@currentToken)
    @state = @states[:data]
  else
    @currentToken[:systemId] += ch
  end
  true
end
# Tokenizer state that reads the body of a single-quoted DOCTYPE system
# identifier, one character per call.
#
# * "'"   -> the identifier is closed; switch to afterDoctypeSystemIdentifier.
# * :EOF  -> queue a parse error, mark the token as not correct, queue the
#            token and switch to the data state.
# * other -> append the character to the token's :systemId.
#
# Always returns true.
def doctypeSystemIdentifierSingleQuotedState
  case (ch = @stream.char)
  when "'"
    @state = @states[:afterDoctypeSystemIdentifier]
  when :EOF
    eof_error = {:type => :ParseError,
                 :data => _("Unexpected end of file in DOCTYPE.")}
    @tokenQueue.push(eof_error)
    @currentToken[:correct] = false
    @tokenQueue.push(@currentToken)
    @state = @states[:data]
  else
    @currentToken[:systemId] += ch
  end
  true
end
def afterDoctypeSystemIdentifierState
data = @stream.char
if SPACE_CHARACTERS.include?(data)
elsif data == ">"
@tokenQueue.push(@currentToken)
@state = @states[:data]
elsif data == :EOF
@tokenQueue.push({:type => :ParseError, :data =>
_("Unexpected end of file in DOCTYPE.")})
@currentToken[:correct] = false
@tokenQueue.push(@currentToken)
@state = @states[:data]
else
@tokenQueue.push({:type => :ParseError, :data =>
_("Unexpected character in DOCTYPE.")})
@state = @states[:bogusDoctype]
end
return true
@ -839,6 +1031,7 @@ module HTML5lib
@stream.queue.push(data)
@tokenQueue.push({:type => :ParseError, :data =>
_("Unexpected end of file in bogus doctype.")})
@currentToken[:correct] = false
@tokenQueue.push(@currentToken)
@state = @states[:data]
end

View file

@ -27,6 +27,9 @@ module HTML5lib
childNodes << node
hpricot.children << node.hpricot
end
if (oldparent = node.hpricot.parent) != nil
oldparent.children.delete_at(oldparent.children.index(node.hpricot))
end
node.hpricot.parent = hpricot
node.parent = self
end

View file

@ -9,7 +9,7 @@ module HTML5lib
def node_details(node)
case node
when ::Hpricot::Elem
if !node.name
if node.name.empty?
[:DOCUMENT_FRAGMENT]
else
[:ELEMENT, node.name,