Sync with latest HTML5lib

This commit is contained in:
Jacques Distler 2007-06-22 03:12:08 -05:00
parent bf572e295f
commit 8e92e4a3ab
41 changed files with 1334 additions and 564 deletions

View file

@ -5,7 +5,7 @@ module HTML5lib
handle_start 'html', 'head'
handle_end 'html'
handle_end %w( html head body br ) => 'ImplyHead'
def processEOF
startTagHead('head', {})
@ -28,7 +28,7 @@ module HTML5lib
@parser.phase.processStartTag(name, attributes)
end
def endTagHtml(name)
def endTagImplyHead(name)
startTagHead('head', {})
@parser.phase.processEndTag(name)
end
@ -38,4 +38,4 @@ module HTML5lib
end
end
end
end

View file

@ -5,15 +5,20 @@ module HTML5lib
# http://www.whatwg.org/specs/web-apps/current-work/#in-body
handle_start 'html', 'body', 'form', 'plaintext', 'a', 'button', 'xmp', 'table', 'hr', 'image'
handle_start 'html'
handle_start %w( base link meta script style ) => 'ProcessInHead'
handle_start 'title'
handle_start 'input', 'textarea', 'select', 'isindex', %w( script style ), %w( marquee object )
handle_start 'body', 'form', 'plaintext', 'a', 'button', 'xmp', 'table', 'hr', 'image'
handle_start %w( li dd dt ) => 'ListItem', %w( base link meta title ) => 'FromHead'
handle_start 'input', 'textarea', 'select', 'isindex', %w( marquee object )
handle_start %w( li dd dt ) => 'ListItem'
handle_start %w( address blockquote center dir div dl fieldset listing menu ol p pre ul ) => 'CloseP'
handle_start %w( b big em font i nobr s small strike strong tt u ) => 'Formatting'
handle_start %w( b big em font i s small strike strong tt u ) => 'Formatting'
handle_start 'nobr'
handle_start %w( area basefont bgsound br embed img param spacer wbr ) => 'VoidFormatting'
@ -33,7 +38,9 @@ module HTML5lib
handle_end %w( head frameset select optgroup option table caption colgroup col thead tfoot tbody tr td th ) => 'Misplaced'
handle_end %w( area basefont bgsound br embed hr image img input isindex param spacer wbr frame ) => 'None'
handle_end 'br'
handle_end %w( area basefont bgsound embed hr image img input isindex param spacer wbr frame ) => 'None'
handle_end %w( noframes noscript noembed textarea xmp iframe ) => 'CdataTextAreaXmp'
@ -73,11 +80,11 @@ module HTML5lib
@tree.insertText(data)
end
def startTagScriptStyle(name, attributes)
def startTagProcessInHead(name, attributes)
@parser.phases[:inHead].processStartTag(name, attributes)
end
def startTagFromHead(name, attributes)
def startTagTitle(name, attributes)
@parser.parseError(_("Unexpected start tag (#{name}) that belongs in the head. Moved."))
@parser.phases[:inHead].processStartTag(name, attributes)
end
@ -120,7 +127,12 @@ module HTML5lib
@tree.openElements.reverse.each_with_index do |node, i|
if stopName.include?(node.name)
(i + 1).times { @tree.openElements.pop }
poppedNodes = (0..i).collect { @tree.openElements.pop }
if i >= 1
@parser.parseError("Missing end tag%s (%s)" % [
(i>1 ? 's' : ''),
poppedNodes.reverse.map {|item| item.name}.join(', ')])
end
break
end
@ -142,15 +154,19 @@ module HTML5lib
def startTagHeading(name, attributes)
endTagP('p') if in_scope?('p')
HEADING_ELEMENTS.each do |element|
if in_scope?(element)
@parser.parseError(_("Unexpected start tag (#{name})."))
remove_open_elements_until { |element| HEADING_ELEMENTS.include?(element.name) }
break
end
end
# Uncomment the following for IE7 behavior:
# HEADING_ELEMENTS.each do |element|
# if in_scope?(element)
# @parser.parseError(_("Unexpected start tag (#{name})."))
#
# remove_open_elements_until do |element|
# HEADING_ELEMENTS.include?(element.name)
# end
#
# break
# end
# end
@tree.insertElement(name, attributes)
end
@ -170,6 +186,12 @@ module HTML5lib
addFormattingElement(name, attributes)
end
def startTagNobr(name, attributes)
@tree.reconstructActiveFormattingElements
processEndTag('nobr') if in_scope?('nobr')
addFormattingElement(name, attributes)
end
def startTagButton(name, attributes)
if in_scope?('button')
@parser.parseError(_('Unexpected start tag (button) implied end tag (button).'))
@ -497,6 +519,13 @@ module HTML5lib
@parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
end
def endTagBr(name)
@parser.parseError(_("Unexpected end tag (br). Treated as br element."))
@tree.reconstructActiveFormattingElements
@tree.insertElement(name, {})
@tree.openElements.pop()
end
def endTagNone(name)
# This handles elements with no end tag.
@parser.parseError(_("This tag (#{name}) has no end tag"))

View file

@ -5,7 +5,9 @@ module HTML5lib
handle_start 'html', 'head', 'title', 'style', 'script', %w( base link meta )
handle_end 'head', 'html', %w( title style script )
handle_end 'head'
handle_end %w( html body br ) => 'ImplyAfterHead'
handle_end %w( title style script )
def processEOF
if ['title', 'style', 'script'].include?(name = @tree.openElements[-1].name)
@ -63,7 +65,11 @@ module HTML5lib
def startTagBaseLinkMeta(name, attributes)
element = @tree.createElement(name, attributes)
appendToHead(element)
if @tree.headPointer != nil and @parser.phase == @parser.phases[:inHead]
appendToHead(element)
else
@tree.openElements[-1].appendChild(element)
end
end
def startTagOther(name, attributes)
@ -80,7 +86,7 @@ module HTML5lib
@parser.phase = @parser.phases[:afterHead]
end
def endTagHtml(name)
def endTagImplyAfterHead(name)
anythingElse
@parser.phase.processEndTag(name)
end
@ -117,4 +123,4 @@ module HTML5lib
end
end
end
end

View file

@ -89,10 +89,10 @@ module HTML5lib
def endTagOther(name)
@parser.parseError(_("Unexpected end tag (#{name}) in table context caused voodoo mode."))
# Make all the special element rearranging voodoo kick in
@parser.insertFromTable = true
@tree.insertFromTable = true
# Process the end tag in the "in body" mode
@parser.phases[:inBody].processEndTag(name)
@parser.insertFromTable = false
@tree.insertFromTable = false
end
protected

View file

@ -17,9 +17,95 @@ module HTML5lib
@tree.insertComment(data, @tree.document)
end
def processDoctype(name, error)
@parser.parseError(_('Erroneous DOCTYPE.')) if error
def processDoctype(name, publicId, systemId, correct)
if name.downcase != 'html' or publicId or systemId
@parser.parseError(_('Erroneous DOCTYPE.'))
end
# XXX need to update DOCTYPE tokens
@tree.insertDoctype(name)
publicId = publicId.to_s.upcase
if name.downcase != 'html'
# XXX quirks mode
else
if ["+//silmaril//dtd html pro v0r11 19970101//en",
"-//advasoft ltd//dtd html 3.0 aswedit + extensions//en",
"-//as//dtd html 3.0 aswedit + extensions//en",
"-//ietf//dtd html 2.0 level 1//en",
"-//ietf//dtd html 2.0 level 2//en",
"-//ietf//dtd html 2.0 strict level 1//en",
"-//ietf//dtd html 2.0 strict level 2//en",
"-//ietf//dtd html 2.0 strict//en",
"-//ietf//dtd html 2.0//en",
"-//ietf//dtd html 2.1e//en",
"-//ietf//dtd html 3.0//en",
"-//ietf//dtd html 3.0//en//",
"-//ietf//dtd html 3.2 final//en",
"-//ietf//dtd html 3.2//en",
"-//ietf//dtd html 3//en",
"-//ietf//dtd html level 0//en",
"-//ietf//dtd html level 0//en//2.0",
"-//ietf//dtd html level 1//en",
"-//ietf//dtd html level 1//en//2.0",
"-//ietf//dtd html level 2//en",
"-//ietf//dtd html level 2//en//2.0",
"-//ietf//dtd html level 3//en",
"-//ietf//dtd html level 3//en//3.0",
"-//ietf//dtd html strict level 0//en",
"-//ietf//dtd html strict level 0//en//2.0",
"-//ietf//dtd html strict level 1//en",
"-//ietf//dtd html strict level 1//en//2.0",
"-//ietf//dtd html strict level 2//en",
"-//ietf//dtd html strict level 2//en//2.0",
"-//ietf//dtd html strict level 3//en",
"-//ietf//dtd html strict level 3//en//3.0",
"-//ietf//dtd html strict//en",
"-//ietf//dtd html strict//en//2.0",
"-//ietf//dtd html strict//en//3.0",
"-//ietf//dtd html//en",
"-//ietf//dtd html//en//2.0",
"-//ietf//dtd html//en//3.0",
"-//metrius//dtd metrius presentational//en",
"-//microsoft//dtd internet explorer 2.0 html strict//en",
"-//microsoft//dtd internet explorer 2.0 html//en",
"-//microsoft//dtd internet explorer 2.0 tables//en",
"-//microsoft//dtd internet explorer 3.0 html strict//en",
"-//microsoft//dtd internet explorer 3.0 html//en",
"-//microsoft//dtd internet explorer 3.0 tables//en",
"-//netscape comm. corp.//dtd html//en",
"-//netscape comm. corp.//dtd strict html//en",
"-//o'reilly and associates//dtd html 2.0//en",
"-//o'reilly and associates//dtd html extended 1.0//en",
"-//spyglass//dtd html 2.0 extended//en",
"-//sq//dtd html 2.0 hotmetal + extensions//en",
"-//sun microsystems corp.//dtd hotjava html//en",
"-//sun microsystems corp.//dtd hotjava strict html//en",
"-//w3c//dtd html 3 1995-03-24//en",
"-//w3c//dtd html 3.2 draft//en",
"-//w3c//dtd html 3.2 final//en",
"-//w3c//dtd html 3.2//en",
"-//w3c//dtd html 3.2s draft//en",
"-//w3c//dtd html 4.0 frameset//en",
"-//w3c//dtd html 4.0 transitional//en",
"-//w3c//dtd html experimental 19960712//en",
"-//w3c//dtd html experimental 970421//en",
"-//w3c//dtd w3 html//en",
"-//w3o//dtd w3 html 3.0//en",
"-//w3o//dtd w3 html 3.0//en//",
"-//w3o//dtd w3 html strict 3.0//en//",
"-//webtechs//dtd mozilla html 2.0//en",
"-//webtechs//dtd mozilla html//en",
"-/w3c/dtd html 4.0 transitional/en",
"html"].include?(publicId) or
(systemId == nil and
["-//w3c//dtd html 4.01 frameset//EN",
"-//w3c//dtd html 4.01 transitional//EN"].include?(publicId)) or
(systemId == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")
#XXX quirks mode
end
end
@parser.phase = @parser.phases[:rootElement]
end
@ -46,4 +132,4 @@ module HTML5lib
end
end
end
end

View file

@ -101,7 +101,7 @@ module HTML5lib
@tree.insertComment(data, @tree.openElements[-1])
end
def processDoctype(name, error)
def processDoctype(name, publicId, systemId, correct)
@parser.parseError(_('Unexpected DOCTYPE. Ignored.'))
end