Ruby 1.9 Compatibility

Completely removed the html5lib sanitizer.
Fixed the string-handling to work in both
Ruby 1.8.x and 1.9.2. There are still,
inexplicably, two functional tests that
fail. But the rest seems to work quite well.
This commit is contained in:
Jacques Distler 2009-11-30 16:28:18 -06:00
parent 79c8572053
commit a6429f8c22
142 changed files with 519 additions and 843 deletions

View file

@ -0,0 +1,46 @@
require 'html5/html5parser/phase'
module HTML5
  # Tree-construction phase that handles tokens arriving after </body>.
  class AfterBodyPhase < Phase

    handle_end 'html'

    # Insert the comment under the first element on the open-element stack
    # (the <html> element) instead of under whatever is currently open.
    def processComment(data)
      root = @tree.open_elements.first
      @tree.insert_comment(data, root)
    end

    # Report the stray characters, then switch to the inBody phase and let
    # it process them.
    def processCharacters(data)
      parse_error "unexpected-char-after-body"
      @parser.phase = @parser.phases[:inBody]
      @parser.phase.processCharacters(data)
    end

    # Report the stray start tag, then switch to the inBody phase and let
    # it process the tag.
    def processStartTag(name, attributes)
      parse_error "unexpected-start-tag-after-body", {"name" => name}
      @parser.phase = @parser.phases[:inBody]
      @parser.phase.processStartTag(name, attributes)
    end

    # </html>: an error when parsing an inner_html fragment; otherwise
    # remember the current phase and move to the trailingEnd phase.
    def endTagHtml(name)
      return parse_error("end-html-in-innerhtml") if @parser.inner_html

      # XXX: This may need to be done, not sure
      # Don't set last_phase to the current phase but to the inBody phase
      # instead. No need for extra parse errors if there's something after </html>.
      # Try "<!doctype html>X</html>X" for instance.
      @parser.last_phase = @parser.phase
      @parser.phase = @parser.phases[:trailingEnd]
    end

    # Report the stray end tag, then switch to the inBody phase and let it
    # process the tag.
    def endTagOther(name)
      parse_error "unexpected-end-tag-after-body", {"name" => name}
      @parser.phase = @parser.phases[:inBody]
      @parser.phase.processEndTag(name)
    end

  end
end

View file

@ -0,0 +1,33 @@
require 'html5/html5parser/phase'
module HTML5
  # Tree-construction phase that handles tokens arriving after </frameset>.
  class AfterFramesetPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#after3

    handle_start 'html', 'noframes'

    handle_end 'html'

    # Character data is not expected here; it is only reported.
    def processCharacters(data)
      parse_error "unexpected-char-after-frameset"
    end

    # <noframes> is handed to the inBody phase without switching phases.
    def startTagNoframes(name, attributes)
      in_body = @parser.phases[:inBody]
      in_body.processStartTag(name, attributes)
    end

    # Any other start tag is reported and otherwise ignored.
    def startTagOther(name, attributes)
      parse_error "unexpected-start-tag-after-frameset", {"name" => name}
    end

    # </html>: remember the current phase and move to the trailingEnd phase.
    def endTagHtml(name)
      @parser.last_phase = @parser.phase
      @parser.phase = @parser.phases[:trailingEnd]
    end

    # Any other end tag is reported and otherwise ignored.
    def endTagOther(name)
      parse_error "unexpected-end-tag-after-frameset", {"name" => name}
    end

  end
end

View file

@ -0,0 +1,50 @@
require 'html5/html5parser/phase'
module HTML5
  # Tree-construction phase between </head> and the start of <body> or
  # <frameset>.
  class AfterHeadPhase < Phase

    handle_start 'html', 'body', 'frameset', %w( base link meta script style title ) => 'FromHead'

    # End of input: imply <body>, then let the new phase see the EOF.
    def process_eof
      anything_else
      @parser.phase.process_eof
    end

    # Character data: imply <body>, then let the new phase process it.
    def processCharacters(data)
      anything_else
      @parser.phase.processCharacters(data)
    end

    # <body>: insert the element and continue in the inBody phase.
    def startTagBody(name, attributes)
      @tree.insert_element(name, attributes)
      @parser.phase = @parser.phases[:inBody]
    end

    # <frameset>: insert the element and continue in the inFrameset phase.
    def startTagFrameset(name, attributes)
      @tree.insert_element(name, attributes)
      @parser.phase = @parser.phases[:inFrameset]
    end

    # Head-only elements showing up after the head: report the error, then
    # switch to the inHead phase and let it process the tag.
    def startTagFromHead(name, attributes)
      parse_error "unexpected-start-tag-out-of-my-head", {"name" => name}
      @parser.phase = @parser.phases[:inHead]
      @parser.phase.processStartTag(name, attributes)
    end

    # Any other start tag: imply <body>, then let the new phase process it.
    def startTagOther(name, attributes)
      anything_else
      @parser.phase.processStartTag(name, attributes)
    end

    # Every end tag: imply <body>, then let the new phase process it.
    def processEndTag(name)
      anything_else
      @parser.phase.processEndTag(name)
    end

    # Insert an attribute-less <body> element and switch to the inBody phase.
    def anything_else
      @tree.insert_element('body', {})
      @parser.phase = @parser.phases[:inBody]
    end

  end
end

View file

@ -0,0 +1,41 @@
require 'html5/html5parser/phase'
module HTML5
  # Tree-construction phase that runs until a <head> element exists.
  class BeforeHeadPhase < Phase

    handle_start 'html', 'head'

    handle_end %w( html head body br p ) => 'ImplyHead'

    # End of input: imply <head>, then let the new phase see the EOF.
    def process_eof
      startTagHead('head', {})
      @parser.phase.process_eof
    end

    # Character data: imply <head>, then let the new phase process it.
    def processCharacters(data)
      startTagHead('head', {})
      @parser.phase.processCharacters(data)
    end

    # Insert the element, record it as the tree's head pointer and switch
    # to the inHead phase.
    def startTagHead(name, attributes)
      @tree.insert_element(name, attributes)
      @tree.head_pointer = @tree.open_elements.last
      @parser.phase = @parser.phases[:inHead]
    end

    # Any other start tag: imply <head>, then let the new phase process it.
    def startTagOther(name, attributes)
      startTagHead('head', {})
      @parser.phase.processStartTag(name, attributes)
    end

    # </html>, </head>, </body>, </br>, </p>: imply <head>, then let the
    # new phase process the end tag.
    def endTagImplyHead(name)
      startTagHead('head', {})
      @parser.phase.processEndTag(name)
    end

    # Any other end tag is reported and otherwise ignored.
    def endTagOther(name)
      parse_error "end-tag-after-implied-root", {"name" => name}
    end

  end
end

View file

@ -0,0 +1,609 @@
require 'html5/html5parser/phase'
module HTML5
  # Tree-construction phase for content inside <body>.
  #
  # The handle_start / handle_end declarations below associate tag names
  # with the startTag* / endTag* methods of this class (the macros are
  # defined in Phase, which is not visible here -- presumably a name such as
  # 'CloseP' selects startTagCloseP; confirm against Phase).
  class InBodyPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-body

    handle_start 'html'
    handle_start %w(base link meta script style) => 'ProcessInHead'
    handle_start 'title'

    handle_start 'body', 'form', 'plaintext', 'a', 'button', 'xmp', 'table', 'hr', 'image'

    handle_start 'input', 'textarea', 'select', 'isindex', %w(marquee object)

    handle_start %w(li dd dt) => 'ListItem'

    handle_start %w(address blockquote center dir div dl fieldset listing menu ol p pre ul) => 'CloseP'

    handle_start %w(b big em font i s small strike strong tt u) => 'Formatting'
    handle_start 'nobr'

    handle_start %w(area basefont bgsound br embed img param spacer wbr) => 'VoidFormatting'

    handle_start %w(iframe noembed noframes noscript) => 'Cdata', HEADING_ELEMENTS => 'Heading'

    handle_start %w(caption col colgroup frame frameset head option optgroup tbody td tfoot th thead tr) => 'Misplaced'

    handle_start %w(event-source section nav article aside header footer datagrid command) => 'New'

    handle_end 'p', 'body', 'html', 'form', %w(button marquee object), %w(dd dt li) => 'ListItem'

    handle_end %w(address blockquote center div dl fieldset listing menu ol pre ul) => 'Block'

    handle_end HEADING_ELEMENTS => 'Heading'

    handle_end %w(a b big em font i nobr s small strike strong tt u) => 'Formatting'

    handle_end %w(head frameset select optgroup option table caption colgroup col thead tfoot tbody tr td th) => 'Misplaced'

    handle_end 'br'

    handle_end %w(area basefont bgsound embed hr image img input isindex param spacer wbr frame) => 'None'

    handle_end %w(noframes noscript noembed textarea xmp iframe ) => 'CdataTextAreaXmp'

    handle_end %w(event-source section nav article aside header footer datagrid command) => 'New'

    # Keeps a per-instance alias of the regular whitespace handler so that
    # it can be restored after it has been temporarily swapped out for
    # processSpaceCharactersDropNewline (see startTagCloseP / startTagTextarea).
    def initialize(parser, tree)
      super(parser, tree)

      # for special handling of whitespace in <pre>
      class << self
        alias processSpaceCharactersNonPre processSpaceCharacters
      end
    end

    # One-shot whitespace handler installed right after <pre> / <textarea>.
    # It first restores the regular handler on this instance, then drops a
    # single leading newline when the current node is a content-less
    # <pre> or <textarea>, and inserts whatever text remains.
    def processSpaceCharactersDropNewline(data)
      # #Sometimes (start of <pre> blocks) we want to drop leading newlines

      class << self
        remove_method :processSpaceCharacters rescue nil
        alias processSpaceCharacters processSpaceCharactersNonPre
      end

      # data[0] == ?\n holds on both Ruby 1.8 (Fixnum vs Fixnum) and
      # Ruby 1.9 (String vs String).
      if (data.length > 0 and data[0] == ?\n &&
        %w[pre textarea].include?(@tree.open_elements.last.name) && !@tree.open_elements.last.hasContent)
        data = data[1..-1]
      end

      if data.length > 0
        @tree.reconstructActiveFormattingElements
        @tree.insertText(data)
      end
    end

    # Regular whitespace handler: reconstruct formatting, insert the text.
    def processSpaceCharacters(data)
      @tree.reconstructActiveFormattingElements()
      @tree.insertText(data)
    end

    # Non-whitespace character data: reconstruct formatting, insert the text.
    def processCharacters(data)
      # XXX The specification says to do this for every character at the
      # moment, but apparently that doesn't match the real world so we don't
      # do it for space characters.
      @tree.reconstructActiveFormattingElements
      @tree.insertText(data)
    end

    # base/link/meta/script/style are handed to the inHead phase.
    def startTagProcessInHead(name, attributes)
      @parser.phases[:inHead].processStartTag(name, attributes)
    end

    # <title> in the body is an error but is still handled by inHead.
    def startTagTitle(name, attributes)
      parse_error("unexpected-start-tag-out-of-my-head", {"name" => name})
      @parser.phases[:inHead].processStartTag(name, attributes)
    end

    # A second <body>: always an error. When a body element is on the
    # stack, attributes it does not have yet are copied onto it.
    def startTagBody(name, attributes)
      parse_error("unexpected-start-tag", {"name" => "body"})

      if @tree.open_elements.length == 1 || @tree.open_elements[1].name != 'body'
        assert @parser.inner_html
      else
        attributes.each do |attr, value|
          unless @tree.open_elements[1].attributes.has_key?(attr)
            @tree.open_elements[1].attributes[attr] = value
          end
        end
      end
    end

    # Block-level start tags: close an open <p>, insert the element and,
    # for <pre>, arm the one-shot leading-newline handler.
    def startTagCloseP(name, attributes)
      endTagP('p') if in_scope?('p')
      @tree.insert_element(name, attributes)
      if name == 'pre'
        class << self
          remove_method :processSpaceCharacters rescue nil
          alias processSpaceCharacters processSpaceCharactersDropNewline
        end
      end
    end

    # <form>: an error when a form pointer is already set; otherwise close
    # an open <p>, insert the element and record it as the form pointer.
    def startTagForm(name, attributes)
      if @tree.formPointer
        parse_error("unexpected-start-tag", {"name" => name})
      else
        endTagP('p') if in_scope?('p')
        @tree.insert_element(name, attributes)
        @tree.formPointer = @tree.open_elements.last
      end
    end

    # <li>, <dd>, <dt>: close an open <p>, pop the stack down to a matching
    # open list item (if one is found before a special/scoping element
    # other than address/div), then insert the new element.
    def startTagListItem(name, attributes)
      endTagP('p') if in_scope?('p')

      stopNames = {'li' => ['li'], 'dd' => ['dd', 'dt'], 'dt' => ['dd', 'dt']}
      stopName = stopNames[name]

      # Iterates over a reversed copy, so popping the real stack is safe.
      @tree.open_elements.reverse.each_with_index do |node, i|
        if stopName.include?(node.name)
          poppedNodes = (0..i).collect { @tree.open_elements.pop }
          if i >= 1
            parse_error(
              i == 1 ? "missing-end-tag" : "missing-end-tags",
              {"name" => poppedNodes.collect{|n| n.name}.join(", ")})
          end
          break
        end

        # Phrasing elements are all non special, non scoping, non
        # formatting elements
        break if ((SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name) && !%w[address div].include?(node.name))
      end

      # Always insert an <li> element.
      @tree.insert_element(name, attributes)
    end

    # <plaintext>: close an open <p>, insert the element and switch the
    # tokenizer's content model flag to :PLAINTEXT.
    def startTagPlaintext(name, attributes)
      endTagP('p') if in_scope?('p')
      @tree.insert_element(name, attributes)
      @parser.tokenizer.content_model_flag = :PLAINTEXT
    end

    # Heading start tags: close an open <p> and insert the element.
    def startTagHeading(name, attributes)
      endTagP('p') if in_scope?('p')

      # Uncomment the following for IE7 behavior:
      # HEADING_ELEMENTS.each do |element|
      #   if in_scope?(element)
      #     parse_error("unexpected-start-tag", {"name" => name})
      #
      #     remove_open_elements_until do |element|
      #       HEADING_ELEMENTS.include?(element.name)
      #     end
      #
      #     break
      #   end
      # end
      @tree.insert_element(name, attributes)
    end

    # <a>: when an <a> is already among the active formatting elements,
    # report it, run the end-tag handling for it and make sure it is gone
    # from both lists; then add the new element as a formatting element.
    def startTagA(name, attributes)
      if afeAElement = @tree.elementInActiveFormattingElements('a')
        parse_error("unexpected-start-tag-implies-end-tag", {"startName" => "a", "endName" => "a"})
        endTagFormatting('a')
        @tree.open_elements.delete(afeAElement) if @tree.open_elements.include?(afeAElement)
        @tree.activeFormattingElements.delete(afeAElement) if @tree.activeFormattingElements.include?(afeAElement)
      end
      @tree.reconstructActiveFormattingElements
      addFormattingElement(name, attributes)
    end

    # Formatting start tags (b, i, em, ...).
    def startTagFormatting(name, attributes)
      @tree.reconstructActiveFormattingElements
      addFormattingElement(name, attributes)
    end

    # <nobr>: a <nobr> already in scope is closed first (with an error).
    def startTagNobr(name, attributes)
      @tree.reconstructActiveFormattingElements
      if in_scope?('nobr')
        parse_error("unexpected-start-tag-implies-end-tag", {"startName" => "nobr", "endName" => "nobr"})
        processEndTag('nobr')
        # XXX Need tests that trigger the following
        @tree.reconstructActiveFormattingElements
      end
      addFormattingElement(name, attributes)
    end

    # <button>: a <button> already in scope is closed first (with an error)
    # and the start tag is reprocessed; otherwise the element is inserted
    # and a Marker is pushed onto the active formatting elements.
    def startTagButton(name, attributes)
      if in_scope?('button')
        parse_error("unexpected-start-tag-implies-end-tag", {"startName" => "button", "endName" => "button"})
        processEndTag('button')
        @parser.phase.processStartTag(name, attributes)
      else
        @tree.reconstructActiveFormattingElements
        @tree.insert_element(name, attributes)
        @tree.activeFormattingElements.push(Marker)
      end
    end

    # <marquee>, <object>: insert the element and push a Marker.
    def startTagMarqueeObject(name, attributes)
      @tree.reconstructActiveFormattingElements
      @tree.insert_element(name, attributes)
      @tree.activeFormattingElements.push(Marker)
    end

    # <xmp>: insert the element and switch the tokenizer to :CDATA.
    def startTagXmp(name, attributes)
      @tree.reconstructActiveFormattingElements
      @tree.insert_element(name, attributes)
      @parser.tokenizer.content_model_flag = :CDATA
    end

    # <table>: close an open <p>, insert the element, switch to inTable.
    def startTagTable(name, attributes)
      processEndTag('p') if in_scope?('p')
      @tree.insert_element(name, attributes)
      @parser.phase = @parser.phases[:inTable]
    end

    # Void elements (br, img, ...): insert and immediately pop.
    def startTagVoidFormatting(name, attributes)
      @tree.reconstructActiveFormattingElements
      @tree.insert_element(name, attributes)
      @tree.open_elements.pop
    end

    # <hr>: close an open <p>, then insert and immediately pop.
    def startTagHr(name, attributes)
      endTagP('p') if in_scope?('p')
      @tree.insert_element(name, attributes)
      @tree.open_elements.pop
    end

    # <image> is reported and then processed as <img>.
    def startTagImage(name, attributes)
      # No really...
      parse_error("unexpected-start-tag-treated-as", {"originalName" => "image", "newName" => "img"})
      processStartTag('img', attributes)
    end

    # <input>: insert and immediately pop.
    def startTagInput(name, attributes)
      @tree.reconstructActiveFormattingElements
      @tree.insert_element(name, attributes)
      if @tree.formPointer
        # XXX Not exactly sure what to do here
        # @tree.open_elements[-1].form = @tree.formPointer
      end
      @tree.open_elements.pop
    end

    # <isindex>: always reported as deprecated. Unless a form pointer is
    # set, it is expanded into a form / hr / p / label / input sequence.
    # Note that the caller's attributes hash receives a 'name' entry.
    def startTagIsindex(name, attributes)
      parse_error("deprecated-tag", {"name" => "isindex"})
      return if @tree.formPointer
      processStartTag('form', {})
      processStartTag('hr', {})
      processStartTag('p', {})
      processStartTag('label', {})
      # XXX Localization ...
      processCharacters('This is a searchable index. Insert your search keywords here: ')
      attributes['name'] = 'isindex'
      processStartTag('input', attributes)
      processEndTag('label')
      processEndTag('p')
      processStartTag('hr', {})
      processEndTag('form')
    end

    # <textarea>: insert the element, switch the tokenizer to :RCDATA and
    # arm the one-shot leading-newline handler.
    def startTagTextarea(name, attributes)
      # XXX Form element pointer checking here as well...
      @tree.insert_element(name, attributes)
      @parser.tokenizer.content_model_flag = :RCDATA
      class << self
        remove_method :processSpaceCharacters rescue nil
        alias processSpaceCharacters processSpaceCharactersDropNewline
      end
    end

    # iframe, noembed noframes, noscript(if scripting enabled)
    def startTagCdata(name, attributes)
      @tree.insert_element(name, attributes)
      @parser.tokenizer.content_model_flag = :CDATA
    end

    # <select>: insert the element and switch to the inSelect phase.
    def startTagSelect(name, attributes)
      @tree.reconstructActiveFormattingElements
      @tree.insert_element(name, attributes)
      @parser.phase = @parser.phases[:inSelect]
    end

    def startTagMisplaced(name, attributes)
      # Elements that should be children of other elements that have a
      # different insertion mode; here they are ignored
      # "caption", "col", "colgroup", "frame", "frameset", "head",
      # "option", "optgroup", "tbody", "td", "tfoot", "th", "thead",
      # "tr", "noscript"
      parse_error("unexpected-start-tag-ignored", {"name" => name})
    end

    def startTagNew(name, attributes)
      # New HTML5 elements, "event-source", "section", "nav",
      # "article", "aside", "header", "footer", "datagrid", "command"
      # $stderr.puts("Warning: Undefined behaviour for start tag #{name}")
      startTagOther(name, attributes)
      #raise NotImplementedError
    end

    # Any other start tag: reconstruct formatting and insert the element.
    def startTagOther(name, attributes)
      @tree.reconstructActiveFormattingElements
      @tree.insert_element(name, attributes)
    end

    # </p>: with a <p> in scope, generate implied end tags and pop until no
    # <p> is in scope any more; without one, insert an empty <p> and close
    # it again. An error is reported unless the current node is a <p>.
    def endTagP(name)
      @tree.generateImpliedEndTags('p') if in_scope?('p')
      parse_error("unexpected-end-tag", {"name" => "p"}) unless @tree.open_elements.last.name == 'p'
      if in_scope?('p')
        @tree.open_elements.pop while in_scope?('p')
      else
        startTagCloseP('p', {})
        endTagP('p')
      end
    end

    # </body>: ignored (with an error) when the second stack entry is not a
    # body element; otherwise switch to the afterBody phase, reporting an
    # error if body is not the current node.
    def endTagBody(name)
      # XXX Need to take open <p> tags into account here. We shouldn't imply
      # </p> but we should not throw a parse error either. Specification is
      # likely to be updated.
      unless @tree.open_elements[1] && @tree.open_elements[1].name == 'body'
        # inner_html case
        # String key, like every other parse_error call in this class.
        parse_error "unexpected-end-tag", {"name" => "body"}
        return
      end
      unless @tree.open_elements.last.name == 'body'
        parse_error("expected-one-end-tag-but-got-another",
                    {"expectedName" => "body",
                     "gotName" => @tree.open_elements.last.name})
      end
      @parser.phase = @parser.phases[:afterBody]
    end

    # </html>: handled as </body>, then (outside inner_html parsing) the
    # end tag is passed on to whatever phase is current afterwards.
    def endTagHtml(name)
      endTagBody(name)
      @parser.phase.processEndTag(name) unless @parser.inner_html
    end

    # End tags of block elements (div, ul, pre, ...).
    def endTagBlock(name)
      @tree.generateImpliedEndTags if in_scope?(name)

      unless @tree.open_elements.last.name == name
        parse_error("end-tag-too-early", {"name" => name})
      end

      if in_scope?(name)
        remove_open_elements_until(name)
      end
    end

    # </form>: pops the current node only when it is the form; the form
    # pointer is cleared in every case.
    def endTagForm(name)
      if in_scope?(name)
        @tree.generateImpliedEndTags
      end
      if @tree.open_elements.last.name != name
        parse_error("end-tag-too-early-ignored", {"name" => "form"})
      else
        @tree.open_elements.pop
      end
      @tree.formPointer = nil
    end

    # </li>, </dd>, </dt>.
    def endTagListItem(name)
      # AT Could merge this with the Block case
      @tree.generateImpliedEndTags(name) if in_scope?(name)

      unless @tree.open_elements.last.name == name
        parse_error("end-tag-too-early", {"name" => name})
      end

      remove_open_elements_until(name) if in_scope?(name)
    end

    # Heading end tags: if any heading element is in scope, generate
    # implied end tags and pop the stack up to a heading element.
    def endTagHeading(name)
      HEADING_ELEMENTS.each do |heading|
        if in_scope?(heading)
          @tree.generateImpliedEndTags
          break
        end
      end

      unless @tree.open_elements.last.name == name
        parse_error("end-tag-too-early", {"name" => name})
      end

      HEADING_ELEMENTS.each do |heading|
        if in_scope?(heading)
          # Distinct block-parameter name: reusing the outer one clobbers it
          # on Ruby 1.8 and triggers a shadowing warning on Ruby 1.9.
          remove_open_elements_until {|node| HEADING_ELEMENTS.include?(node.name)}
          break
        end
      end
    end

    # The much-feared adoption agency algorithm
    def endTagFormatting(name)
      # http://www.whatwg.org/specs/web-apps/current-work/#adoptionAgency
      # XXX Better parse_error messages appreciated.
      while true
        # Step 1 paragraph 1
        afeElement = @tree.elementInActiveFormattingElements(name)
        if !afeElement or (@tree.open_elements.include?(afeElement) && !in_scope?(afeElement.name))
          parse_error("adoption-agency-1.1", {"name" => name})
          return
        # Step 1 paragraph 2
        elsif not @tree.open_elements.include?(afeElement)
          parse_error("adoption-agency-1.2", {"name" => name})
          @tree.activeFormattingElements.delete(afeElement)
          return
        end

        # Step 1 paragraph 3
        if afeElement != @tree.open_elements.last
          parse_error("adoption-agency-1.3", {"name" => name})
        end

        # Step 2
        # Start of the adoption agency algorithm proper
        afeIndex = @tree.open_elements.index(afeElement)
        furthestBlock = nil
        @tree.open_elements[afeIndex..-1].each do |candidate|
          if (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(candidate.name)
            furthestBlock = candidate
            break
          end
        end

        # Step 3
        if furthestBlock.nil?
          popped = remove_open_elements_until {|candidate| candidate == afeElement }
          @tree.activeFormattingElements.delete(popped)
          return
        end
        commonAncestor = @tree.open_elements[afeIndex - 1]

        # Step 5
        furthestBlock.parent.removeChild(furthestBlock) if furthestBlock.parent

        # Step 6
        # The bookmark is supposed to help us identify where to reinsert
        # nodes in step 12. We have to ensure that we reinsert nodes after
        # the node before the active formatting element. Note the bookmark
        # can move in step 7.4
        bookmark = @tree.activeFormattingElements.index(afeElement)

        # Step 7
        lastNode = node = furthestBlock
        while true
          # AT replace this with a function and recursion?
          # Node is element before node in open elements
          node = @tree.open_elements[@tree.open_elements.index(node) - 1]
          until @tree.activeFormattingElements.include?(node)
            tmpNode = node
            node = @tree.open_elements[@tree.open_elements.index(node) - 1]
            @tree.open_elements.delete(tmpNode)
          end
          # Step 7.3
          break if node == afeElement
          # Step 7.4
          if lastNode == furthestBlock
            # XXX should this be index(node) or index(node)+1
            # Anne: I think +1 is ok. Given x = [2,3,4,5]
            # x.index(3) gives 1 and then x[1 +1] gives 4...
            bookmark = @tree.activeFormattingElements.index(node) + 1
          end
          # Step 7.5
          if node.hasContent
            clone = node.cloneNode
            # Replace node with clone
            @tree.activeFormattingElements[@tree.activeFormattingElements.index(node)] = clone
            @tree.open_elements[@tree.open_elements.index(node)] = clone
            node = clone
          end
          # Step 7.6
          # Remove lastNode from its parents, if any
          lastNode.parent.removeChild(lastNode) if lastNode.parent
          node.appendChild(lastNode)
          # Step 7.7
          lastNode = node
          # End of inner loop
        end

        # Step 8
        lastNode.parent.removeChild(lastNode) if lastNode.parent
        commonAncestor.appendChild(lastNode)

        # Step 9
        clone = afeElement.cloneNode

        # Step 10
        furthestBlock.reparentChildren(clone)

        # Step 11
        furthestBlock.appendChild(clone)

        # Step 12
        @tree.activeFormattingElements.delete(afeElement)
        @tree.activeFormattingElements.insert([bookmark,@tree.activeFormattingElements.length].min, clone)

        # Step 13
        @tree.open_elements.delete(afeElement)
        @tree.open_elements.insert(@tree.open_elements.index(furthestBlock) + 1, clone)
      end
    end

    # </button>, </marquee>, </object>: like a block end tag, and the
    # active formatting elements are cleared when the element was in scope.
    def endTagButtonMarqueeObject(name)
      @tree.generateImpliedEndTags if in_scope?(name)

      unless @tree.open_elements.last.name == name
        parse_error("end-tag-too-early", {"name" => name})
      end

      if in_scope?(name)
        remove_open_elements_until(name)

        @tree.clearActiveFormattingElements
      end
    end

    def endTagMisplaced(name)
      # This handles elements with end tags in other insertion modes.
      parse_error("unexpected-end-tag", {"name" => name})
    end

    # </br> is reported and then treated like a <br> start tag: an empty
    # br element is inserted and immediately popped.
    def endTagBr(name)
      parse_error("unexpected-end-tag-treated-as",
                  {"originalName" => "br", "newName" => "br element"})
      @tree.reconstructActiveFormattingElements
      @tree.insert_element(name, {})
      @tree.open_elements.pop()
    end

    def endTagNone(name)
      # This handles elements with no end tag.
      parse_error("no-end-tag", {"name" => name})
    end

    # End tags of CDATA/RCDATA-style elements: pop the current node when it
    # matches, report an error otherwise.
    def endTagCdataTextAreaXmp(name)
      if @tree.open_elements.last.name == name
        @tree.open_elements.pop
      else
        parse_error("unexpected-end-tag", {"name" => name})
      end
    end

    def endTagNew(name)
      # New HTML5 elements, "event-source", "section", "nav",
      # "article", "aside", "header", "footer", "datagrid", "command"
      # STDERR.puts "Warning: Undefined behaviour for end tag #{name}"
      endTagOther(name)
      #raise NotImplementedError
    end

    # Any other end tag: walk the stack from the current node downwards;
    # close everything up to the first element with this name, or give up
    # (with an error) at the first special/scoping element.
    def endTagOther(name)
      # XXX This logic should be moved into the treebuilder
      @tree.open_elements.reverse.each do |node|
        if node.name == name
          @tree.generateImpliedEndTags

          unless @tree.open_elements.last.name == name
            parse_error("unexpected-end-tag", {"name" => name})
          end

          remove_open_elements_until {|element| element == node }

          break
        else
          if (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name)
            parse_error("unexpected-end-tag", {"name" => name})
            break
          end
        end
      end
    end

    protected

    # Insert the element and append it to the active formatting elements.
    def addFormattingElement(name, attributes)
      @tree.insert_element(name, attributes)
      @tree.activeFormattingElements.push(@tree.open_elements.last)
    end

  end
end

View file

@ -0,0 +1,69 @@
require 'html5/html5parser/phase'
module HTML5
  # Tree-construction phase for content inside a table <caption>.
  class InCaptionPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-caption

    handle_start 'html', %w(caption col colgroup tbody td tfoot th thead tr) => 'TableElement'

    handle_end 'caption', 'table', %w(body col colgroup html tbody td tfoot th thead tr) => 'Ignore'

    # True when no caption element is in (table) scope, i.e. when a
    # </caption> end tag would be ignored.
    def ignoreEndTagCaption
      !in_scope?('caption', true)
    end

    # Character data is processed by the inBody phase.
    def processCharacters(data)
      @parser.phases[:inBody].processCharacters(data)
    end

    # Table-structure start tags imply </caption>; the start tag is then
    # reprocessed unless that implied end tag was ignored.
    def startTagTableElement(name, attributes)
      parse_error("unexpected-end-tag", {"name" => name})
      #XXX Have to duplicate logic here to find out if the tag is ignored
      caption_ignored = ignoreEndTagCaption
      @parser.phase.processEndTag('caption')
      @parser.phase.processStartTag(name, attributes) unless caption_ignored
    end

    # Any other start tag is processed by the inBody phase.
    def startTagOther(name, attributes)
      @parser.phases[:inBody].processStartTag(name, attributes)
    end

    # </caption>: close the caption, clear the active formatting elements
    # and return to the inTable phase -- unless no caption is in scope.
    def endTagCaption(name)
      if ignoreEndTagCaption
        # inner_html case
        assert @parser.inner_html
        return parse_error("unexpected-end-tag", {"name" => name})
      end

      # AT this code is quite similar to endTagTable in "InTable"
      @tree.generateImpliedEndTags

      if @tree.open_elements.last.name != 'caption'
        parse_error("expected-one-end-tag-but-got-another",
                    {"gotName" => "caption",
                     "expectedName" => @tree.open_elements.last.name})
      end

      remove_open_elements_until('caption')

      @tree.clearActiveFormattingElements
      @parser.phase = @parser.phases[:inTable]
    end

    # </table> implies </caption>; the end tag is then reprocessed unless
    # that implied end tag was ignored.
    def endTagTable(name)
      parse_error("unexpected-end-table-in-caption")
      caption_ignored = ignoreEndTagCaption
      @parser.phase.processEndTag('caption')
      @parser.phase.processEndTag(name) unless caption_ignored
    end

    # End tags that have no meaning inside a caption are only reported.
    def endTagIgnore(name)
      parse_error "unexpected-end-tag", {"name" => name}
    end

    # Any other end tag is processed by the inBody phase.
    def endTagOther(name)
      @parser.phases[:inBody].processEndTag(name)
    end

  end
end

View file

@ -0,0 +1,78 @@
require 'html5/html5parser/phase'
module HTML5
  # Tree-construction phase for content inside a table cell (<td> / <th>).
  class InCellPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-cell

    handle_start 'html', %w( caption col colgroup tbody td tfoot th thead tr ) => 'TableOther'

    handle_end %w( td th ) => 'TableCell', %w( body caption col colgroup html ) => 'Ignore'

    handle_end %w( table tbody tfoot thead tr ) => 'Imply'

    # Character data is processed by the inBody phase.
    def processCharacters(data)
      @parser.phases[:inBody].processCharacters(data)
    end

    # Table-structure start tags close the current cell and are then
    # reprocessed by whatever phase is current afterwards.
    def startTagTableOther(name, attributes)
      if in_scope?('td', true) or in_scope?('th', true)
        closeCell
        @parser.phase.processStartTag(name, attributes)
      else
        # inner_html case
        parse_error
      end
    end

    # Any other start tag is processed by the inBody phase.
    def startTagOther(name, attributes)
      @parser.phases[:inBody].processStartTag(name, attributes)
    end

    # </td>, </th>: close the cell when it is in table scope, clear the
    # active formatting elements and return to the inRow phase.
    def endTagTableCell(name)
      if in_scope?(name, true)
        @tree.generateImpliedEndTags(name)
        if @tree.open_elements.last.name != name
          parse_error("unexpected-cell-end-tag", {"name" => name})
          remove_open_elements_until(name)
        else
          @tree.open_elements.pop
        end
        @tree.clearActiveFormattingElements
        @parser.phase = @parser.phases[:inRow]
      else
        parse_error("unexpected-end-tag", {"name" => name})
      end
    end

    # End tags that have no meaning inside a cell are only reported.
    def endTagIgnore(name)
      parse_error("unexpected-end-tag", {"name" => name})
    end

    # </table>, </tbody>, </tfoot>, </thead>, </tr>: when the element is in
    # table scope, close the cell and reprocess the end tag.
    def endTagImply(name)
      if in_scope?(name, true)
        closeCell
        @parser.phase.processEndTag(name)
      else
        # sometimes inner_html case
        # String key, consistent with the other parse_error calls here.
        parse_error "unexpected-end-tag", {"name" => name}
      end
    end

    # Any other end tag is processed by the inBody phase.
    def endTagOther(name)
      @parser.phases[:inBody].processEndTag(name)
    end

    protected

    # Close whichever cell type (td first, then th) is in table scope.
    def closeCell
      if in_scope?('td', true)
        endTagTableCell('td')
      elsif in_scope?('th', true)
        endTagTableCell('th')
      end
    end

  end
end

View file

@ -0,0 +1,55 @@
require 'html5/html5parser/phase'
module HTML5
  # Tree-construction phase for content inside a <colgroup>.
  class InColumnGroupPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-column

    handle_start 'html', 'col'

    handle_end 'colgroup', 'col'

    # True when the current node is the html element, in which case a
    # </colgroup> end tag is ignored.
    def ignoreEndTagColgroup
      @tree.open_elements[-1].name == 'html'
    end

    # Character data implies </colgroup>; it is then reprocessed unless
    # that implied end tag was ignored.
    def processCharacters(data)
      # Sample the flag first: endTagColgroup changes the stack.
      ignoreEndTag = ignoreEndTagColgroup
      endTagColgroup("colgroup")
      @parser.phase.processCharacters(data) unless ignoreEndTag
    end

    # <col>: insert and immediately pop.
    def startTagCol(name, attributes)
      @tree.insert_element(name, attributes)
      @tree.open_elements.pop
    end

    # Any other start tag implies </colgroup> and is then reprocessed
    # unless that implied end tag was ignored.
    def startTagOther(name, attributes)
      ignoreEndTag = ignoreEndTagColgroup
      endTagColgroup('colgroup')
      @parser.phase.processStartTag(name, attributes) unless ignoreEndTag
    end

    # </colgroup>: pop the current node and return to the inTable phase,
    # unless the end tag has to be ignored.
    def endTagColgroup(name)
      if ignoreEndTagColgroup
        # inner_html case
        assert @parser.inner_html
        # String key, consistent with the other parse_error calls here.
        parse_error "unexpected-end-tag", {"name" => name}
      else
        @tree.open_elements.pop
        @parser.phase = @parser.phases[:inTable]
      end
    end

    # </col> is only reported.
    def endTagCol(name)
      parse_error("no-end-tag", {"name" => "col"})
    end

    # Any other end tag implies </colgroup> and is then reprocessed unless
    # that implied end tag was ignored.
    def endTagOther(name)
      ignoreEndTag = ignoreEndTagColgroup
      endTagColgroup('colgroup')
      @parser.phase.processEndTag(name) unless ignoreEndTag
    end

  end
end

View file

@ -0,0 +1,56 @@
require 'html5/html5parser/phase'
module HTML5
  # Tree-construction phase for content inside a <frameset>.
  class InFramesetPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-frameset

    handle_start 'html', 'frameset', 'frame', 'noframes'

    handle_end 'frameset', 'noframes'

    # Character data is not expected here; it is only reported.
    def processCharacters(data)
      parse_error "unexpected-char-in-frameset"
    end

    # Nested <frameset>: insert the element.
    def startTagFrameset(name, attributes)
      @tree.insert_element(name, attributes)
    end

    # <frame>: insert and immediately pop.
    def startTagFrame(name, attributes)
      @tree.insert_element(name, attributes)
      @tree.open_elements.pop
    end

    # <noframes> is processed by the inBody phase.
    def startTagNoframes(name, attributes)
      in_body = @parser.phases[:inBody]
      in_body.processStartTag(name, attributes)
    end

    # Any other start tag is reported and otherwise ignored.
    def startTagOther(name, attributes)
      parse_error "unexpected-start-tag-in-frameset", {"name" => name}
    end

    # </frameset>: pop the current node unless it is the html element,
    # then possibly leave this phase.
    def endTagFrameset(name)
      if @tree.open_elements.last.name == 'html'
        # inner_html case
        parse_error "unexpected-frameset-in-frameset-innerhtml"
      else
        @tree.open_elements.pop
      end

      # Outside inner_html mode, switch phases once the current node is no
      # longer a "frameset" element.
      unless @parser.inner_html || @tree.open_elements.last.name == 'frameset'
        @parser.phase = @parser.phases[:afterFrameset]
      end
    end

    # </noframes> is processed by the inBody phase.
    def endTagNoframes(name)
      in_body = @parser.phases[:inBody]
      in_body.processEndTag(name)
    end

    # Any other end tag is reported and otherwise ignored.
    def endTagOther(name)
      parse_error "unexpected-end-tag-in-frameset", {"name" => name}
    end

  end
end

View file

@ -0,0 +1,138 @@
require 'html5/html5parser/phase'
module HTML5
  # Tree-construction phase for content inside <head>. Several of its
  # start-tag handlers are also invoked from other phases, which is why
  # they check whether inHead is really the parser's current phase.
  class InHeadPhase < Phase

    handle_start 'html', 'head', 'title', 'style', 'script', 'noscript'
    handle_start %w( base link meta )

    handle_end 'head'
    handle_end %w( html body br p ) => 'ImplyAfterHead'
    handle_end %w( title style script noscript )

    # End of input: an unclosed title/style/script is reported and popped;
    # the phase is then left and the next phase sees the EOF.
    def process_eof
      name = @tree.open_elements.last.name
      if ['title', 'style', 'script'].include?(name)
        parse_error("expected-named-closing-tag-but-got-eof", {"name" => name})
        @tree.open_elements.pop
      end
      anything_else
      @parser.phase.process_eof
    end

    # Text inside title/style/script/noscript is inserted as is; any other
    # character data ends the head and is reprocessed by the next phase.
    def processCharacters(data)
      if %w[title style script noscript].include?(@tree.open_elements.last.name)
        @tree.insertText(data)
      else
        anything_else
        @parser.phase.processCharacters(data)
      end
    end

    # A second <head> is only reported.
    def startTagHead(name, attributes)
      parse_error("two-heads-are-not-better-than-one")
    end

    # <title>: append to the head, push onto the stack and switch the
    # tokenizer to :RCDATA.
    def startTagTitle(name, attributes)
      element = @tree.createElement(name, attributes)
      appendToHead(element)
      @tree.open_elements.push(element)
      @parser.tokenizer.content_model_flag = :RCDATA
    end

    # <style>: attach the element, push it and switch the tokenizer to :CDATA.
    def startTagStyle(name, attributes)
      element = @tree.createElement(name, attributes)
      appendToHeadOrCurrentNode(element)
      @tree.open_elements.push(element)
      @parser.tokenizer.content_model_flag = :CDATA
    end

    # <noscript>: attach the element, push it and switch the tokenizer to :CDATA.
    def startTagNoscript(name, attributes)
      # XXX Need to decide whether to implement the scripting disabled case.
      element = @tree.createElement(name, attributes)
      appendToHeadOrCurrentNode(element)
      @tree.open_elements.push(element)
      @parser.tokenizer.content_model_flag = :CDATA
    end

    # <script>: flag the element as parser-inserted, attach it, push it
    # and switch the tokenizer to :CDATA.
    def startTagScript(name, attributes)
      #XXX Inner HTML case may be wrong
      element = @tree.createElement(name, attributes)
      element._flags.push("parser-inserted")
      appendToHeadOrCurrentNode(element)
      @tree.open_elements.push(element)
      @parser.tokenizer.content_model_flag = :CDATA
    end

    # <base>, <link>, <meta>: attach the element; it is never pushed onto
    # the stack of open elements.
    def startTagBaseLinkMeta(name, attributes)
      element = @tree.createElement(name, attributes)
      appendToHeadOrCurrentNode(element)
    end

    # Any other start tag ends the head and is reprocessed by the next phase.
    def startTagOther(name, attributes)
      anything_else
      @parser.phase.processStartTag(name, attributes)
    end

    # </head>: pop the head element when it is the current node (an error
    # otherwise) and switch to the afterHead phase.
    def endTagHead(name)
      if @tree.open_elements.last.name == 'head'
        @tree.open_elements.pop
      else
        parse_error("unexpected-end-tag", {"name" => "head"})
      end
      @parser.phase = @parser.phases[:afterHead]
    end

    # </html>, </body>, </br>, </p>: end the head, then reprocess the tag.
    def endTagImplyAfterHead(name)
      anything_else
      @parser.phase.processEndTag(name)
    end

    # </title>, </style>, </script>, </noscript>: pop the current node when
    # it matches, report an error otherwise.
    def endTagTitleStyleScriptNoscript(name)
      if @tree.open_elements.last.name == name
        @tree.open_elements.pop
      else
        parse_error("unexpected-end-tag", {"name" => name})
      end
    end

    # Any other end tag is reported and otherwise ignored.
    def endTagOther(name)
      parse_error("unexpected-end-tag", {"name" => name})
    end

    # Leave this phase: close the head element when it is the current node,
    # otherwise just switch to the afterHead phase.
    def anything_else
      if @tree.open_elements.last.name == 'head'
        endTagHead('head')
      else
        @parser.phase = @parser.phases[:afterHead]
      end
    end

    protected

    # Append to the head element; without a head pointer (inner_html case)
    # append to the current node instead.
    def appendToHead(element)
      if @tree.head_pointer.nil?
        assert @parser.inner_html
        @tree.open_elements.last.appendChild(element)
      else
        @tree.head_pointer.appendChild(element)
      end
    end

    # Shared by the style/noscript/script/base/link/meta handlers: append
    # to the head only when a head pointer exists and inHead is the
    # parser's current phase; otherwise append to the current node.
    def appendToHeadOrCurrentNode(element)
      if !@tree.head_pointer.nil? && @parser.phase == @parser.phases[:inHead]
        appendToHead(element)
      else
        @tree.open_elements.last.appendChild(element)
      end
    end

  end
end

View file

@ -0,0 +1,88 @@
require 'html5/html5parser/phase'
module HTML5
  # Tree-construction phase for content inside a table row (<tr>).
  class InRowPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-row

    handle_start 'html', %w( td th ) => 'TableCell', %w( caption col colgroup tbody tfoot thead tr ) => 'TableOther'

    handle_end 'tr', 'table', %w( tbody tfoot thead ) => 'TableRowGroup', %w( body caption col colgroup html td th ) => 'Ignore'

    # Character data is processed by the inTable phase.
    def processCharacters(data)
      @parser.phases[:inTable].processCharacters(data)
    end

    # <td>, <th>: clear the stack back to the row, insert the cell, switch
    # to the inCell phase and push a Marker onto the active formatting
    # elements.
    def startTagTableCell(name, attributes)
      clearStackToTableRowContext
      @tree.insert_element(name, attributes)
      @parser.phase = @parser.phases[:inCell]
      @tree.activeFormattingElements.push(Marker)
    end

    # Table-structure start tags imply </tr>; the start tag is then
    # reprocessed unless that implied end tag was ignored.
    def startTagTableOther(name, attributes)
      # Sample the flag first: endTagTr changes the stack.
      ignoreEndTag = ignoreEndTagTr
      endTagTr('tr')
      # XXX how are we sure it's always ignored in the inner_html case?
      @parser.phase.processStartTag(name, attributes) unless ignoreEndTag
    end

    # Any other start tag is processed by the inTable phase.
    def startTagOther(name, attributes)
      @parser.phases[:inTable].processStartTag(name, attributes)
    end

    # </tr>: clear the stack back to the row, pop it and return to the
    # inTableBody phase -- unless no tr is in table scope.
    def endTagTr(name)
      if ignoreEndTagTr
        # inner_html case
        assert @parser.inner_html
        # String key, consistent with the other parse_error calls here.
        parse_error "unexpected-end-tag", {"name" => name}
      else
        clearStackToTableRowContext
        @tree.open_elements.pop
        @parser.phase = @parser.phases[:inTableBody]
      end
    end

    # </table> implies </tr>.
    def endTagTable(name)
      ignoreEndTag = ignoreEndTagTr
      endTagTr('tr')
      # Reprocess the current tag if the tr end tag was not ignored
      # XXX how are we sure it's always ignored in the inner_html case?
      @parser.phase.processEndTag(name) unless ignoreEndTag
    end

    # </tbody>, </tfoot>, </thead>: when the element is in table scope,
    # close the row and reprocess the end tag.
    def endTagTableRowGroup(name)
      if in_scope?(name, true)
        endTagTr('tr')
        @parser.phase.processEndTag(name)
      else
        # inner_html case
        parse_error "unexpected-end-tag", {"name" => name}
      end
    end

    # End tags that have no meaning inside a row are only reported.
    def endTagIgnore(name)
      parse_error("unexpected-end-tag-in-table-row",
                  {"name" => name})
    end

    # Any other end tag is processed by the inTable phase.
    def endTagOther(name)
      @parser.phases[:inTable].processEndTag(name)
    end

    protected

    # XXX unify this with other table helper methods
    # Pop (and report) every element above the nearest tr or html element.
    def clearStackToTableRowContext
      until %w[tr html].include?(@tree.open_elements.last.name)
        parse_error("unexpected-implied-end-tag-in-table-row", {"name" => @tree.open_elements.last.name})
        @tree.open_elements.pop
      end
    end

    # True when no tr element is in table scope. The table-scope flag is
    # passed as a plain boolean, as at every other in_scope? call site.
    def ignoreEndTagTr
      not in_scope?('tr', true)
    end

  end
end

View file

@ -0,0 +1,85 @@
require 'html5/html5parser/phase'
module HTML5
# Phase handling the tokens that appear inside a <select> element.
class InSelectPhase < Phase
  # http://www.whatwg.org/specs/web-apps/current-work/#in-select

  handle_start 'html', 'option', 'optgroup', 'select'

  handle_end 'option', 'optgroup', 'select', %w( caption table tbody tfoot thead tr td th ) => 'TableElements'

  # Character data becomes text in the current node.
  def processCharacters(data)
    @tree.insertText(data)
  end

  # <option>: an <option> that is still the current node is closed first.
  def startTagOption(name, attributes)
    stack = @tree.open_elements
    stack.pop if stack.last.name == 'option'
    @tree.insert_element(name, attributes)
  end

  # <optgroup>: closes an open <option> and then an open <optgroup>, each
  # only if it is the current node at that point.
  def startTagOptgroup(name, attributes)
    stack = @tree.open_elements
    stack.pop if stack.last.name == 'option'
    stack.pop if stack.last.name == 'optgroup'
    @tree.insert_element(name, attributes)
  end

  # A nested <select> is a parse error and is treated like </select>.
  def startTagSelect(name, attributes)
    parse_error("unexpected-select-in-select")
    endTagSelect('select')
  end

  # Every other start tag is reported and otherwise ignored.
  def startTagOther(name, attributes)
    parse_error("unexpected-start-tag-in-select", {"name" => name})
  end

  # </option>: pops the current node if it is an <option>, else reports it.
  def endTagOption(name)
    stack = @tree.open_elements
    return stack.pop if stack.last.name == 'option'

    parse_error("unexpected-end-tag-in-select", {"name" => "option"})
  end

  # </optgroup>: implicitly closes an <option> sitting directly inside an
  # <optgroup>, then closes the <optgroup> itself; nothing else is closed.
  def endTagOptgroup(name)
    stack = @tree.open_elements
    stack.pop if stack.last.name == 'option' && stack[-2].name == 'optgroup'

    if stack.last.name != 'optgroup'
      parse_error("unexpected-end-tag-in-select", {"name" => "optgroup"})
    else
      stack.pop
    end
  end

  # </select>: pops up to and including the <select> and resets the
  # insertion mode, provided a <select> is in table scope.
  def endTagSelect(name)
    if !in_scope?('select', true)
      # inner_html case
      parse_error
    else
      remove_open_elements_until('select')
      @parser.reset_insertion_mode
    end
  end

  # Table-related end tags: always a parse error; when the named element is
  # in table scope the <select> is closed and the end tag is reprocessed.
  def endTagTableElements(name)
    parse_error("unexpected-end-tag-in-select", {"name" => name})
    return unless in_scope?(name, true)

    endTagSelect('select')
    @parser.phase.processEndTag(name)
  end

  # Every other end tag is reported and otherwise ignored.
  def endTagOther(name)
    parse_error("unexpected-end-tag-in-select", {"name" => name})
  end
end
end

View file

@ -0,0 +1,84 @@
require 'html5/html5parser/phase'
module HTML5
# Phase the parser is switched to after a <tbody>, <thead> or <tfoot> has
# been inserted; it handles the tokens that may appear inside a row group.
class InTableBodyPhase < Phase
  # http://www.whatwg.org/specs/web-apps/current-work/#in-table0

  handle_start 'html', 'tr', %w( td th ) => 'TableCell', %w( caption col colgroup tbody tfoot thead ) => 'TableOther'

  handle_end 'table', %w( tbody tfoot thead ) => 'TableRowGroup', %w( body caption col colgroup html td th tr ) => 'Ignore'

  # Character data is delegated to the inTable phase.
  def processCharacters(data)
    @parser.phases[:inTable].processCharacters(data)
  end

  # <tr>: pop back to the row group, insert the row, switch to inRow.
  def startTagTr(name, attributes)
    clearStackToTableBodyContext
    @tree.insert_element(name, attributes)
    @parser.phase = @parser.phases[:inRow]
  end

  # A cell without a row: report it, imply a <tr> and reprocess the tag.
  def startTagTableCell(name, attributes)
    parse_error("unexpected-cell-in-table-body", {"name" => name})
    startTagTr('tr', {})
    @parser.phase.processStartTag(name, attributes)
  end

  # Start tags that end the current row group: close the group and
  # reprocess the tag in whatever phase is current afterwards.
  def startTagTableOther(name, attributes)
    if rowGroupInScope?
      clearStackToTableBodyContext
      endTagTableRowGroup(@tree.open_elements.last.name)
      @parser.phase.processStartTag(name, attributes)
    else
      # inner_html case
      # String key, matching every other parse_error call in this class.
      parse_error "unexpected-start-tag", {"name" => name}
    end
  end

  # Any other start tag is delegated to the inTable phase.
  def startTagOther(name, attributes)
    @parser.phases[:inTable].processStartTag(name, attributes)
  end

  # </tbody>, </tfoot>, </thead>: close the row group and go back to the
  # inTable phase; report the tag if the element is not in table scope.
  def endTagTableRowGroup(name)
    if in_scope?(name, true)
      clearStackToTableBodyContext
      @tree.open_elements.pop
      @parser.phase = @parser.phases[:inTable]
    else
      parse_error("unexpected-end-tag-in-table-body", {"name" => name})
    end
  end

  # </table>: close the current row group, then reprocess the end tag.
  def endTagTable(name)
    if rowGroupInScope?
      clearStackToTableBodyContext
      endTagTableRowGroup(@tree.open_elements.last.name)
      @parser.phase.processEndTag(name)
    else
      # inner_html case
      parse_error "unexpected-end-tag", {"name" => name}
    end
  end

  # End tags that are only reported as a parse error in this phase.
  def endTagIgnore(name)
    parse_error("unexpected-end-tag-in-table-body", {"name" => name})
  end

  # Any other end tag is delegated to the inTable phase.
  def endTagOther(name)
    @parser.phases[:inTable].processEndTag(name)
  end

  protected

  # True when a <tbody>, <thead> or <tfoot> is in table scope. Shared by
  # startTagTableOther and endTagTable.
  def rowGroupInScope?
    in_scope?('tbody', true) || in_scope?('thead', true) || in_scope?('tfoot', true)
  end

  # Pops open elements until the current node is a row group or <html>,
  # reporting each popped element as an implied end tag.
  def clearStackToTableBodyContext
    until %w[tbody tfoot thead html].include?(@tree.open_elements.last.name)
      parse_error("unexpected-implied-end-tag-in-table",
              {"name" => @tree.open_elements.last.name})
      @tree.open_elements.pop
    end
  end
end
end

View file

@ -0,0 +1,115 @@
require 'html5/html5parser/phase'
module HTML5
# Phase handling the tokens that appear directly inside a <table>. Content
# that is not table structure is processed by the inBody phase with the
# tree's insert_from_table flag switched on for the duration of the call.
class InTablePhase < Phase
  # http://www.whatwg.org/specs/web-apps/current-work/#in-table

  handle_start 'html', 'caption', 'colgroup', 'col', 'table'

  handle_start %w( tbody tfoot thead ) => 'RowGroup', %w( td th tr ) => 'ImplyTbody'

  handle_end 'table', %w( body caption col colgroup html tbody td tfoot th thead tr ) => 'Ignore'

  # Character data directly inside a table is a parse error.
  def processCharacters(data)
    parse_error("unexpected-char-implies-table-voodoo")
    # Make all the special element rearranging voodoo kick in
    @tree.insert_from_table = true
    # Process the character in the "in body" mode
    @parser.phases[:inBody].processCharacters(data)
    @tree.insert_from_table = false
  end

  # <caption>: pop back to the table, push a Marker onto the active
  # formatting elements, insert the element, switch to inCaption.
  def startTagCaption(name, attributes)
    clearStackToTableContext
    @tree.activeFormattingElements.push(Marker)
    @tree.insert_element(name, attributes)
    @parser.phase = @parser.phases[:inCaption]
  end

  # <colgroup>: pop back to the table, insert it, switch to inColumnGroup.
  def startTagColgroup(name, attributes)
    clearStackToTableContext
    @tree.insert_element(name, attributes)
    @parser.phase = @parser.phases[:inColumnGroup]
  end

  # <col>: imply a <colgroup> and reprocess the tag in the new phase.
  def startTagCol(name, attributes)
    startTagColgroup('colgroup', {})
    @parser.phase.processStartTag(name, attributes)
  end

  # <tbody>/<tfoot>/<thead>: pop back to the table, insert the row group,
  # switch to inTableBody.
  def startTagRowGroup(name, attributes)
    clearStackToTableContext
    @tree.insert_element(name, attributes)
    @parser.phase = @parser.phases[:inTableBody]
  end

  # <td>/<th>/<tr>: imply a <tbody> and reprocess the tag in the new phase.
  def startTagImplyTbody(name, attributes)
    startTagRowGroup('tbody', {})
    @parser.phase.processStartTag(name, attributes)
  end

  # A nested <table> start tag implies </table>; outside inner_html mode
  # the start tag is then reprocessed.
  def startTagTable(name, attributes)
    parse_error("unexpected-start-tag-implies-end-tag",
          {"startName" => "table", "endName" => "table"})
    @parser.phase.processEndTag('table')
    @parser.phase.processStartTag(name, attributes) unless @parser.inner_html
  end

  # Any other start tag is a parse error and is processed by inBody.
  def startTagOther(name, attributes)
    parse_error("unexpected-start-tag-implies-table-voodoo",
          {"name" => name})
    # Make all the special element rearranging voodoo kick in
    @tree.insert_from_table = true
    # Process the start tag in the "in body" mode
    @parser.phases[:inBody].processStartTag(name, attributes)
    @tree.insert_from_table = false
  end

  # </table>: generate implied end tags, pop up to and including the
  # <table>, then reset the insertion mode.
  def endTagTable(name)
    if in_scope?('table', true)
      @tree.generateImpliedEndTags
      unless @tree.open_elements.last.name == 'table'
        parse_error("end-tag-too-early-named",
                {"gotName" => "table",
                 "expectedName" => @tree.open_elements.last.name})
      end
      remove_open_elements_until('table')
      @parser.reset_insertion_mode
    else
      # inner_html case
      assert @parser.inner_html
      # String key, matching every other parse_error call in this class.
      parse_error "unexpected-end-tag", {"name" => name}
    end
  end

  # End tags that are only reported as a parse error in this phase.
  def endTagIgnore(name)
    parse_error("unexpected-end-tag", {"name" => name})
  end

  # Any other end tag is a parse error and is processed by inBody.
  def endTagOther(name)
    parse_error("unexpected-end-tag-implies-table-voodoo", {"name" => name})
    # Make all the special element rearranging voodoo kick in
    @tree.insert_from_table = true
    # Process the end tag in the "in body" mode
    @parser.phases[:inBody].processEndTag(name)
    @tree.insert_from_table = false
  end

  protected

  # "clear the stack back to a table context": pops open elements until the
  # current node is a <table> or <html>, reporting each popped element.
  def clearStackToTableContext
    until %w[table html].include?(@tree.open_elements.last.name)
      parse_error("unexpected-implied-end-tag-in-table",
              {"name" => @tree.open_elements.last.name})
      @tree.open_elements.pop
    end
    # When the current node is <html> it's an inner_html case
  end
end
end

View file

@ -0,0 +1,133 @@
require 'html5/html5parser/phase'
module HTML5
# First phase of tree construction: consumes an optional DOCTYPE and then
# hands every other token over to the rootElement phase.
class InitialPhase < Phase

  # This phase deals with error handling as well which is currently not
  # covered in the specification. The error handling is typically known as
  # "quirks mode". It is expected that a future version of HTML5 will define this.

  # Public identifiers (lower-cased) that select quirks mode on their own.
  QUIRKS_PUBLIC_IDS = [
    "+//silmaril//dtd html pro v0r11 19970101//en",
    "-//advasoft ltd//dtd html 3.0 aswedit + extensions//en",
    "-//as//dtd html 3.0 aswedit + extensions//en",
    "-//ietf//dtd html 2.0 level 1//en",
    "-//ietf//dtd html 2.0 level 2//en",
    "-//ietf//dtd html 2.0 strict level 1//en",
    "-//ietf//dtd html 2.0 strict level 2//en",
    "-//ietf//dtd html 2.0 strict//en",
    "-//ietf//dtd html 2.0//en",
    "-//ietf//dtd html 2.1e//en",
    "-//ietf//dtd html 3.0//en",
    "-//ietf//dtd html 3.0//en//",
    "-//ietf//dtd html 3.2 final//en",
    "-//ietf//dtd html 3.2//en",
    "-//ietf//dtd html 3//en",
    "-//ietf//dtd html level 0//en",
    "-//ietf//dtd html level 0//en//2.0",
    "-//ietf//dtd html level 1//en",
    "-//ietf//dtd html level 1//en//2.0",
    "-//ietf//dtd html level 2//en",
    "-//ietf//dtd html level 2//en//2.0",
    "-//ietf//dtd html level 3//en",
    "-//ietf//dtd html level 3//en//3.0",
    "-//ietf//dtd html strict level 0//en",
    "-//ietf//dtd html strict level 0//en//2.0",
    "-//ietf//dtd html strict level 1//en",
    "-//ietf//dtd html strict level 1//en//2.0",
    "-//ietf//dtd html strict level 2//en",
    "-//ietf//dtd html strict level 2//en//2.0",
    "-//ietf//dtd html strict level 3//en",
    "-//ietf//dtd html strict level 3//en//3.0",
    "-//ietf//dtd html strict//en",
    "-//ietf//dtd html strict//en//2.0",
    "-//ietf//dtd html strict//en//3.0",
    "-//ietf//dtd html//en",
    "-//ietf//dtd html//en//2.0",
    "-//ietf//dtd html//en//3.0",
    "-//metrius//dtd metrius presentational//en",
    "-//microsoft//dtd internet explorer 2.0 html strict//en",
    "-//microsoft//dtd internet explorer 2.0 html//en",
    "-//microsoft//dtd internet explorer 2.0 tables//en",
    "-//microsoft//dtd internet explorer 3.0 html strict//en",
    "-//microsoft//dtd internet explorer 3.0 html//en",
    "-//microsoft//dtd internet explorer 3.0 tables//en",
    "-//netscape comm. corp.//dtd html//en",
    "-//netscape comm. corp.//dtd strict html//en",
    "-//o'reilly and associates//dtd html 2.0//en",
    "-//o'reilly and associates//dtd html extended 1.0//en",
    "-//spyglass//dtd html 2.0 extended//en",
    "-//sq//dtd html 2.0 hotmetal + extensions//en",
    "-//sun microsystems corp.//dtd hotjava html//en",
    "-//sun microsystems corp.//dtd hotjava strict html//en",
    "-//w3c//dtd html 3 1995-03-24//en",
    "-//w3c//dtd html 3.2 draft//en",
    "-//w3c//dtd html 3.2 final//en",
    "-//w3c//dtd html 3.2//en",
    "-//w3c//dtd html 3.2s draft//en",
    "-//w3c//dtd html 4.0 frameset//en",
    "-//w3c//dtd html 4.0 transitional//en",
    "-//w3c//dtd html experimental 19960712//en",
    "-//w3c//dtd html experimental 970421//en",
    "-//w3c//dtd w3 html//en",
    "-//w3o//dtd w3 html 3.0//en",
    "-//w3o//dtd w3 html 3.0//en//",
    "-//w3o//dtd w3 html strict 3.0//en//",
    "-//webtechs//dtd mozilla html 2.0//en",
    "-//webtechs//dtd mozilla html//en",
    "-/w3c/dtd html 4.0 transitional/en",
    "html"
  ].freeze

  # Public identifiers (lower-cased) that select quirks mode only when the
  # DOCTYPE carries no system identifier.
  QUIRKS_PUBLIC_IDS_WITHOUT_SYSTEM_ID = [
    "-//w3c//dtd html 4.01 frameset//en",
    "-//w3c//dtd html 4.01 transitional//en"
  ].freeze

  # System identifier that selects quirks mode regardless of the public one.
  QUIRKS_SYSTEM_ID = "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"

  # EOF before any DOCTYPE: report it and let rootElement handle the EOF.
  def process_eof
    parse_error("expected-doctype-but-got-eof")
    @parser.phase = @parser.phases[:rootElement]
    @parser.phase.process_eof
  end

  # Comments seen here are attached to the document node.
  def processComment(data)
    @tree.insert_comment(data, @tree.document)
  end

  # Inserts the DOCTYPE into the tree and switches to the rootElement
  # phase. A DOCTYPE whose name is not "html", or that carries a public or
  # system identifier, is reported as "unknown-doctype". The quirks-mode
  # branch is still an empty placeholder (see the XXX comments).
  def processDoctype(name, publicId, systemId, correct)
    if name.downcase != 'html' or publicId or systemId
      parse_error("unknown-doctype")
    end

    # XXX need to update DOCTYPE tokens
    @tree.insertDoctype(name, publicId, systemId)

    # The identifier tables are lower-case, so normalise with downcase.
    if name.downcase != 'html' || quirksDoctype?(publicId.to_s.downcase, systemId)
      # XXX quirks mode
    end

    @parser.phase = @parser.phases[:rootElement]
  end

  # Whitespace before the DOCTYPE is dropped.
  def processSpaceCharacters(data)
  end

  # Characters before any DOCTYPE: report, then reprocess in rootElement.
  def processCharacters(data)
    parse_error("expected-doctype-but-got-chars")
    @parser.phase = @parser.phases[:rootElement]
    @parser.phase.processCharacters(data)
  end

  # Start tag before any DOCTYPE: report, then reprocess in rootElement.
  def processStartTag(name, attributes)
    parse_error("expected-doctype-but-got-start-tag", {"name" => name})
    @parser.phase = @parser.phases[:rootElement]
    @parser.phase.processStartTag(name, attributes)
  end

  # End tag before any DOCTYPE: report, then reprocess in rootElement.
  def processEndTag(name)
    parse_error("expected-doctype-but-got-end-tag", {"name" => name})
    @parser.phase = @parser.phases[:rootElement]
    @parser.phase.processEndTag(name)
  end

  protected

  # True when the identifiers match one of the quirks-mode tables above.
  # +publicId+ must already be lower-cased; +systemId+ is compared as given.
  def quirksDoctype?(publicId, systemId)
    QUIRKS_PUBLIC_IDS.include?(publicId) ||
      (systemId.nil? && QUIRKS_PUBLIC_IDS_WITHOUT_SYSTEM_ID.include?(publicId)) ||
      systemId == QUIRKS_SYSTEM_ID
  end
end
end

View file

@ -0,0 +1,154 @@
module HTML5
# Base class for helper objects that implement each phase of processing.
#
# Handler methods should be in the following order (they can be omitted):
#
# * EOF
# * Comment
# * Doctype
# * SpaceCharacters
# * Characters
# * StartTag
# - startTag* methods
# * EndTag
# - endTag* methods
#
# Common behaviour of all tree-construction phases: class-level tables that
# map tag names to handler method names, default token handlers, and a few
# helpers working on the tree's stack of open elements.
class Phase

  extend Forwardable
  def_delegators :@parser, :parse_error

  # The following example call:
  #
  #   tag_handlers('startTag', 'html', %w( base link meta ), %w( li dt dd ) => 'ListItem')
  #
  # ...would return a hash equal to this:
  #
  #   { 'html' => 'startTagHtml',
  #     'base' => 'startTagBaseLinkMeta',
  #     'link' => 'startTagBaseLinkMeta',
  #     'meta' => 'startTagBaseLinkMeta',
  #     'li'   => 'startTagListItem',
  #     'dt'   => 'startTagListItem',
  #     'dd'   => 'startTagListItem' }
  #
  def self.tag_handlers(prefix, *tags)
    mapping = {}
    # A trailing Hash maps tag names (or arrays of them) to an explicit
    # handler suffix.
    if tags.last.is_a?(Hash)
      tags.pop.each do |names, handler_method_suffix|
        handler_method = prefix + handler_method_suffix
        Array(names).each {|name| mapping[name] = handler_method }
      end
    end
    # Remaining entries derive the suffix from the capitalized tag names.
    tags.each do |names|
      names = Array(names)
      handler_method = prefix + names.map {|name| name.capitalize }.join
      names.each {|name| mapping[name] = handler_method }
    end
    mapping
  end

  # Per-class table of start tag handlers; unknown tags map to 'startTagOther'.
  def self.start_tag_handlers
    @start_tag_handlers ||= Hash.new('startTagOther')
  end

  # Declare what start tags this Phase handles. Can be called more than once.
  #
  # Example usage:
  #
  #   handle_start 'html'
  #   # html start tags will be handled by a method named 'startTagHtml'
  #
  #   handle_start %w( base link meta )
  #   # base, link and meta start tags will be handled by a method named 'startTagBaseLinkMeta'
  #
  #   handle_start %w( li dt dd ) => 'ListItem'
  #   # li, dt, and dd start tags will be handled by a method named 'startTagListItem'
  #
  def self.handle_start(*tags)
    start_tag_handlers.update tag_handlers('startTag', *tags)
  end

  # Per-class table of end tag handlers; unknown tags map to 'endTagOther'.
  def self.end_tag_handlers
    @end_tag_handlers ||= Hash.new('endTagOther')
  end

  # Declare what end tags this Phase handles. Behaves like handle_start.
  #
  def self.handle_end(*tags)
    end_tag_handlers.update tag_handlers('endTag', *tags)
  end

  # +parser+ receives parse errors and holds the current phase; +tree+ is
  # the tree builder the handlers operate on.
  def initialize(parser, tree)
    @parser, @tree = parser, tree
  end

  # Default EOF handling: generate implied end tags, then report an error
  # if elements other than the expected ones are still open.
  def process_eof
    @tree.generateImpliedEndTags

    if @tree.open_elements.length > 2
      parse_error("expected-closing-tag-but-got-eof")
    elsif @tree.open_elements.length == 2 and @tree.open_elements[1].name != 'body'
      # This happens for framesets or something?
      parse_error("expected-closing-tag-but-got-eof")
    elsif @parser.inner_html and @tree.open_elements.length > 1
      # XXX This is not what the specification says. Not sure what to do here.
      parse_error("eof-in-innerhtml")
    end
    # Betting ends.
  end

  def processComment(data)
    # For most phases the following is correct. Where it's not it will be
    # overridden.
    @tree.insert_comment(data, @tree.open_elements.last)
  end

  # A DOCTYPE reaching this default handler is reported as a parse error.
  def processDoctype(name, publicId, systemId, correct)
    parse_error("unexpected-doctype")
  end

  # Whitespace becomes text in the current node.
  def processSpaceCharacters(data)
    @tree.insertText(data)
  end

  # Dispatches to the handler registered for +name+ via handle_start.
  def processStartTag(name, attributes)
    send self.class.start_tag_handlers[name], name, attributes
  end

  # Copies attributes from an <html> start tag onto the first open element,
  # keeping any attribute values that element already has.
  def startTagHtml(name, attributes)
    if @parser.first_start_tag == false and name == 'html'
      parse_error("non-html-root")
    end

    # XXX Need a check here to see if the first start tag token emitted is
    # this token... If it's not, invoke parse_error.
    attributes.each do |attr, value|
      unless @tree.open_elements.first.attributes.has_key?(attr)
        @tree.open_elements.first.attributes[attr] = value
      end
    end
    @parser.first_start_tag = false
  end

  # Dispatches to the handler registered for +name+ via handle_end.
  def processEndTag(name)
    send self.class.end_tag_handlers[name], name
  end

  # Raises AssertionError unless +value+ is truthy. `raise` is used rather
  # than `throw`, which is non-local control flow and would surface as an
  # "uncaught throw" error instead of the intended exception.
  def assert(value)
    raise AssertionError unless value
  end

  # Forwards all arguments to the tree's elementInScope.
  def in_scope?(*args)
    @tree.elementInScope(*args)
  end

  # Pops open elements until one named +name+ has been popped or the stack
  # is empty. Without a +name+, the given block is called with each popped
  # element and popping stops once it returns a truthy value.
  # Returns the last element popped (nil if the stack was already empty).
  def remove_open_elements_until(name=nil)
    element = nil
    finished = false
    until finished || @tree.open_elements.length == 0
      element = @tree.open_elements.pop
      finished = name.nil? ? yield(element) : element.name == name
    end
    return element
  end
end
end

View file

@ -0,0 +1,41 @@
require 'html5/html5parser/phase'
module HTML5
# Phase that creates the root <html> element. Apart from comments and
# whitespace, every token first triggers creation of the root element and
# is then reprocessed by the phase that creation switches to.
class RootElementPhase < Phase

  # EOF: create the root element, then let the new phase handle the EOF.
  def process_eof
    insert_html_element
    @parser.phase.process_eof
  end

  # Comments seen here are attached to the document node.
  def processComment(data)
    @tree.insert_comment(data, @tree.document)
  end

  # Whitespace is dropped in this phase.
  def processSpaceCharacters(data)
  end

  # Characters: create the root element, then reprocess the characters.
  def processCharacters(data)
    insert_html_element
    @parser.phase.processCharacters(data)
  end

  # Start tag: remember on the parser when it is an <html> tag, create the
  # root element, then reprocess the tag.
  def processStartTag(name, attributes)
    if name == 'html'
      @parser.first_start_tag = true
    end
    insert_html_element
    @parser.phase.processStartTag(name, attributes)
  end

  # End tag: create the root element, then reprocess the tag.
  def processEndTag(name)
    insert_html_element
    @parser.phase.processEndTag(name)
  end

  # Creates an attribute-less <html> element, pushes it onto the stack of
  # open elements, appends it to the document and switches the parser to
  # the beforeHead phase.
  def insert_html_element
    root = @tree.createElement('html', {})
    @tree.open_elements << root
    @tree.document.appendChild(root)
    @parser.phase = @parser.phases[:beforeHead]
  end
end
end

View file

@ -0,0 +1,35 @@
require 'html5/html5parser/phase'
module HTML5
# Phase in which only EOF is expected. Comments go to the document,
# whitespace is forwarded to the parser's last phase, and any other token
# is a parse error after which the parser switches back to its last phase
# and reprocesses the token there.
class TrailingEndPhase < Phase

  # EOF is what this phase expects; nothing to do.
  def process_eof
  end

  # Comments seen here are attached to the document node.
  def processComment(data)
    @tree.insert_comment(data, @tree.document)
  end

  # Whitespace is forwarded to the last phase without switching phases.
  def processSpaceCharacters(data)
    @parser.last_phase.processSpaceCharacters(data)
  end

  # Characters where EOF was expected: report, switch back, reprocess.
  def processCharacters(data)
    parse_error("expected-eof-but-got-char")
    resumeLastPhase.processCharacters(data)
  end

  # Start tag where EOF was expected: report, switch back, reprocess.
  def processStartTag(name, attributes)
    parse_error("expected-eof-but-got-start-tag", {"name" => name})
    resumeLastPhase.processStartTag(name, attributes)
  end

  # End tag where EOF was expected: report, switch back, reprocess.
  def processEndTag(name)
    parse_error("expected-eof-but-got-end-tag", {"name" => name})
    resumeLastPhase.processEndTag(name)
  end

  protected

  # Makes the parser's last phase current again and returns the phase the
  # parser now reports as current.
  def resumeLastPhase
    @parser.phase = @parser.last_phase
    @parser.phase
  end
end
end