require 'html5/html5parser/phase'

module HTML5
  # Parser phase that handles tokens while the tree builder is in the
  # "in body" insertion mode.
  #
  # The handle_start / handle_end macros and helpers such as parse_error,
  # in_scope?, remove_open_elements_until and assert are not defined in this
  # file; presumably they come from Phase -- TODO confirm. The macros appear
  # to associate tag names with the startTag*/endTag* methods below.
  class InBodyPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-body

    handle_start 'html'
    handle_start %w(base link meta script style) => 'ProcessInHead'
    handle_start 'title'

    handle_start 'body', 'form', 'plaintext', 'a', 'button', 'xmp', 'table', 'hr', 'image'

    handle_start 'input', 'textarea', 'select', 'isindex', %w(marquee object)

    handle_start %w(li dd dt) => 'ListItem'

    handle_start %w(address blockquote center dir div dl fieldset listing menu ol p pre ul) => 'CloseP'

    handle_start %w(b big em font i s small strike strong tt u) => 'Formatting'
    handle_start 'nobr'

    handle_start %w(area basefont bgsound br embed img param spacer wbr) => 'VoidFormatting'

    handle_start %w(iframe noembed noframes noscript) => 'Cdata', HEADING_ELEMENTS => 'Heading'

    handle_start %w(caption col colgroup frame frameset head option optgroup tbody td tfoot th thead tr) => 'Misplaced'

    handle_start %w(event-source section nav article aside header footer datagrid command) => 'New'

    handle_end 'p', 'body', 'html', 'form', %w(button marquee object), %w(dd dt li) => 'ListItem'

    handle_end %w(address blockquote center div dl fieldset listing menu ol pre ul) => 'Block'

    handle_end HEADING_ELEMENTS => 'Heading'

    handle_end %w(a b big em font i nobr s small strike strong tt u) => 'Formatting'

    handle_end %w(head frameset select optgroup option table caption colgroup col thead tfoot tbody tr td th) => 'Misplaced'

    handle_end 'br'

    handle_end %w(area basefont bgsound embed hr image img input isindex param spacer wbr frame) => 'None'

    handle_end %w(noframes noscript noembed textarea xmp iframe ) => 'CdataTextAreaXmp'

    handle_end %w(event-source section nav article aside header footer datagrid command) => 'New'

    # Sets up the phase and keeps the regular space-character handler
    # reachable under a second name, so it can be restored after a
    # per-instance override has been installed.
    def initialize(parser, tree)
      super(parser, tree)

      # for special handling of whitespace in <pre>
      class << self
        alias processSpaceCharactersNonPre processSpaceCharacters
      end
    end

    # One-shot space-character handler installed by startTagCloseP for 'pre'.
    # It first puts the regular handler back, then drops a single leading
    # newline when the current node is an empty pre/textarea.
    def processSpaceCharactersDropNewline(data)
      # Sometimes (start of blocks) we want to drop leading newlines
      class << self
        begin
          remove_method :processSpaceCharacters
        rescue NameError
          # no per-instance override is installed; nothing to remove
        end
        alias processSpaceCharacters processSpaceCharactersNonPre
      end

      if data.length > 0 && data[0] == ?\n &&
         %w[pre textarea].include?(@tree.open_elements.last.name) &&
         !@tree.open_elements.last.hasContent
        data = data[1..-1]
      end

      if data.length > 0
        @tree.reconstructActiveFormattingElements
        @tree.insertText(data)
      end
    end

    # Inserts whitespace after reconstructing the active formatting elements.
    def processSpaceCharacters(data)
      @tree.reconstructActiveFormattingElements()
      @tree.insertText(data)
    end

    # Inserts character data after reconstructing the active formatting
    # elements.
    def processCharacters(data)
      # XXX The specification says to do this for every character at the
      # moment, but apparently that doesn't match the real world so we don't
      # do it for space characters.
      @tree.reconstructActiveFormattingElements
      @tree.insertText(data)
    end

    # Delegates the start tag to the :inHead phase.
    def startTagProcessInHead(name, attributes)
      @parser.phases[:inHead].processStartTag(name, attributes)
    end

    # Reports a parse error, then delegates the start tag to the :inHead phase.
    def startTagTitle(name, attributes)
      parse_error("unexpected-start-tag-out-of-my-head", {"name" => name})
      @parser.phases[:inHead].processStartTag(name, attributes)
    end

    # A second <body> is always a parse error. When a body element is the
    # second open element, attributes it does not have yet are copied onto it.
    def startTagBody(name, attributes)
      parse_error("unexpected-start-tag", {"name" => "body"})

      if @tree.open_elements.length == 1 || @tree.open_elements[1].name != 'body'
        assert @parser.inner_html
      else
        attributes.each do |attr, value|
          unless @tree.open_elements[1].attributes.has_key?(attr)
            @tree.open_elements[1].attributes[attr] = value
          end
        end
      end
    end

    # Closes an in-scope <p>, inserts the element and, for 'pre', installs
    # the newline-dropping space handler on this instance.
    def startTagCloseP(name, attributes)
      endTagP('p') if in_scope?('p')
      @tree.insert_element(name, attributes)

      if name == 'pre'
        class << self
          begin
            remove_method :processSpaceCharacters
          rescue NameError
            # no per-instance override is installed; nothing to remove
          end
          alias processSpaceCharacters processSpaceCharactersDropNewline
        end
      end
    end

    # Inserts a form element and records it as the form pointer, unless a
    # form pointer is already set, in which case the tag is a parse error.
    def startTagForm(name, attributes)
      if @tree.formPointer
        parse_error("unexpected-start-tag", {"name" => name})
      else
        endTagP('p') if in_scope?('p')
        @tree.insert_element(name, attributes)
        @tree.formPointer = @tree.open_elements.last
      end
    end

    # Handles <li>, <dd> and <dt>: walks the open elements from the top and,
    # when a matching list item is found, pops everything up to and including
    # it.
    def startTagListItem(name, attributes)
      endTagP('p') if in_scope?('p')

      stop_names = {'li' => ['li'], 'dd' => ['dd', 'dt'], 'dt' => ['dd', 'dt']}
      stop_name = stop_names[name]

      @tree.open_elements.reverse.each_with_index do |node, i|
        if stop_name.include?(node.name)
          popped_nodes = (0..i).collect { @tree.open_elements.pop }
          if i >= 1
            # The last popped node is the matched list item itself; only the
            # elements that sat above it are reported. This keeps the
            # singular message in step with a single reported name.
            parse_error(
              i == 1 ? "missing-end-tag" : "missing-end-tags",
              {"name" => popped_nodes[0...-1].collect { |n| n.name }.join(", ")})
          end
          break
        end

        # Phrasing elements are all non special, non scoping, non
        # formatting elements
        break if (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name) &&
                 !%w[address div].include?(node.name)
      end

      # Always insert an ... (the rest of this comment is missing)
      # NOTE(review): the source text breaks off here. The remainder of this
      # method, every handler that followed it, and the opening of endTagBody
      # are absent. Handlers declared above but not defined in this text (for
      # example 'plaintext', 'a', 'table', 'select') and endTagP, which this
      # class calls, must be restored from the upstream file -- TODO confirm.
    end

    # NOTE(review): the `def` line below was reconstructed; only the tail of
    # this method survives in the source text. Name and arity follow the
    # `endTagBody(name)` call in endTagHtml -- TODO confirm against upstream.
    def endTagBody(name)
      # (start of this comment is missing) ... tags into account here. We
      # shouldn't imply [text lost in the source] but we should not throw a
      # parse error either. Specification is likely to be updated.
      unless @tree.open_elements[1] && @tree.open_elements[1].name == 'body'
        # inner_html case
        # String key, consistent with every other parse_error call here.
        parse_error "unexpected-end-tag", {"name" => 'body'}
        return
      end

      unless @tree.open_elements.last.name == 'body'
        parse_error("expected-one-end-tag-but-got-another",
                    {"expectedName" => "body",
                     "gotName" => @tree.open_elements.last.name})
      end
      @parser.phase = @parser.phases[:afterBody]
    end

    # Treats </html> like </body>, then hands the tag to whichever phase is
    # current afterwards (skipped when parsing inner_html).
    def endTagHtml(name)
      endTagBody(name)
      @parser.phase.processEndTag(name) unless @parser.inner_html
    end

    # End tag for block-level elements.
    def endTagBlock(name)
      @tree.generateImpliedEndTags if in_scope?(name)

      unless @tree.open_elements.last.name == name
        parse_error("end-tag-too-early", {"name" => name})
      end

      # Scope is checked again: generating implied end tags may have
      # changed the stack of open elements.
      remove_open_elements_until(name) if in_scope?(name)
    end

    # End tag for <form>; the form pointer is cleared in every case.
    def endTagForm(name)
      @tree.generateImpliedEndTags if in_scope?(name)

      if @tree.open_elements.last.name == name
        @tree.open_elements.pop
      else
        parse_error("end-tag-too-early-ignored", {"name" => "form"})
      end
      @tree.formPointer = nil
    end

    # End tag for <dd>, <dt> and <li>.
    def endTagListItem(name)
      # AT Could merge this with the Block case
      @tree.generateImpliedEndTags(name) if in_scope?(name)

      unless @tree.open_elements.last.name == name
        parse_error("end-tag-too-early", {"name" => name})
      end

      remove_open_elements_until(name) if in_scope?(name)
    end

    # End tag for heading elements: any heading in scope is closed, not only
    # the one named by the tag.
    def endTagHeading(name)
      if HEADING_ELEMENTS.any? { |heading| in_scope?(heading) }
        @tree.generateImpliedEndTags
      end

      unless @tree.open_elements.last.name == name
        parse_error("end-tag-too-early", {"name" => name})
      end

      if HEADING_ELEMENTS.any? { |heading| in_scope?(heading) }
        remove_open_elements_until { |open| HEADING_ELEMENTS.include?(open.name) }
      end
    end

    # The much-feared adoption agency algorithm
    def endTagFormatting(name)
      # http://www.whatwg.org/specs/web-apps/current-work/#adoptionAgency
      # XXX Better parse_error messages appreciated.
      loop do
        # Step 1 paragraph 1
        afe_element = @tree.elementInActiveFormattingElements(name)
        if !afe_element ||
           (@tree.open_elements.include?(afe_element) && !in_scope?(afe_element.name))
          parse_error("adoption-agency-1.1", {"name" => name})
          return
        # Step 1 paragraph 2
        elsif !@tree.open_elements.include?(afe_element)
          parse_error("adoption-agency-1.2", {"name" => name})
          @tree.activeFormattingElements.delete(afe_element)
          return
        end

        # Step 1 paragraph 3
        if afe_element != @tree.open_elements.last
          parse_error("adoption-agency-1.3", {"name" => name})
        end

        # Step 2
        # Start of the adoption agency algorithm proper
        afe_index = @tree.open_elements.index(afe_element)
        furthest_block = @tree.open_elements[afe_index..-1].find do |candidate|
          (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(candidate.name)
        end

        # Step 3
        if furthest_block.nil?
          popped = remove_open_elements_until { |open| open == afe_element }
          @tree.activeFormattingElements.delete(popped)
          return
        end
        common_ancestor = @tree.open_elements[afe_index - 1]

        # Step 5
        furthest_block.parent.removeChild(furthest_block) if furthest_block.parent

        # Step 6
        # The bookmark is supposed to help us identify where to reinsert
        # nodes in step 12. We have to ensure that we reinsert nodes after
        # the node before the active formatting element. Note the bookmark
        # can move in step 7.4
        bookmark = @tree.activeFormattingElements.index(afe_element)

        # Step 7
        last_node = node = furthest_block
        loop do
          # AT replace this with a function and recursion?
          # Node is element before node in open elements
          node = @tree.open_elements[@tree.open_elements.index(node) - 1]
          until @tree.activeFormattingElements.include?(node)
            skipped = node
            node = @tree.open_elements[@tree.open_elements.index(node) - 1]
            @tree.open_elements.delete(skipped)
          end

          # Step 7.3
          break if node == afe_element

          # Step 7.4
          if last_node == furthest_block
            # XXX should this be index(node) or index(node)+1
            # Anne: I think +1 is ok. Given x = [2,3,4,5]
            # x.index(3) gives 1 and then x[1 +1] gives 4...
            bookmark = @tree.activeFormattingElements.index(node) + 1
          end

          # Step 7.5
          if node.hasContent
            clone = node.cloneNode
            # Replace node with clone
            @tree.activeFormattingElements[@tree.activeFormattingElements.index(node)] = clone
            @tree.open_elements[@tree.open_elements.index(node)] = clone
            node = clone
          end

          # Step 7.6
          # Remove last_node from its parents, if any
          last_node.parent.removeChild(last_node) if last_node.parent
          node.appendChild(last_node)

          # Step 7.7
          last_node = node
          # End of inner loop
        end

        # Step 8
        last_node.parent.removeChild(last_node) if last_node.parent
        common_ancestor.appendChild(last_node)

        # Step 9
        clone = afe_element.cloneNode

        # Step 10
        furthest_block.reparentChildren(clone)

        # Step 11
        furthest_block.appendChild(clone)

        # Step 12
        @tree.activeFormattingElements.delete(afe_element)
        @tree.activeFormattingElements.insert(
          [bookmark, @tree.activeFormattingElements.length].min, clone)

        # Step 13
        @tree.open_elements.delete(afe_element)
        @tree.open_elements.insert(@tree.open_elements.index(furthest_block) + 1, clone)
      end
    end

    # End tag for <button>, <marquee> and <object>; also clears the active
    # formatting elements once the element has been closed.
    def endTagButtonMarqueeObject(name)
      @tree.generateImpliedEndTags if in_scope?(name)

      unless @tree.open_elements.last.name == name
        parse_error("end-tag-too-early", {"name" => name})
      end

      if in_scope?(name)
        remove_open_elements_until(name)
        @tree.clearActiveFormattingElements
      end
    end

    def endTagMisplaced(name)
      # This handles elements with end tags in other insertion modes.
      parse_error("unexpected-end-tag", {"name" => name})
    end

    # </br> is a parse error and is handled by inserting an element with the
    # given name and popping it straight away.
    def endTagBr(name)
      parse_error("unexpected-end-tag-treated-as",
                  {"originalName" => "br", "newName" => "br element"})
      @tree.reconstructActiveFormattingElements
      @tree.insert_element(name, {})
      @tree.open_elements.pop()
    end

    def endTagNone(name)
      # This handles elements with no end tag.
      parse_error("no-end-tag", {"name" => name})
    end

    # Pops the current node when it matches the tag; anything else is a
    # parse error.
    def endTagCdataTextAreaXmp(name)
      if @tree.open_elements.last.name == name
        @tree.open_elements.pop
      else
        parse_error("unexpected-end-tag", {"name" => name})
      end
    end

    def endTagNew(name)
      # New HTML5 elements, "event-source", "section", "nav",
      # "article", "aside", "header", "footer", "datagrid", "command"
      # STDERR.puts "Warning: Undefined behaviour for end tag #{name}"
      endTagOther(name)
      #raise NotImplementedError
    end

    # Generic end tag handling: searches the open elements from the top for
    # a node with this name and stops with a parse error at the first
    # special or scoping element.
    def endTagOther(name)
      # XXX This logic should be moved into the treebuilder
      @tree.open_elements.reverse.each do |node|
        if node.name == name
          @tree.generateImpliedEndTags

          unless @tree.open_elements.last.name == name
            parse_error("unexpected-end-tag", {"name" => name})
          end

          remove_open_elements_until { |open| open == node }
          break
        elsif (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name)
          parse_error("unexpected-end-tag", {"name" => name})
          break
        end
      end
    end

    protected

    # Inserts the element and appends it to the list of active formatting
    # elements.
    def addFormattingElement(name, attributes)
      @tree.insert_element(name, attributes)
      @tree.activeFormattingElements.push(@tree.open_elements.last)
    end

  end
end