Merged with latest trunk.
This commit is contained in:
commit
aadfb55342
|
@ -8,7 +8,19 @@ OPTIONS = {
|
||||||
:ip => "0.0.0.0",
|
:ip => "0.0.0.0",
|
||||||
:environment => "production",
|
:environment => "production",
|
||||||
:server_root => File.expand_path(File.dirname(__FILE__) + "/../public/"),
|
:server_root => File.expand_path(File.dirname(__FILE__) + "/../public/"),
|
||||||
:server_type => WEBrick::SimpleServer
|
:server_type => WEBrick::SimpleServer,
|
||||||
|
:mime_types => WEBrick::HTTPUtils::DefaultMimeTypes.merge({
|
||||||
|
'avi' => 'video/x-msvideo',
|
||||||
|
'gz' => 'application/x-gzip',
|
||||||
|
'js' => 'application/x-javascript',
|
||||||
|
'nb' => 'application/mathematica',
|
||||||
|
'pdf' => 'application/pdf',
|
||||||
|
'svg' => 'application/svg+xml',
|
||||||
|
'tar' => 'application/x-tar',
|
||||||
|
'tex' => 'application/x-tex',
|
||||||
|
'xml' => 'application/xml',
|
||||||
|
'xslt' => 'application/xslt+xml'
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
ARGV.options do |opts|
|
ARGV.options do |opts|
|
||||||
|
|
1808
vendor/plugins/HTML5lib/lib/html5lib/html5parser.rb
vendored
1808
vendor/plugins/HTML5lib/lib/html5lib/html5parser.rb
vendored
File diff suppressed because it is too large
Load diff
46
vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_body_phase.rb
vendored
Normal file
46
vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_body_phase.rb
vendored
Normal file
|
@ -0,0 +1,46 @@
|
||||||
|
require 'html5lib/html5parser/phase'
|
||||||
|
|
||||||
|
module HTML5lib
  # Phase entered once the body has been closed. Comments are attached to the
  # root element; characters, start tags and unexpected end tags are reported
  # as parse errors and then re-dispatched to the inBody phase.
  class AfterBodyPhase < Phase

    handle_end 'html'

    # Attaches the comment to the first entry of the open-elements stack
    # (the <html> element) rather than to whatever element is currently open.
    def processComment(data)
      root = @tree.openElements.first
      @tree.insertComment(data, root)
    end

    # Reports the stray characters, then lets the inBody phase handle them.
    def processCharacters(data)
      reportAndReturnToBody(_('Unexpected non-space characters in the after body phase.'))
      @parser.phase.processCharacters(data)
    end

    # Reports the stray start tag, then lets the inBody phase handle it.
    def processStartTag(name, attributes)
      reportAndReturnToBody(_("Unexpected start tag token (#{name}) in the after body phase."))
      @parser.phase.processStartTag(name, attributes)
    end

    # </html>: a parse error when parsing innerHTML, otherwise the parser moves
    # on to the trailingEnd phase and remembers this phase as lastPhase.
    def endTagHtml(name)
      return @parser.parseError if @parser.innerHTML

      # XXX: This may need to be done, not sure
      # Don't set lastPhase to the current phase but to the inBody phase
      # instead. No need for extra parse errors if there's something after </html>.
      # Try "<!doctype html>X</html>X" for instance.
      @parser.lastPhase = @parser.phase
      @parser.phase = @parser.phases[:trailingEnd]
    end

    # Reports the stray end tag, then lets the inBody phase handle it.
    def endTagOther(name)
      reportAndReturnToBody(_("Unexpected end tag token (#{name}) in the after body phase."))
      @parser.phase.processEndTag(name)
    end

    protected

    # Records +message+ as a parse error and makes inBody the current phase.
    def reportAndReturnToBody(message)
      @parser.parseError(message)
      @parser.phase = @parser.phases[:inBody]
    end

  end
end
|
34
vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_frameset_phase.rb
vendored
Normal file
34
vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_frameset_phase.rb
vendored
Normal file
|
@ -0,0 +1,34 @@
|
||||||
|
require 'html5lib/html5parser/phase'
|
||||||
|
|
||||||
|
module HTML5lib
  # Phase entered after a frameset has been closed. Everything except
  # <noframes> and </html> is reported as a parse error and ignored.
  class AfterFramesetPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#after3

    handle_start 'html', 'noframes'

    handle_end 'html'

    # Character data is not allowed here: report it and drop it.
    def processCharacters(data)
      message = _('Unexpected non-space characters in the after frameset phase. Ignored.')
      @parser.parseError(message)
    end

    # <noframes> is processed by the inBody phase; the current phase is not changed.
    def startTagNoframes(name, attributes)
      body_phase = @parser.phases[:inBody]
      body_phase.processStartTag(name, attributes)
    end

    # Any other start tag is reported and ignored.
    def startTagOther(name, attributes)
      message = _("Unexpected start tag (#{name}) in the after frameset phase. Ignored.")
      @parser.parseError(message)
    end

    # </html>: remember this phase as lastPhase and switch to trailingEnd.
    def endTagHtml(name)
      finished_phase = @parser.phase
      @parser.lastPhase = finished_phase
      @parser.phase = @parser.phases[:trailingEnd]
    end

    # Any other end tag is reported and ignored.
    def endTagOther(name)
      message = _("Unexpected end tag (#{name}) in the after frameset phase. Ignored.")
      @parser.parseError(message)
    end

  end
end
|
50
vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_head_phase.rb
vendored
Normal file
50
vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_head_phase.rb
vendored
Normal file
|
@ -0,0 +1,50 @@
|
||||||
|
require 'html5lib/html5parser/phase'
|
||||||
|
|
||||||
|
module HTML5lib
  # Phase between the end of <head> and the start of <body>/<frameset>.
  # Anything that is not an explicit <body>, <frameset> or head-only element
  # causes a <body> element to be inserted and the token to be re-dispatched
  # to whichever phase is current afterwards.
  class AfterHeadPhase < Phase

    handle_start 'html', 'body', 'frameset', ['base', 'link', 'meta', 'script', 'style', 'title'] => 'FromHead'

    # End of input: insert the implied <body>, then let the new phase see EOF.
    def processEOF
      anythingElse
      @parser.phase.processEOF
    end

    # Characters: insert the implied <body>, then re-dispatch the characters.
    def processCharacters(data)
      anythingElse
      @parser.phase.processCharacters(data)
    end

    # <body>: insert the element and continue in the inBody phase.
    def startTagBody(name, attributes)
      @tree.insertElement(name, attributes)
      @parser.phase = @parser.phases[:inBody]
    end

    # <frameset>: insert the element and continue in the inFrameset phase.
    def startTagFrameset(name, attributes)
      @tree.insertElement(name, attributes)
      @parser.phase = @parser.phases[:inFrameset]
    end

    # Head-only elements seen after </head>: report the error, switch back to
    # the inHead phase and let it process the tag.
    def startTagFromHead(name, attributes)
      message = _("Unexpected start tag (#{name}) that can be in head. Moved.")
      @parser.parseError(message)
      @parser.phase = @parser.phases[:inHead]
      @parser.phase.processStartTag(name, attributes)
    end

    # Any other start tag: insert the implied <body>, then re-dispatch the tag.
    def startTagOther(name, attributes)
      anythingElse
      @parser.phase.processStartTag(name, attributes)
    end

    # Every end tag: insert the implied <body>, then re-dispatch the tag.
    def processEndTag(name)
      anythingElse
      @parser.phase.processEndTag(name)
    end

    # Inserts an attribute-less <body> element and switches to the inBody
    # phase, i.e. behaves exactly as if "<body>" had been seen.
    def anythingElse
      startTagBody('body', {})
    end

  end
end
|
41
vendor/plugins/HTML5lib/lib/html5lib/html5parser/before_head_phase.rb
vendored
Normal file
41
vendor/plugins/HTML5lib/lib/html5lib/html5parser/before_head_phase.rb
vendored
Normal file
|
@ -0,0 +1,41 @@
|
||||||
|
require 'html5lib/html5parser/phase'
|
||||||
|
|
||||||
|
module HTML5lib
  # Phase before the <head> element has been seen. Any token other than an
  # explicit <head> (or an ignored stray end tag) makes the phase insert an
  # implied <head> and hand the token to the phase that follows.
  class BeforeHeadPhase < Phase

    handle_start 'html', 'head'

    handle_end 'html'

    # End of input: imply <head>, then let the new phase see EOF.
    def processEOF
      implyHead
      @parser.phase.processEOF
    end

    # Characters: imply <head>, then re-dispatch the characters.
    def processCharacters(data)
      implyHead
      @parser.phase.processCharacters(data)
    end

    # <head>: insert the element, record it as the tree's head pointer and
    # continue in the inHead phase.
    def startTagHead(name, attributes)
      @tree.insertElement(name, attributes)
      @tree.headPointer = @tree.openElements.last
      @parser.phase = @parser.phases[:inHead]
    end

    # Any other start tag: imply <head>, then re-dispatch the tag.
    def startTagOther(name, attributes)
      implyHead
      @parser.phase.processStartTag(name, attributes)
    end

    # </html>: imply <head>, then re-dispatch the end tag.
    def endTagHtml(name)
      implyHead
      @parser.phase.processEndTag(name)
    end

    # Any other end tag is reported and ignored.
    def endTagOther(name)
      message = _("Unexpected end tag (#{name}) after the (implied) root element.")
      @parser.parseError(message)
    end

    protected

    # Behaves as if an attribute-less "<head>" start tag had been seen.
    def implyHead
      startTagHead('head', {})
    end

  end
end
|
548
vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_body_phase.rb
vendored
Normal file
548
vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_body_phase.rb
vendored
Normal file
|
@ -0,0 +1,548 @@
|
||||||
|
require 'html5lib/html5parser/phase'
|
||||||
|
|
||||||
|
module HTML5lib
  # Tree-construction phase used while the parser is inside <body>.
  #
  # The handle_start / handle_end declarations below associate tag names with
  # the startTag<Suffix> / endTag<Suffix> methods of this class. The macros
  # themselves are defined outside this file (presumably on Phase), as are the
  # helpers _, assert, in_scope? and remove_open_elements_until used below.
  class InBodyPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-body

    handle_start 'html', 'body', 'form', 'plaintext', 'a', 'button', 'xmp', 'table', 'hr', 'image'

    handle_start 'input', 'textarea', 'select', 'isindex', %w( script style ), %w( marquee object )

    handle_start %w( li dd dt ) => 'ListItem', %w( base link meta title ) => 'FromHead'

    handle_start %w( address blockquote center dir div dl fieldset listing menu ol p pre ul ) => 'CloseP'

    handle_start %w( b big em font i nobr s small strike strong tt u ) => 'Formatting'

    handle_start %w( area basefont bgsound br embed img param spacer wbr ) => 'VoidFormatting'

    handle_start %w( iframe noembed noframes noscript ) => 'Cdata', HEADING_ELEMENTS => 'Heading'

    handle_start %w( caption col colgroup frame frameset head option optgroup tbody td tfoot th thead tr ) => 'Misplaced'

    handle_start %w( event-source section nav article aside header footer datagrid command ) => 'New'

    handle_end 'p', 'body', 'html', 'form', %w( button marquee object ), %w( dd dt li ) => 'ListItem'

    handle_end %w( address blockquote center div dl fieldset listing menu ol pre ul ) => 'Block'

    # Route heading end tags to endTagHeading; without this registration the
    # method below was never reached.
    handle_end HEADING_ELEMENTS => 'Heading'

    handle_end %w( a b big em font i nobr s small strike strong tt u ) => 'Formatting'

    handle_end %w( head frameset select optgroup option table caption colgroup col thead tfoot tbody tr td th ) => 'Misplaced'

    handle_end %w( area basefont bgsound br embed hr image img input isindex param spacer wbr frame ) => 'None'

    handle_end %w( noframes noscript noembed textarea xmp iframe ) => 'CdataTextAreaXmp'

    handle_end %w( event-source section nav article aside header footer datagrid command ) => 'New'

    # Passes parser and tree on to Phase and starts with <pre> whitespace
    # handling switched off.
    def initialize(parser, tree)
      super(parser, tree)

      # for special handling of whitespace in <pre>
      @processSpaceCharactersPre = false
    end

    # Whitespace handler used for the first whitespace token after <pre>.
    # Sometimes (start of <pre> blocks) we want to drop leading newlines.
    def processSpaceCharactersPre(data)
      @processSpaceCharactersPre = false
      current = @tree.openElements[-1]
      if data.length > 0 && data[0] == ?\n && current.name == 'pre' && !current.hasContent
        data = data[1..-1]
      end
      @tree.insertText(data) if data.length > 0
    end

    # Whitespace goes through the <pre> handler when the flag is set,
    # otherwise through the inherited implementation.
    def processSpaceCharacters(data)
      if @processSpaceCharactersPre
        processSpaceCharactersPre(data)
      else
        super(data)
      end
    end

    # Inserts character data after reconstructing the active formatting elements.
    def processCharacters(data)
      # XXX The specification says to do this for every character at the
      # moment, but apparently that doesn't match the real world so we don't
      # do it for space characters.
      @tree.reconstructActiveFormattingElements
      @tree.insertText(data)
    end

    # <script>/<style>: handled by the inHead phase (current phase unchanged).
    def startTagScriptStyle(name, attributes)
      @parser.phases[:inHead].processStartTag(name, attributes)
    end

    # Head-only elements in the body: report, then let the inHead phase process them.
    def startTagFromHead(name, attributes)
      @parser.parseError(_("Unexpected start tag (#{name}) that belongs in the head. Moved."))
      @parser.phases[:inHead].processStartTag(name, attributes)
    end

    # A second <body>: always a parse error; attributes not yet present on the
    # existing body element (second entry of the open-elements stack) are copied onto it.
    def startTagBody(name, attributes)
      @parser.parseError(_('Unexpected start tag (body).'))

      if @tree.openElements.length == 1 || @tree.openElements[1].name != 'body'
        assert @parser.innerHTML
      else
        body = @tree.openElements[1]
        attributes.each do |attr, value|
          body.attributes[attr] = value unless body.attributes.has_key?(attr)
        end
      end
    end

    # Block-level start tags: close an open <p> first, then insert the element.
    # <pre> additionally arms the leading-newline handling.
    def startTagCloseP(name, attributes)
      endTagP('p') if in_scope?('p')
      @tree.insertElement(name, attributes)
      @processSpaceCharactersPre = true if name == 'pre'
    end

    # <form>: ignored with a parse error when a form pointer is already set;
    # otherwise inserted and recorded as the form pointer.
    def startTagForm(name, attributes)
      if @tree.formPointer
        @parser.parseError(_('Unexpected start tag (form). Ignored.'))
      else
        endTagP('p') if in_scope?('p')
        @tree.insertElement(name, attributes)
        @tree.formPointer = @tree.openElements[-1]
      end
    end

    # <li>, <dd>, <dt>: close an open item of the same family, then insert the new one.
    def startTagListItem(name, attributes)
      endTagP('p') if in_scope?('p')
      stopNames = {'li' => ['li'], 'dd' => ['dd', 'dt'], 'dt' => ['dd', 'dt']}
      stopName = stopNames[name]

      # Iterate over a reversed copy so that popping the real stack is safe.
      @tree.openElements.reverse.each_with_index do |node, i|
        if stopName.include?(node.name)
          (i + 1).times { @tree.openElements.pop }
          break
        end

        # Phrasing elements are all non special, non scoping, non
        # formatting elements
        break if (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name) &&
                 !['address', 'div'].include?(node.name)
      end

      # Always insert the new list item element.
      @tree.insertElement(name, attributes)
    end

    # <plaintext>: insert the element and switch the tokenizer to PLAINTEXT.
    def startTagPlaintext(name, attributes)
      endTagP('p') if in_scope?('p')
      @tree.insertElement(name, attributes)
      @parser.tokenizer.contentModelFlag = :PLAINTEXT
    end

    # Headings: a heading already in scope is a parse error and is closed
    # before the new heading element is inserted.
    def startTagHeading(name, attributes)
      endTagP('p') if in_scope?('p')
      if HEADING_ELEMENTS.any? { |heading| in_scope?(heading) }
        @parser.parseError(_("Unexpected start tag (#{name})."))

        remove_open_elements_until { |node| HEADING_ELEMENTS.include?(node.name) }
      end
      @tree.insertElement(name, attributes)
    end

    # <a>: an <a> still in the active formatting list is closed first
    # (parse error), then the new one is added as a formatting element.
    def startTagA(name, attributes)
      if afeAElement = @tree.elementInActiveFormattingElements('a')
        @parser.parseError(_('Unexpected start tag (a) implies end tag (a).'))
        endTagFormatting('a')
        @tree.openElements.delete(afeAElement) if @tree.openElements.include?(afeAElement)
        @tree.activeFormattingElements.delete(afeAElement) if @tree.activeFormattingElements.include?(afeAElement)
      end
      @tree.reconstructActiveFormattingElements
      addFormattingElement(name, attributes)
    end

    # Formatting elements (b, i, em, ...): insert and track as active formatting element.
    def startTagFormatting(name, attributes)
      @tree.reconstructActiveFormattingElements
      addFormattingElement(name, attributes)
    end

    # <button>: a button in scope is closed first (parse error) and the tag is
    # re-dispatched; otherwise the element is inserted and a Marker pushed.
    def startTagButton(name, attributes)
      if in_scope?('button')
        @parser.parseError(_('Unexpected start tag (button) implied end tag (button).'))
        processEndTag('button')
        @parser.phase.processStartTag(name, attributes)
      else
        @tree.reconstructActiveFormattingElements
        @tree.insertElement(name, attributes)
        @tree.activeFormattingElements.push(Marker)
      end
    end

    # <marquee>/<object>: insert and push a Marker onto the active formatting list.
    def startTagMarqueeObject(name, attributes)
      @tree.reconstructActiveFormattingElements
      @tree.insertElement(name, attributes)
      @tree.activeFormattingElements.push(Marker)
    end

    # <xmp>: insert the element and switch the tokenizer to CDATA.
    def startTagXmp(name, attributes)
      @tree.reconstructActiveFormattingElements
      @tree.insertElement(name, attributes)
      @parser.tokenizer.contentModelFlag = :CDATA
    end

    # <table>: close an open <p>, insert the element, continue in the inTable phase.
    def startTagTable(name, attributes)
      processEndTag('p') if in_scope?('p')
      @tree.insertElement(name, attributes)
      @parser.phase = @parser.phases[:inTable]
    end

    # Void elements (br, img, ...): insert and immediately pop off the stack.
    def startTagVoidFormatting(name, attributes)
      @tree.reconstructActiveFormattingElements
      @tree.insertElement(name, attributes)
      @tree.openElements.pop
    end

    # <hr>: close an open <p>, insert and immediately pop.
    def startTagHr(name, attributes)
      endTagP('p') if in_scope?('p')
      @tree.insertElement(name, attributes)
      @tree.openElements.pop
    end

    # <image>: parse error, then processed as <img>.
    def startTagImage(name, attributes)
      # No really...
      @parser.parseError(_('Unexpected start tag (image). Treated as img.'))
      processStartTag('img', attributes)
    end

    # <input>: insert and immediately pop.
    def startTagInput(name, attributes)
      @tree.reconstructActiveFormattingElements
      @tree.insertElement(name, attributes)
      if @tree.formPointer
        # XXX Not exactly sure what to do here
        # @tree.openElements[-1].form = @tree.formPointer
      end
      @tree.openElements.pop
    end

    # <isindex>: always a parse error. Unless a form pointer is set, it is
    # expanded into a form/hr/p/label/input/hr sequence. Note that the passed
    # attributes hash is modified (a 'name' entry is set) before being reused
    # for the generated <input>.
    def startTagIsindex(name, attributes)
      @parser.parseError(_("Unexpected start tag isindex. Don't use it!"))
      return if @tree.formPointer
      processStartTag('form', {})
      processStartTag('hr', {})
      processStartTag('p', {})
      processStartTag('label', {})
      # XXX Localization ...
      processCharacters('This is a searchable index. Insert your search keywords here:')
      attributes['name'] = 'isindex'
      processStartTag('input', attributes)
      processEndTag('label')
      processEndTag('p')
      processStartTag('hr', {})
      processEndTag('form')
    end

    # <textarea>: insert the element and switch the tokenizer to RCDATA.
    def startTagTextarea(name, attributes)
      # XXX Form element pointer checking here as well...
      @tree.insertElement(name, attributes)
      @parser.tokenizer.contentModelFlag = :RCDATA
    end

    # iframe, noembed noframes, noscript(if scripting enabled):
    # insert the element and switch the tokenizer to CDATA.
    def startTagCdata(name, attributes)
      @tree.insertElement(name, attributes)
      @parser.tokenizer.contentModelFlag = :CDATA
    end

    # <select>: insert the element and continue in the inSelect phase.
    def startTagSelect(name, attributes)
      @tree.reconstructActiveFormattingElements
      @tree.insertElement(name, attributes)
      @parser.phase = @parser.phases[:inSelect]
    end

    # Elements that should be children of other elements that have a
    # different insertion mode; here they are ignored
    # "caption", "col", "colgroup", "frame", "frameset", "head",
    # "option", "optgroup", "tbody", "td", "tfoot", "th", "thead",
    # "tr", "noscript"
    def startTagMisplaced(name, attributes)
      @parser.parseError(_("Unexpected start tag (#{name}). Ignored."))
    end

    # New HTML5 elements, "event-source", "section", "nav",
    # "article", "aside", "header", "footer", "datagrid", "command".
    # Emits a warning on standard error and treats the tag like any other.
    def startTagNew(name, attributes)
      # Was a leftover Python call (sys.stderr.write) that raised NameError.
      STDERR.puts "Warning: Undefined behaviour for start tag #{name}"
      startTagOther(name, attributes)
      #raise NotImplementedError
    end

    # Any other start tag: insert as an ordinary element.
    def startTagOther(name, attributes)
      @tree.reconstructActiveFormattingElements
      @tree.insertElement(name, attributes)
    end

    # </p>: a parse error unless the current node is a <p>; pops elements
    # for as long as a <p> remains in scope.
    def endTagP(name)
      @tree.generateImpliedEndTags('p') if in_scope?('p')
      @parser.parseError(_('Unexpected end tag (p).')) unless @tree.openElements[-1].name == 'p'
      @tree.openElements.pop while in_scope?('p')
    end

    # </body>: switches to the afterBody phase when the second open element
    # is the body element; otherwise a parse error and nothing else happens.
    def endTagBody(name)
      # XXX Need to take open <p> tags into account here. We shouldn't imply
      # </p> but we should not throw a parse error either. Specification is
      # likely to be updated.
      body = @tree.openElements[1]
      # The stack may hold fewer than two elements, so check for nil before
      # asking for the name.
      unless body && body.name == 'body'
        # innerHTML case
        @parser.parseError
        return
      end
      unless @tree.openElements[-1].name == 'body'
        @parser.parseError(_("Unexpected end tag (body). Missing end tag (#{@tree.openElements[-1].name})."))
      end
      @parser.phase = @parser.phases[:afterBody]
    end

    # </html>: acts like </body>, then (unless parsing innerHTML) passes the
    # end tag on to whichever phase is current afterwards.
    def endTagHtml(name)
      endTagBody(name)
      @parser.phase.processEndTag(name) unless @parser.innerHTML
    end

    # End tags of block elements (div, ul, pre, ...).
    def endTagBlock(name)
      #Put us back in the right whitespace handling mode
      @processSpaceCharactersPre = false if name == 'pre'

      @tree.generateImpliedEndTags if in_scope?(name)

      unless @tree.openElements[-1].name == name
        @parser.parseError(_("End tag (#{name}) seen too early. Expected other end tag."))
      end

      remove_open_elements_until(name) if in_scope?(name)
    end

    # </form>: handled like a block end tag, then the form pointer is cleared.
    def endTagForm(name)
      endTagBlock(name)
      @tree.formPointer = nil
    end

    # </li>, </dd>, </dt>.
    def endTagListItem(name)
      # AT Could merge this with the Block case
      if in_scope?(name)
        @tree.generateImpliedEndTags(name)

        unless @tree.openElements[-1].name == name
          @parser.parseError(_("End tag (#{name}) seen too early. Expected other end tag."))
        end
      end

      remove_open_elements_until(name) if in_scope?(name)
    end

    # </h1>..</h6>: a parse error unless the current node has the same name;
    # when any heading is in scope, elements are removed up to a heading.
    def endTagHeading(name)
      @tree.generateImpliedEndTags if HEADING_ELEMENTS.any? { |heading| in_scope?(heading) }

      unless @tree.openElements[-1].name == name
        @parser.parseError(_("Unexpected end tag (#{name}). Expected other end tag."))
      end

      if HEADING_ELEMENTS.any? { |heading| in_scope?(heading) }
        remove_open_elements_until { |node| HEADING_ELEMENTS.include?(node.name) }
      end
    end

    # The much-feared adoption agency algorithm
    # (end tags of a, b, big, em, font, i, nobr, s, small, strike, strong, tt, u).
    # Repeats until one of the step 1 / step 3 exits is taken.
    def endTagFormatting(name)
      # http://www.whatwg.org/specs/web-apps/current-work/#adoptionAgency
      # XXX Better parseError messages appreciated.
      while true
        # Step 1 paragraph 1
        afeElement = @tree.elementInActiveFormattingElements(name)
        if !afeElement || (@tree.openElements.include?(afeElement) && !in_scope?(afeElement.name))
          @parser.parseError(_("End tag (#{name}) violates step 1, paragraph 1 of the adoption agency algorithm."))
          return
        # Step 1 paragraph 2
        elsif !@tree.openElements.include?(afeElement)
          @parser.parseError(_("End tag (#{name}) violates step 1, paragraph 2 of the adoption agency algorithm."))
          @tree.activeFormattingElements.delete(afeElement)
          return
        end

        # Step 1 paragraph 3
        if afeElement != @tree.openElements[-1]
          @parser.parseError(_("End tag (#{name}) violates step 1, paragraph 3 of the adoption agency algorithm."))
        end

        # Step 2
        # Start of the adoption agency algorithm proper
        afeIndex = @tree.openElements.index(afeElement)
        furthestBlock = nil
        @tree.openElements[afeIndex..-1].each do |candidate|
          if (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(candidate.name)
            furthestBlock = candidate
            break
          end
        end

        # Step 3
        if furthestBlock.nil?
          removed = remove_open_elements_until { |node| node == afeElement }
          @tree.activeFormattingElements.delete(removed)
          return
        end

        # Step 4: the common ancestor is the stack entry just before the
        # formatting element.
        commonAncestor = @tree.openElements[afeIndex - 1]

        # Step 5
        furthestBlock.parent.removeChild(furthestBlock) if furthestBlock.parent

        # Step 6
        # The bookmark is supposed to help us identify where to reinsert
        # nodes in step 12. We have to ensure that we reinsert nodes after
        # the node before the active formatting element. Note the bookmark
        # can move in step 7.4
        bookmark = @tree.activeFormattingElements.index(afeElement)

        # Step 7
        lastNode = node = furthestBlock
        while true
          # AT replace this with a function and recursion?
          # Node is element before node in open elements
          node = @tree.openElements[@tree.openElements.index(node) - 1]
          until @tree.activeFormattingElements.include?(node)
            tmpNode = node
            node = @tree.openElements[@tree.openElements.index(node) - 1]
            @tree.openElements.delete(tmpNode)
          end
          # Step 7.3
          break if node == afeElement
          # Step 7.4
          if lastNode == furthestBlock
            # XXX should this be index(node) or index(node)+1
            # Anne: I think +1 is ok. Given x = [2,3,4,5]
            # x.index(3) gives 1 and then x[1 +1] gives 4...
            bookmark = @tree.activeFormattingElements.index(node) + 1
          end
          # Step 7.5
          if node.hasContent
            clone = node.cloneNode
            # Replace node with clone
            @tree.activeFormattingElements[@tree.activeFormattingElements.index(node)] = clone
            @tree.openElements[@tree.openElements.index(node)] = clone
            node = clone
          end
          # Step 7.6
          # Remove lastNode from its parents, if any
          lastNode.parent.removeChild(lastNode) if lastNode.parent
          node.appendChild(lastNode)
          # Step 7.7
          lastNode = node
          # End of inner loop
        end

        # Step 8
        lastNode.parent.removeChild(lastNode) if lastNode.parent
        commonAncestor.appendChild(lastNode)

        # Step 9
        clone = afeElement.cloneNode

        # Step 10
        furthestBlock.reparentChildren(clone)

        # Step 11
        furthestBlock.appendChild(clone)

        # Step 12
        @tree.activeFormattingElements.delete(afeElement)
        @tree.activeFormattingElements.insert([bookmark, @tree.activeFormattingElements.length].min, clone)

        # Step 13
        @tree.openElements.delete(afeElement)
        @tree.openElements.insert(@tree.openElements.index(furthestBlock) + 1, clone)
      end
    end

    # </button>, </marquee>, </object>: when the element is in scope it is
    # closed and the active formatting elements are cleared.
    def endTagButtonMarqueeObject(name)
      @tree.generateImpliedEndTags if in_scope?(name)

      unless @tree.openElements[-1].name == name
        @parser.parseError(_("Unexpected end tag (#{name}). Expected other end tag first."))
      end

      if in_scope?(name)
        remove_open_elements_until(name)

        @tree.clearActiveFormattingElements
      end
    end

    # This handles elements with end tags in other insertion modes.
    def endTagMisplaced(name)
      @parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
    end

    # This handles elements with no end tag.
    def endTagNone(name)
      @parser.parseError(_("This tag (#{name}) has no end tag"))
    end

    # End tags of CDATA/RCDATA elements: pop when the current node matches,
    # otherwise report and ignore.
    def endTagCdataTextAreaXmp(name)
      if @tree.openElements[-1].name == name
        @tree.openElements.pop
      else
        @parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
      end
    end

    # New HTML5 elements, "event-source", "section", "nav",
    # "article", "aside", "header", "footer", "datagrid", "command".
    # Emits a warning on standard error and treats the tag like any other.
    def endTagNew(name)
      STDERR.puts "Warning: Undefined behaviour for end tag #{name}"
      endTagOther(name)
      #raise NotImplementedError
    end

    # Any other end tag: walk the stack from the top; close the first element
    # with a matching name, or give up (parse error) at the first special or
    # scoping element.
    def endTagOther(name)
      # XXX This logic should be moved into the treebuilder
      @tree.openElements.reverse.each do |node|
        if node.name == name
          @tree.generateImpliedEndTags

          unless @tree.openElements[-1].name == name
            @parser.parseError(_("Unexpected end tag (#{name})."))
          end

          remove_open_elements_until { |candidate| candidate == node }

          break
        elsif (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name)
          @parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
          break
        end
      end
    end

    protected

    # Inserts the element and appends it to the active formatting elements.
    def addFormattingElement(name, attributes)
      @tree.insertElement(name, attributes)
      @tree.activeFormattingElements.push(@tree.openElements[-1])
    end

  end
end
|
68
vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_caption_phase.rb
vendored
Normal file
68
vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_caption_phase.rb
vendored
Normal file
|
@ -0,0 +1,68 @@
|
||||||
|
require 'html5lib/html5parser/phase'
|
||||||
|
|
||||||
|
module HTML5lib
  # Phase used while the parser is inside a table <caption>. Ordinary content
  # is delegated to the inBody phase; table-structure tags first close the
  # caption and are then re-dispatched.
  class InCaptionPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-caption

    handle_start 'html', ['caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr'] => 'TableElement'

    handle_end 'caption', 'table', ['body', 'col', 'colgroup', 'html', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr'] => 'Ignore'

    # True when a </caption> would be ignored, i.e. no caption is in (table) scope.
    def ignoreEndTagCaption
      !in_scope?('caption', true)
    end

    # Character data is handled by the inBody phase.
    def processCharacters(data)
      body_phase = @parser.phases[:inBody]
      body_phase.processCharacters(data)
    end

    # Table-structure start tag: parse error, implied </caption>, then the tag
    # is re-dispatched unless the implied </caption> was ignored.
    def startTagTableElement(name, attributes)
      closeCaptionThen { |phase| phase.processStartTag(name, attributes) }
    end

    # Any other start tag is handled by the inBody phase.
    def startTagOther(name, attributes)
      body_phase = @parser.phases[:inBody]
      body_phase.processStartTag(name, attributes)
    end

    # </caption>: closes the caption and returns to the inTable phase, or
    # reports a parse error when no caption is in scope.
    def endTagCaption(name)
      if ignoreEndTagCaption
        # innerHTML case
        assert @parser.innerHTML
        return @parser.parseError
      end

      # AT this code is quite similar to endTagTable in "InTable"
      @tree.generateImpliedEndTags

      current = @tree.openElements.last
      if current.name != 'caption'
        @parser.parseError(_("Unexpected end tag (caption). Missing end tags."))
      end

      remove_open_elements_until('caption')

      @tree.clearActiveFormattingElements
      @parser.phase = @parser.phases[:inTable]
    end

    # </table>: parse error, implied </caption>, then the end tag is
    # re-dispatched unless the implied </caption> was ignored.
    def endTagTable(name)
      closeCaptionThen { |phase| phase.processEndTag(name) }
    end

    # End tags that make no sense inside a caption: report and ignore.
    def endTagIgnore(name)
      message = _("Unexpected end tag (#{name}). Ignored.")
      @parser.parseError(message)
    end

    # Any other end tag is handled by the inBody phase.
    def endTagOther(name)
      body_phase = @parser.phases[:inBody]
      body_phase.processEndTag(name)
    end

    protected

    # Reports a parse error, processes an implied </caption> and, unless that
    # end tag was ignored, yields the phase that is current afterwards.
    def closeCaptionThen
      @parser.parseError
      #XXX Have to duplicate logic here to find out if the tag is ignored
      caption_missing = ignoreEndTagCaption
      @parser.phase.processEndTag('caption')
      yield(@parser.phase) unless caption_missing
    end

  end
end
|
78
vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_cell_phase.rb
vendored
Normal file
78
vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_cell_phase.rb
vendored
Normal file
|
@ -0,0 +1,78 @@
|
||||||
|
require 'html5lib/html5parser/phase'
|
||||||
|
|
||||||
|
module HTML5lib
|
||||||
|
class InCellPhase < Phase

  # Insertion mode that is active while a <td> or <th> element is open.
  # http://www.whatwg.org/specs/web-apps/current-work/#in-cell

  handle_start 'html', %w( caption col colgroup tbody td tfoot th thead tr ) => 'TableOther'

  handle_end %w( td th ) => 'TableCell', %w( body caption col colgroup html ) => 'Ignore'

  handle_end %w( table tbody tfoot thead tr ) => 'Imply'

  # Character data is delegated to the "in body" phase.
  def processCharacters(data)
    @parser.phases[:inBody].processCharacters(data)
  end

  # A table-structure start tag closes the open cell and is then reprocessed
  # by the phase that is current afterwards.
  def startTagTableOther(name, attributes)
    if in_scope?('td', true) || in_scope?('th', true)
      closeCell
      @parser.phase.processStartTag(name, attributes)
    else
      # innerHTML case
      @parser.parseError
    end
  end

  # Every other start tag is delegated to the "in body" phase.
  def startTagOther(name, attributes)
    @parser.phases[:inBody].processStartTag(name, attributes)
  end

  # Handles </td> and </th>: closes the cell named +name+ and switches to the
  # "in row" phase, or reports a parse error when no such cell is in scope.
  def endTagTableCell(name)
    if in_scope?(name, true)
      @tree.generateImpliedEndTags(name)
      if @tree.openElements[-1].name != name
        # The message goes through _() like every other parse error message
        # in this class.
        @parser.parseError(_("Got table cell end tag (#{name}) while required end tags are missing."))

        remove_open_elements_until(name)
      else
        @tree.openElements.pop
      end
      @tree.clearActiveFormattingElements
      @parser.phase = @parser.phases[:inRow]
    else
      @parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
    end
  end

  # End tags that are reported as a parse error and otherwise dropped.
  def endTagIgnore(name)
    @parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
  end

  # A table-structure end tag that is in scope closes the open cell and is
  # then reprocessed by the phase that is current afterwards.
  def endTagImply(name)
    if in_scope?(name, true)
      closeCell
      @parser.phase.processEndTag(name)
    else
      # sometimes innerHTML case
      @parser.parseError
    end
  end

  # Every other end tag is delegated to the "in body" phase.
  def endTagOther(name)
    @parser.phases[:inBody].processEndTag(name)
  end

  protected

  # Closes whichever cell is in scope, checking <td> before <th>.
  # Does nothing when neither is in scope.
  def closeCell
    if in_scope?('td', true)
      endTagTableCell('td')
    elsif in_scope?('th', true)
      endTagTableCell('th')
    end
  end

end
|
||||||
|
end
|
55
vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_column_group_phase.rb
vendored
Normal file
55
vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_column_group_phase.rb
vendored
Normal file
|
@ -0,0 +1,55 @@
|
||||||
|
require 'html5lib/html5parser/phase'
|
||||||
|
|
||||||
|
module HTML5lib
|
||||||
|
class InColumnGroupPhase < Phase

  # Insertion mode that is active while a <colgroup> element is open.
  # http://www.whatwg.org/specs/web-apps/current-work/#in-column

  handle_start 'html', 'col'

  handle_end 'colgroup', 'col'

  # A </colgroup> is ignored when the current node is the <html> element.
  def ignoreEndTagColgroup
    @tree.openElements[-1].name == 'html'
  end

  # Character data implies </colgroup>; the data is then reprocessed unless
  # the implied end tag was ignored.
  def processCharacters(data)
    colgroup_kept = ignoreEndTagColgroup
    endTagColgroup("colgroup")
    return if colgroup_kept

    @parser.phase.processCharacters(data)
  end

  # <col> is inserted and immediately popped from the open element stack.
  def startTagCol(name, attributes)
    @tree.insertElement(name, attributes)
    @tree.openElements.pop
  end

  # Any other start tag implies </colgroup>; the tag is then reprocessed
  # unless the implied end tag was ignored.
  def startTagOther(name, attributes)
    colgroup_kept = ignoreEndTagColgroup
    endTagColgroup('colgroup')
    return if colgroup_kept

    @parser.phase.processStartTag(name, attributes)
  end

  # Pops the colgroup and switches to the "in table" phase, or reports a
  # parse error when the end tag has to be ignored.
  def endTagColgroup(name)
    if ignoreEndTagColgroup
      # innerHTML case
      assert @parser.innerHTML
      return @parser.parseError
    end

    @tree.openElements.pop
    @parser.phase = @parser.phases[:inTable]
  end

  # </col> is always a parse error.
  def endTagCol(name)
    @parser.parseError(_('Unexpected end tag (col). col has no end tag.'))
  end

  # Any other end tag implies </colgroup>; the tag is then reprocessed
  # unless the implied end tag was ignored.
  def endTagOther(name)
    colgroup_kept = ignoreEndTagColgroup
    endTagColgroup('colgroup')
    return if colgroup_kept

    @parser.phase.processEndTag(name)
  end

end
|
||||||
|
end
|
57
vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_frameset_phase.rb
vendored
Normal file
57
vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_frameset_phase.rb
vendored
Normal file
|
@ -0,0 +1,57 @@
|
||||||
|
require 'html5lib/html5parser/phase'
|
||||||
|
|
||||||
|
module HTML5lib
|
||||||
|
class InFramesetPhase < Phase

  # Insertion mode that is active while a <frameset> element is open.
  # http://www.whatwg.org/specs/web-apps/current-work/#in-frameset

  handle_start 'html', 'frameset', 'frame', 'noframes'

  handle_end 'frameset', 'noframes'

  # Non-space character data is a parse error here.
  def processCharacters(data)
    @parser.parseError(_('Unexpected characters in the frameset phase. Characters ignored.'))
  end

  # Nested <frameset> elements are simply inserted.
  def startTagFrameset(name, attributes)
    @tree.insertElement(name, attributes)
  end

  # <frame> is inserted and immediately popped from the open element stack.
  def startTagFrame(name, attributes)
    @tree.insertElement(name, attributes)
    @tree.openElements.pop
  end

  # <noframes> is delegated to the "in body" phase.
  def startTagNoframes(name, attributes)
    @parser.phases[:inBody].processStartTag(name, attributes)
  end

  # Every other start tag is a parse error.
  def startTagOther(name, attributes)
    @parser.parseError(_("Unexpected start tag token (#{name}) in the frameset phase. Ignored"))
  end

  # Pops the current frameset (unless the current node is <html>) and then
  # decides whether to move on to the "after frameset" phase.
  def endTagFrameset(name)
    if @tree.openElements[-1].name == 'html'
      # innerHTML case
      @parser.parseError(_("Unexpected end tag token (frameset) in the frameset phase (innerHTML)."))
    else
      @tree.openElements.pop
    end

    # Outside innerHTML mode, switch phases once the current node is no
    # longer a "frameset" element.
    if !@parser.innerHTML && @tree.openElements[-1].name != 'frameset'
      @parser.phase = @parser.phases[:afterFrameset]
    end
  end

  # </noframes> is delegated to the "in body" phase.
  def endTagNoframes(name)
    @parser.phases[:inBody].processEndTag(name)
  end

  # Every other end tag is a parse error.
  def endTagOther(name)
    @parser.parseError(_("Unexpected end tag token (#{name}) in the frameset phase. Ignored."))
  end

end
|
||||||
|
end
|
120
vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_head_phase.rb
vendored
Normal file
120
vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_head_phase.rb
vendored
Normal file
|
@ -0,0 +1,120 @@
|
||||||
|
require 'html5lib/html5parser/phase'
|
||||||
|
|
||||||
|
module HTML5lib
|
||||||
|
class InHeadPhase < Phase

  # Insertion mode that is active while the document head is being parsed.

  handle_start 'html', 'head', 'title', 'style', 'script', %w( base link meta )

  handle_end 'head', 'html', %w( title style script )

  # At end of file an open <title>, <style> or <script> is reported and
  # popped; then the head is left and EOF is reprocessed by the next phase.
  def processEOF
    current = @tree.openElements[-1].name
    if %w( title style script ).include?(current)
      @parser.parseError(_("Unexpected end of file. Expected end tag (#{current})."))
      @tree.openElements.pop
    end
    anythingElse
    @parser.phase.processEOF
  end

  # Text inside <title>, <style> or <script> is inserted directly; any other
  # text leaves the head and is reprocessed by the next phase.
  def processCharacters(data)
    if %w( title style script ).include?(@tree.openElements[-1].name)
      return @tree.insertText(data)
    end

    anythingElse
    @parser.phase.processCharacters(data)
  end

  # A second <head> is a parse error.
  def startTagHead(name, attributes)
    @parser.parseError(_('Unexpected start tag head in existing head. Ignored'))
  end

  # <title> is appended to the head, pushed on the open element stack, and
  # the tokenizer is switched to RCDATA.
  def startTagTitle(name, attributes)
    title = @tree.createElement(name, attributes)
    appendToHead(title)
    @tree.openElements.push(title)
    @parser.tokenizer.contentModelFlag = :RCDATA
  end

  # <style> goes into the head only when a head pointer exists and this is
  # the current phase; otherwise it is appended to the current node.
  # The tokenizer is switched to CDATA.
  def startTagStyle(name, attributes)
    style = @tree.createElement(name, attributes)
    if @tree.headPointer.nil? || @parser.phase != @parser.phases[:inHead]
      @tree.openElements[-1].appendChild(style)
    else
      appendToHead(style)
    end
    @tree.openElements.push(style)
    @parser.tokenizer.contentModelFlag = :CDATA
  end

  # <script> is placed like <style> and additionally flagged as
  # "parser-inserted".
  def startTagScript(name, attributes)
    #XXX Inner HTML case may be wrong
    script = @tree.createElement(name, attributes)
    script._flags.push("parser-inserted")
    if @tree.headPointer.nil? || @parser.phase != @parser.phases[:inHead]
      @tree.openElements[-1].appendChild(script)
    else
      appendToHead(script)
    end
    @tree.openElements.push(script)
    @parser.tokenizer.contentModelFlag = :CDATA
  end

  # <base>, <link> and <meta> are appended to the head without being pushed
  # on the open element stack.
  def startTagBaseLinkMeta(name, attributes)
    appendToHead(@tree.createElement(name, attributes))
  end

  # Any other start tag leaves the head and is reprocessed.
  def startTagOther(name, attributes)
    anythingElse
    @parser.phase.processStartTag(name, attributes)
  end

  # </head> pops the head element when it is the current node (a parse error
  # otherwise) and always switches to the "after head" phase.
  def endTagHead(name)
    if @tree.openElements[-1].name != 'head'
      @parser.parseError(_("Unexpected end tag (head). Ignored."))
    else
      @tree.openElements.pop
    end
    @parser.phase = @parser.phases[:afterHead]
  end

  # </html> leaves the head and is reprocessed by the next phase.
  def endTagHtml(name)
    anythingElse
    @parser.phase.processEndTag(name)
  end

  # </title>, </style> and </script> pop the current node when it matches;
  # otherwise they are a parse error.
  def endTagTitleStyleScript(name)
    if @tree.openElements[-1].name != name
      @parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
    else
      @tree.openElements.pop
    end
  end

  # Every other end tag is a parse error.
  def endTagOther(name)
    @parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
  end

  # Leaves the head: acts as </head> when <head> is the current node,
  # otherwise just switches to the "after head" phase.
  def anythingElse
    return endTagHead('head') if @tree.openElements[-1].name == 'head'

    @parser.phase = @parser.phases[:afterHead]
  end

  protected

  # Appends +element+ to the head element, or to the current node when
  # there is no head pointer (asserted to be the innerHTML case).
  def appendToHead(element)
    unless @tree.headPointer.nil?
      return @tree.headPointer.appendChild(element)
    end

    assert @parser.innerHTML
    @tree.openElements[-1].appendChild(element)
  end

end
|
||||||
|
end
|
87
vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_row_phase.rb
vendored
Normal file
87
vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_row_phase.rb
vendored
Normal file
|
@ -0,0 +1,87 @@
|
||||||
|
require 'html5lib/html5parser/phase'
|
||||||
|
|
||||||
|
module HTML5lib
|
||||||
|
class InRowPhase < Phase

  # Insertion mode that is active while a <tr> element is open.
  # http://www.whatwg.org/specs/web-apps/current-work/#in-row

  handle_start 'html', %w( td th ) => 'TableCell', %w( caption col colgroup tbody tfoot thead tr ) => 'TableOther'

  handle_end 'tr', 'table', %w( tbody tfoot thead ) => 'TableRowGroup', %w( body caption col colgroup html td th ) => 'Ignore'

  # Character data is delegated to the "in table" phase.
  def processCharacters(data)
    @parser.phases[:inTable].processCharacters(data)
  end

  # <td>/<th>: clear back to the row, insert the cell, switch to the
  # "in cell" phase and push a marker onto the active formatting elements.
  def startTagTableCell(name, attributes)
    clearStackToTableRowContext
    @tree.insertElement(name, attributes)
    @parser.phase = @parser.phases[:inCell]
    @tree.activeFormattingElements.push(Marker)
  end

  # Other table-structure start tags imply </tr>; the tag is then
  # reprocessed unless the implied end tag was ignored.
  def startTagTableOther(name, attributes)
    row_kept = ignoreEndTagTr
    endTagTr('tr')
    # XXX how are we sure it's always ignored in the innerHTML case?
    return if row_kept

    @parser.phase.processStartTag(name, attributes)
  end

  # Every other start tag is delegated to the "in table" phase.
  def startTagOther(name, attributes)
    @parser.phases[:inTable].processStartTag(name, attributes)
  end

  # </tr> clears back to the row, pops it and switches to the "in table
  # body" phase, or reports a parse error when it has to be ignored.
  def endTagTr(name)
    if ignoreEndTagTr
      # innerHTML case
      assert @parser.innerHTML
      return @parser.parseError
    end

    clearStackToTableRowContext
    @tree.openElements.pop
    @parser.phase = @parser.phases[:inTableBody]
  end

  # </table> implies </tr>; the end tag is reprocessed only when the
  # implied </tr> was not ignored.
  def endTagTable(name)
    row_kept = ignoreEndTagTr
    endTagTr('tr')
    # XXX how are we sure it's always ignored in the innerHTML case?
    return if row_kept

    @parser.phase.processEndTag(name)
  end

  # </tbody>, </tfoot>, </thead>: when in scope, close the row and
  # reprocess the end tag; otherwise report a parse error.
  def endTagTableRowGroup(name)
    # innerHTML case when the element is not in scope
    return @parser.parseError unless in_scope?(name, true)

    endTagTr('tr')
    @parser.phase.processEndTag(name)
  end

  # End tags that are reported as a parse error and otherwise dropped.
  def endTagIgnore(name)
    @parser.parseError(_("Unexpected end tag (#{name}) in the row phase. Ignored."))
  end

  # Every other end tag is delegated to the "in table" phase.
  def endTagOther(name)
    @parser.phases[:inTable].processEndTag(name)
  end

  protected

  # XXX unify this with other table helper methods
  # Pops open elements, reporting each, until the current node is <tr> or
  # <html>.
  def clearStackToTableRowContext
    until %w( tr html ).include?(current = @tree.openElements[-1].name)
      @parser.parseError(_("Unexpected implied end tag (#{current}) in the row phase."))
      @tree.openElements.pop
    end
  end

  # True when the scope check for 'tr' fails, i.e. a </tr> would be ignored.
  def ignoreEndTagTr
    !in_scope?('tr', :tableVariant => true)
  end

end
|
||||||
|
end
|
84
vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_select_phase.rb
vendored
Normal file
84
vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_select_phase.rb
vendored
Normal file
|
@ -0,0 +1,84 @@
|
||||||
|
require 'html5lib/html5parser/phase'
|
||||||
|
|
||||||
|
module HTML5lib
|
||||||
|
class InSelectPhase < Phase

  # Insertion mode that is active while a <select> element is open.
  # http://www.whatwg.org/specs/web-apps/current-work/#in-select

  handle_start 'html', 'option', 'optgroup', 'select'

  handle_end 'option', 'optgroup', 'select', %w( caption table tbody tfoot thead tr td th ) => 'TableElements'

  # Character data is inserted at the current node.
  def processCharacters(data)
    @tree.insertText(data)
  end

  # <option> is inserted after closing a currently open <option>.
  def startTagOption(name, attributes)
    # We need to imply </option> if <option> is the current node.
    @tree.openElements.pop if @tree.openElements[-1].name == 'option'
    @tree.insertElement(name, attributes)
  end

  # <optgroup> is inserted after closing a currently open <option> and then
  # a currently open <optgroup>.
  def startTagOptgroup(name, attributes)
    @tree.openElements.pop if @tree.openElements[-1].name == 'option'
    @tree.openElements.pop if @tree.openElements[-1].name == 'optgroup'
    @tree.insertElement(name, attributes)
  end

  # A nested <select> is a parse error and is handled as </select>.
  def startTagSelect(name, attributes)
    @parser.parseError(_('Unexpected start tag (select) in the select phase implies select start tag.'))
    endTagSelect('select')
  end

  # Every other start tag is a parse error.
  def startTagOther(name, attributes)
    # Double quotes are required so that #{name} is interpolated into the
    # message instead of being emitted literally.
    @parser.parseError(_("Unexpected start tag token (#{name}) in the select phase. Ignored."))
  end

  # </option> pops the current node when it is an <option>; otherwise it is
  # a parse error.
  def endTagOption(name)
    if @tree.openElements[-1].name == 'option'
      @tree.openElements.pop
    else
      @parser.parseError(_('Unexpected end tag (option) in the select phase. Ignored.'))
    end
  end

  # </optgroup> closes an <option> that sits directly inside an <optgroup>
  # and then the <optgroup> itself; anything else is a parse error.
  def endTagOptgroup(name)
    # </optgroup> implicitly closes <option>
    if @tree.openElements[-1].name == 'option' && @tree.openElements[-2].name == 'optgroup'
      @tree.openElements.pop
    end
    # It also closes </optgroup>
    if @tree.openElements[-1].name == 'optgroup'
      @tree.openElements.pop
    # But nothing else
    else
      @parser.parseError(_('Unexpected end tag (optgroup) in the select phase. Ignored.'))
    end
  end

  # </select> pops everything up to and including the <select> and resets
  # the insertion mode, or reports a parse error when none is in scope.
  def endTagSelect(name)
    if in_scope?('select', true)
      remove_open_elements_until('select')

      @parser.resetInsertionMode
    else
      # innerHTML case
      @parser.parseError
    end
  end

  # A table-structure end tag is always a parse error; when the element is
  # in scope it also closes the <select> and is reprocessed.
  def endTagTableElements(name)
    @parser.parseError(_("Unexpected table end tag (#{name}) in the select phase."))

    if in_scope?(name, true)
      endTagSelect('select')
      @parser.phase.processEndTag(name)
    end
  end

  # Every other end tag is a parse error.
  def endTagOther(name)
    @parser.parseError(_("Unexpected end tag token (#{name}) in the select phase. Ignored."))
  end

end
|
||||||
|
end
|
83
vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_table_body_phase.rb
vendored
Normal file
83
vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_table_body_phase.rb
vendored
Normal file
|
@ -0,0 +1,83 @@
|
||||||
|
require 'html5lib/html5parser/phase'
|
||||||
|
|
||||||
|
module HTML5lib
|
||||||
|
class InTableBodyPhase < Phase

  # Insertion mode that is active while a <tbody>, <thead> or <tfoot>
  # element is open.
  # http://www.whatwg.org/specs/web-apps/current-work/#in-table0

  handle_start 'html', 'tr', %w( td th ) => 'TableCell', %w( caption col colgroup tbody tfoot thead ) => 'TableOther'

  # The suffix must be 'Ignore' so these tags dispatch to endTagIgnore; a
  # misspelt suffix names a method that does not exist.
  handle_end 'table', %w( tbody tfoot thead ) => 'TableRowGroup', %w( body caption col colgroup html td th tr ) => 'Ignore'

  # Character data is delegated to the "in table" phase.
  def processCharacters(data)
    @parser.phases[:inTable].processCharacters(data)
  end

  # <tr>: clear back to the row group, insert the row and switch to the
  # "in row" phase.
  def startTagTr(name, attributes)
    clearStackToTableBodyContext
    @tree.insertElement(name, attributes)
    @parser.phase = @parser.phases[:inRow]
  end

  # A cell without a row is a parse error: a <tr> with no attributes is
  # implied and the cell tag is reprocessed.
  def startTagTableCell(name, attributes)
    @parser.parseError(_("Unexpected table cell start tag (#{name}) in the table body phase."))
    startTagTr('tr', {})
    @parser.phase.processStartTag(name, attributes)
  end

  # Other table-structure start tags close the open row group and are then
  # reprocessed.
  def startTagTableOther(name, attributes)
    # XXX AT Any ideas on how to share this with endTagTable?
    if in_scope?('tbody', true) || in_scope?('thead', true) || in_scope?('tfoot', true)
      clearStackToTableBodyContext
      endTagTableRowGroup(@tree.openElements[-1].name)
      @parser.phase.processStartTag(name, attributes)
    else
      # innerHTML case
      @parser.parseError
    end
  end

  # Every other start tag is delegated to the "in table" phase.
  def startTagOther(name, attributes)
    @parser.phases[:inTable].processStartTag(name, attributes)
  end

  # </tbody>, </tfoot>, </thead>: when in scope, clear back to the row
  # group, pop it and switch to the "in table" phase; otherwise report a
  # parse error.
  def endTagTableRowGroup(name)
    if in_scope?(name, true)
      clearStackToTableBodyContext
      @tree.openElements.pop
      @parser.phase = @parser.phases[:inTable]
    else
      @parser.parseError(_("Unexpected end tag (#{name}) in the table body phase. Ignored."))
    end
  end

  # </table> closes the open row group and is then reprocessed.
  def endTagTable(name)
    if in_scope?('tbody', true) || in_scope?('thead', true) || in_scope?('tfoot', true)
      clearStackToTableBodyContext
      endTagTableRowGroup(@tree.openElements[-1].name)
      @parser.phase.processEndTag(name)
    else
      # innerHTML case
      @parser.parseError
    end
  end

  # End tags that are reported as a parse error and otherwise dropped.
  def endTagIgnore(name)
    @parser.parseError(_("Unexpected end tag (#{name}) in the table body phase. Ignored."))
  end

  # Every other end tag is delegated to the "in table" phase.
  def endTagOther(name)
    @parser.phases[:inTable].processEndTag(name)
  end

  protected

  # Pops open elements, reporting each, until the current node is <tbody>,
  # <tfoot>, <thead> or <html>.
  def clearStackToTableBodyContext
    until ['tbody', 'tfoot', 'thead', 'html'].include?(name = @tree.openElements[-1].name)
      @parser.parseError(_("Unexpected implied end tag (#{name}) in the table body phase."))
      @tree.openElements.pop
    end
  end

end
|
||||||
|
end
|
110
vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_table_phase.rb
vendored
Normal file
110
vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_table_phase.rb
vendored
Normal file
|
@ -0,0 +1,110 @@
|
||||||
|
require 'html5lib/html5parser/phase'
|
||||||
|
|
||||||
|
module HTML5lib
|
||||||
|
class InTablePhase < Phase

  # Insertion mode that is active while a <table> element is open.
  # http://www.whatwg.org/specs/web-apps/current-work/#in-table

  handle_start 'html', 'caption', 'colgroup', 'col', 'table'

  handle_start %w( tbody tfoot thead ) => 'RowGroup', %w( td th tr ) => 'ImplyTbody'

  handle_end 'table', %w( body caption col colgroup html tbody td tfoot th thead tr ) => 'Ignore'

  # Non-space character data is a parse error; it is processed by the
  # "in body" phase with the tree's insertFromTable flag set.
  def processCharacters(data)
    @parser.parseError(_("Unexpected non-space characters in table context caused voodoo mode."))
    # Make all the special element rearranging voodoo kick in
    @tree.insertFromTable = true
    # Process the character in the "in body" mode
    @parser.phases[:inBody].processCharacters(data)
    @tree.insertFromTable = false
  end

  # <caption>: clear back to the table, push a marker onto the active
  # formatting elements, insert the caption and switch to "in caption".
  def startTagCaption(name, attributes)
    clearStackToTableContext
    @tree.activeFormattingElements.push(Marker)
    @tree.insertElement(name, attributes)
    @parser.phase = @parser.phases[:inCaption]
  end

  # <colgroup>: clear back to the table, insert the element and switch to
  # "in column group".
  def startTagColgroup(name, attributes)
    clearStackToTableContext
    @tree.insertElement(name, attributes)
    @parser.phase = @parser.phases[:inColumnGroup]
  end

  # <col> implies a <colgroup> with no attributes and is then reprocessed.
  def startTagCol(name, attributes)
    startTagColgroup('colgroup', {})
    @parser.phase.processStartTag(name, attributes)
  end

  # <tbody>, <tfoot>, <thead>: clear back to the table, insert the row
  # group and switch to "in table body".
  def startTagRowGroup(name, attributes)
    clearStackToTableContext
    @tree.insertElement(name, attributes)
    @parser.phase = @parser.phases[:inTableBody]
  end

  # <td>, <th>, <tr> imply a <tbody> with no attributes and are then
  # reprocessed.
  def startTagImplyTbody(name, attributes)
    startTagRowGroup('tbody', {})
    @parser.phase.processStartTag(name, attributes)
  end

  # A nested <table> is a parse error that implies </table>; outside
  # innerHTML mode the start tag is then reprocessed.
  def startTagTable(name, attributes)
    @parser.parseError(_("Unexpected start tag (table) in table phase. Implies end tag (table)."))
    @parser.phase.processEndTag('table')
    @parser.phase.processStartTag(name, attributes) unless @parser.innerHTML
  end

  # Any other start tag is a parse error; it is processed by the "in body"
  # phase with the tree's insertFromTable flag set.
  def startTagOther(name, attributes)
    @parser.parseError(_("Unexpected start tag (#{name}) in table context caused voodoo mode."))
    # Make all the special element rearranging voodoo kick in
    @tree.insertFromTable = true
    # Process the start tag in the "in body" mode
    @parser.phases[:inBody].processStartTag(name, attributes)
    @tree.insertFromTable = false
  end

  # </table>: when a table is in scope, generate implied end tags, pop up
  # to and including the table and reset the insertion mode; otherwise
  # report a parse error.
  def endTagTable(name)
    if in_scope?('table', true)
      @tree.generateImpliedEndTags

      unless @tree.openElements[-1].name == 'table'
        @parser.parseError(_("Unexpected end tag (table). Expected end tag (#{@tree.openElements[-1].name})."))
      end

      remove_open_elements_until('table')

      @parser.resetInsertionMode
    else
      # innerHTML case
      assert @parser.innerHTML
      @parser.parseError
    end
  end

  # End tags that are reported as a parse error and otherwise dropped.
  def endTagIgnore(name)
    @parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
  end

  # Any other end tag is a parse error; it is processed by the "in body"
  # phase with the tree's insertFromTable flag set.
  def endTagOther(name)
    @parser.parseError(_("Unexpected end tag (#{name}) in table context caused voodoo mode."))
    # Make all the special element rearranging voodoo kick in.
    # The flag belongs to the tree, exactly as in processCharacters and
    # startTagOther; setting it on the parser does not reach the tree.
    @tree.insertFromTable = true
    # Process the end tag in the "in body" mode
    @parser.phases[:inBody].processEndTag(name)
    @tree.insertFromTable = false
  end

  protected

  # "clear the stack back to a table context": pops open elements,
  # reporting each, until the current node is <table> or <html>.
  def clearStackToTableContext
    until ['table', 'html'].include?(name = @tree.openElements[-1].name)
      @parser.parseError(_("Unexpected implied end tag (#{name}) in the table phase."))
      @tree.openElements.pop
    end
    # When the current node is <html> it's an innerHTML case
  end

end
|
||||||
|
end
|
49
vendor/plugins/HTML5lib/lib/html5lib/html5parser/initial_phase.rb
vendored
Normal file
49
vendor/plugins/HTML5lib/lib/html5lib/html5parser/initial_phase.rb
vendored
Normal file
|
@ -0,0 +1,49 @@
|
||||||
|
require 'html5lib/html5parser/phase'
|
||||||
|
|
||||||
|
module HTML5lib
|
||||||
|
class InitialPhase < Phase

  # This phase deals with error handling as well which is currently not
  # covered in the specification. The error handling is typically known as
  # "quirks mode". It is expected that a future version of HTML5 will define this.

  # EOF before a DOCTYPE is a parse error; EOF is reprocessed by the
  # root element phase.
  def processEOF
    @parser.parseError(_('Unexpected End of file. Expected DOCTYPE.'))
    enter_root_element_phase.processEOF
  end

  # Comments are attached to the document itself.
  def processComment(data)
    @tree.insertComment(data, @tree.document)
  end

  # Inserts the DOCTYPE (reporting a parse error first when +error+ is set)
  # and moves on to the root element phase.
  def processDoctype(name, error)
    @parser.parseError(_('Erroneous DOCTYPE.')) if error
    @tree.insertDoctype(name)
    @parser.phase = @parser.phases[:rootElement]
  end

  # Whitespace is attached to the document itself.
  def processSpaceCharacters(data)
    @tree.insertText(data, @tree.document)
  end

  # Non-space characters before a DOCTYPE are a parse error; the data is
  # reprocessed by the root element phase.
  def processCharacters(data)
    @parser.parseError(_('Unexpected non-space characters. Expected DOCTYPE.'))
    enter_root_element_phase.processCharacters(data)
  end

  # A start tag before a DOCTYPE is a parse error; the tag is reprocessed
  # by the root element phase.
  def processStartTag(name, attributes)
    @parser.parseError(_("Unexpected start tag (#{name}). Expected DOCTYPE."))
    enter_root_element_phase.processStartTag(name, attributes)
  end

  # An end tag before a DOCTYPE is a parse error; the tag is reprocessed
  # by the root element phase.
  def processEndTag(name)
    @parser.parseError(_("Unexpected end tag (#{name}). Expected DOCTYPE."))
    enter_root_element_phase.processEndTag(name)
  end

  protected

  # Makes the root element phase current and returns the parser's current
  # phase, read back through its accessor.
  def enter_root_element_phase
    @parser.phase = @parser.phases[:rootElement]
    @parser.phase
  end

end
|
||||||
|
end
|
156
vendor/plugins/HTML5lib/lib/html5lib/html5parser/phase.rb
vendored
Normal file
156
vendor/plugins/HTML5lib/lib/html5lib/html5parser/phase.rb
vendored
Normal file
|
@ -0,0 +1,156 @@
|
||||||
|
module HTML5lib
|
||||||
|
# Base class for helper objects that implement each phase of processing.
|
||||||
|
#
|
||||||
|
# Handler methods should be in the following order (they can be omitted):
|
||||||
|
#
|
||||||
|
# * EOF
|
||||||
|
# * Comment
|
||||||
|
# * Doctype
|
||||||
|
# * SpaceCharacters
|
||||||
|
# * Characters
|
||||||
|
# * StartTag
|
||||||
|
# - startTag* methods
|
||||||
|
# * EndTag
|
||||||
|
# - endTag* methods
|
||||||
|
#
|
||||||
|
class Phase
|
||||||
|
|
||||||
|
# The following example call:
|
||||||
|
#
|
||||||
|
# tag_handlers('startTag', 'html', %( base link meta ), %( li dt dd ) => 'ListItem')
|
||||||
|
#
|
||||||
|
# ...would return a hash equal to this:
|
||||||
|
#
|
||||||
|
# { 'html' => 'startTagHtml',
|
||||||
|
# 'base' => 'startTagBaseLinkMeta',
|
||||||
|
# 'link' => 'startTagBaseLinkMeta',
|
||||||
|
# 'meta' => 'startTagBaseLinkMeta',
|
||||||
|
# 'li' => 'startTagListItem',
|
||||||
|
# 'dt' => 'startTagListItem',
|
||||||
|
# 'dd' => 'startTagListItem' }
|
||||||
|
#
|
||||||
|
def self.tag_handlers(prefix, *tags)
|
||||||
|
mapping = {}
|
||||||
|
if tags.last.is_a?(Hash)
|
||||||
|
tags.pop.each do |names, handler_method_suffix|
|
||||||
|
handler_method = prefix + handler_method_suffix
|
||||||
|
Array(names).each { |name| mapping[name] = handler_method }
|
||||||
|
end
|
||||||
|
end
|
||||||
|
tags.each do |names|
|
||||||
|
names = Array(names)
|
||||||
|
handler_method = prefix + names.map { |name| name.capitalize }.join
|
||||||
|
names.each { |name| mapping[name] = handler_method }
|
||||||
|
end
|
||||||
|
return mapping
|
||||||
|
end
|
||||||
|
|
||||||
|
def self.start_tag_handlers
|
||||||
|
@start_tag_handlers ||= Hash.new('startTagOther')
|
||||||
|
end
|
||||||
|
|
||||||
|
# Declare what start tags this Phase handles. Can be called more than once.
|
||||||
|
#
|
||||||
|
# Example usage:
|
||||||
|
#
|
||||||
|
# handle_start 'html'
|
||||||
|
# # html start tags will be handled by a method named 'startTagHtml'
|
||||||
|
#
|
||||||
|
# handle_start %( base link meta )
|
||||||
|
# # base, link and meta start tags will be handled by a method named 'startTagBaseLinkMeta'
|
||||||
|
#
|
||||||
|
# handle_start %( li dt dd ) => 'ListItem'
|
||||||
|
# # li, dt, and dd start tags will be handled by a method named 'startTagListItem'
|
||||||
|
#
|
||||||
|
def self.handle_start(*tags)
|
||||||
|
start_tag_handlers.update tag_handlers('startTag', *tags)
|
||||||
|
end
|
||||||
|
|
||||||
|
def self.end_tag_handlers
|
||||||
|
@end_tag_handlers ||= Hash.new('endTagOther')
|
||||||
|
end
|
||||||
|
|
||||||
|
# Declare what end tags this Phase handles. Behaves like handle_start.
|
||||||
|
#
|
||||||
|
def self.handle_end(*tags)
|
||||||
|
end_tag_handlers.update tag_handlers('endTag', *tags)
|
||||||
|
end
|
||||||
|
|
||||||
|
def initialize(parser, tree)
|
||||||
|
@parser, @tree = parser, tree
|
||||||
|
end
|
||||||
|
|
||||||
|
def processEOF
|
||||||
|
@tree.generateImpliedEndTags
|
||||||
|
|
||||||
|
if @tree.openElements.length > 2
|
||||||
|
@parser.parseError(_('Unexpected end of file. Missing closing tags.'))
|
||||||
|
elsif @tree.openElements.length == 2 and @tree.openElements[1].name != 'body'
|
||||||
|
# This happens for framesets or something?
|
||||||
|
@parser.parseError(_("Unexpected end of file. Expected end tag (#{@tree.openElements[1].name}) first."))
|
||||||
|
elsif @parser.innerHTML and @tree.openElements.length > 1
|
||||||
|
# XXX This is not what the specification says. Not sure what to do here.
|
||||||
|
@parser.parseError(_('XXX innerHTML EOF'))
|
||||||
|
end
|
||||||
|
# Betting ends.
|
||||||
|
end
|
||||||
|
|
||||||
|
# Default comment handling: attach the comment to the last entry of the
# open elements stack. Phases that need a different parent override this.
def processComment(data)
  current_node = @tree.openElements[-1]
  @tree.insertComment(data, current_node)
end
|
||||||
|
|
||||||
|
# Default DOCTYPE handling: the token is not used; a parse error is
# reported instead. Neither +name+ nor +error+ is consulted here.
def processDoctype(name, error)
  message = _('Unexpected DOCTYPE. Ignored.')
  @parser.parseError(message)
end
|
||||||
|
|
||||||
|
# Default whitespace handling: hand the characters to the tree builder's
# insertText and return its result.
def processSpaceCharacters(data)
  return @tree.insertText(data)
end
|
||||||
|
|
||||||
|
# Dispatches a start tag: looks up the handler method name for +name+ in
# the class's start_tag_handlers table and calls it with the tag name and
# its attributes.
def processStartTag(name, attributes)
  handler = self.class.start_tag_handlers[name]
  send(handler, name, attributes)
end
|
||||||
|
|
||||||
|
# Handles an html start tag: reports a parse error when the parser's
# firstStartTag flag is already false, copies any attributes that the
# first open element does not have yet onto it, then clears the flag.
def startTagHtml(name, attributes)
  if @parser.firstStartTag == false && name == 'html'
    @parser.parseError(_('html needs to be the first start tag.'))
  end

  # XXX Need a check here to see if the first start tag token emitted is
  # this token... If it's not, invoke @parser.parseError.
  attributes.each do |attr, value|
    root_attributes = @tree.openElements[0].attributes
    # Existing attributes win; only missing ones are added.
    root_attributes[attr] = value unless root_attributes.has_key?(attr)
  end

  @parser.firstStartTag = false
end
|
||||||
|
|
||||||
|
# Dispatches an end tag: looks up the handler method name for +name+ in
# the class's end_tag_handlers table and calls it with the tag name.
def processEndTag(name)
  handler = self.class.end_tag_handlers[name]
  send(handler, name)
end
|
||||||
|
|
||||||
|
# Wrapper applied to every parse-error message in this class. It currently
# returns the message unchanged.
def _(string)
  return string
end
|
||||||
|
|
||||||
|
# Raises AssertionError unless +value+ is truthy; returns nil otherwise.
#
# This must use raise, not throw: throw is catch/throw control flow, so
# throwing a fresh AssertionError instance can never be matched by a
# catch and surfaces as an uncaught-throw error rather than an
# AssertionError.
def assert(value)
  raise AssertionError unless value
end
|
||||||
|
|
||||||
|
# Shorthand for @tree.elementInScope; all arguments are forwarded as given
# and the tree builder's answer is returned.
def in_scope?(*args)
  return @tree.elementInScope(*args)
end
|
||||||
|
|
||||||
|
# Pops entries off the open elements stack until a stopping condition is
# met, and returns the last element popped.
#
# With +name+ given, popping stops at the first element whose name equals
# it. Without +name+, each popped element is yielded to the block and
# popping stops as soon as the block returns a truthy value.
def remove_open_elements_until(name=nil)
  popped = @tree.openElements.pop
  until (name.nil? ? yield(popped) : popped.name == name)
    popped = @tree.openElements.pop
  end
  popped
end
|
||||||
|
|
||||||
|
end
|
||||||
|
end
|
43
vendor/plugins/HTML5lib/lib/html5lib/html5parser/root_element_phase.rb
vendored
Normal file
43
vendor/plugins/HTML5lib/lib/html5lib/html5parser/root_element_phase.rb
vendored
Normal file
|
@ -0,0 +1,43 @@
|
||||||
|
require 'html5lib/html5parser/phase'
|
||||||
|
|
||||||
|
module HTML5lib
  # Phase whose handlers make sure an html element exists. Comments and
  # whitespace are attached directly to the document; EOF and every other
  # token first create the html element (which also switches the parser to
  # the :beforeHead phase) and are then handed to the parser's new phase.
  class RootElementPhase < Phase

    # End of input: create the html element, then let the new phase see EOF.
    def processEOF
      insertHtmlElement
      next_phase = @parser.phase
      next_phase.processEOF
    end

    # Comments are inserted with the document itself as their parent.
    def processComment(data)
      document = @tree.document
      @tree.insertComment(data, document)
    end

    # Whitespace is inserted with the document itself as its parent.
    def processSpaceCharacters(data)
      document = @tree.document
      @tree.insertText(data, document)
    end

    # Character data: create the html element, then re-dispatch the data.
    def processCharacters(data)
      insertHtmlElement
      next_phase = @parser.phase
      next_phase.processCharacters(data)
    end

    # Start tag: record on the parser when it is an html tag, create the
    # html element, then re-dispatch the tag to the new phase.
    def processStartTag(name, attributes)
      if name == 'html'
        @parser.firstStartTag = true
      end
      insertHtmlElement
      next_phase = @parser.phase
      next_phase.processStartTag(name, attributes)
    end

    # End tag: create the html element, then re-dispatch the tag.
    def processEndTag(name)
      insertHtmlElement
      next_phase = @parser.phase
      next_phase.processEndTag(name)
    end

    # Creates an attribute-less html element, pushes it onto the stack of
    # open elements, appends it to the document and moves the parser to the
    # :beforeHead phase.
    def insertHtmlElement
      html = @tree.createElement('html', {})
      @tree.openElements.push(html)
      @tree.document.appendChild(html)
      @parser.phase = @parser.phases[:beforeHead]
    end

  end
end
|
36
vendor/plugins/HTML5lib/lib/html5lib/html5parser/trailing_end_phase.rb
vendored
Normal file
36
vendor/plugins/HTML5lib/lib/html5lib/html5parser/trailing_end_phase.rb
vendored
Normal file
|
@ -0,0 +1,36 @@
|
||||||
|
require 'html5lib/html5parser/phase'
|
||||||
|
|
||||||
|
module HTML5lib
  # Phase in which only EOF, comments and whitespace are expected. Anything
  # else is reported as a parse error, after which the parser is switched
  # back to @parser.lastPhase and the token is re-dispatched there.
  class TrailingEndPhase < Phase

    # End of input is the expected outcome here; nothing to do.
    def processEOF
    end

    # Comments are inserted with the document itself as their parent.
    def processComment(data)
      @tree.insertComment(data, @tree.document)
    end

    # Whitespace is handled by the previous phase without leaving this one.
    def processSpaceCharacters(data)
      @parser.lastPhase.processSpaceCharacters(data)
    end

    # Non-space characters: report, fall back to the previous phase and let
    # it process the data.
    def processCharacters(data)
      @parser.parseError(_('Unexpected non-space characters. Expected end of file.'))
      @parser.phase = @parser.lastPhase
      @parser.phase.processCharacters(data)
    end

    # Start tag: report, fall back to the previous phase and re-dispatch.
    def processStartTag(name, attributes)
      # Double quotes are required so that the tag name is interpolated
      # into the message instead of the literal text '#{name}'.
      @parser.parseError(_("Unexpected start tag (#{name}). Expected end of file."))
      @parser.phase = @parser.lastPhase
      @parser.phase.processStartTag(name, attributes)
    end

    # End tag: report, fall back to the previous phase and re-dispatch.
    def processEndTag(name)
      # Double quotes are required here as well; see processStartTag.
      @parser.parseError(_("Unexpected end tag (#{name}). Expected end of file."))
      @parser.phase = @parser.lastPhase
      @parser.phase.processEndTag(name)
    end

  end
end
|
283
vendor/plugins/HTML5lib/lib/html5lib/inputstream.rb
vendored
283
vendor/plugins/HTML5lib/lib/html5lib/inputstream.rb
vendored
|
@ -10,7 +10,7 @@ module HTML5lib
|
||||||
|
|
||||||
class HTMLInputStream
|
class HTMLInputStream
|
||||||
|
|
||||||
attr_accessor :queue, :charEncoding
|
attr_accessor :queue, :char_encoding
|
||||||
|
|
||||||
# Initialises the HTMLInputStream.
|
# Initialises the HTMLInputStream.
|
||||||
#
|
#
|
||||||
|
@ -28,16 +28,16 @@ class HTMLInputStream
|
||||||
|
|
||||||
def initialize(source, options = {})
|
def initialize(source, options = {})
|
||||||
@encoding = nil
|
@encoding = nil
|
||||||
@parseMeta = true
|
@parse_meta = true
|
||||||
@chardet = true
|
@chardet = true
|
||||||
|
|
||||||
options.each { |name, value| instance_variable_set("@#{name}", value) }
|
options.each { |name, value| instance_variable_set("@#{name}", value) }
|
||||||
|
|
||||||
# List of where new lines occur
|
# List of where new lines occur
|
||||||
@newLines = []
|
@new_lines = []
|
||||||
|
|
||||||
# Raw Stream
|
# Raw Stream
|
||||||
@rawStream = openStream(source)
|
@raw_stream = open_stream(source)
|
||||||
|
|
||||||
# Encoding Information
|
# Encoding Information
|
||||||
#Number of bytes to use when looking for a meta element with
|
#Number of bytes to use when looking for a meta element with
|
||||||
|
@ -47,15 +47,15 @@ class HTMLInputStream
|
||||||
@DEFAULT_ENCODING = 'windows-1252'
|
@DEFAULT_ENCODING = 'windows-1252'
|
||||||
|
|
||||||
#Detect encoding iff no explicit "transport level" encoding is supplied
|
#Detect encoding iff no explicit "transport level" encoding is supplied
|
||||||
if @encoding.nil? or not HTML5lib.isValidEncoding(@encoding)
|
if @encoding.nil? or not HTML5lib.is_valid_encoding(@encoding)
|
||||||
@charEncoding = detectEncoding
|
@char_encoding = detect_encoding
|
||||||
else
|
else
|
||||||
@charEncoding = @encoding
|
@char_encoding = @encoding
|
||||||
end
|
end
|
||||||
|
|
||||||
# Read bytes from stream decoding them into Unicode
|
# Read bytes from stream decoding them into Unicode
|
||||||
uString = @rawStream.read
|
uString = @raw_stream.read
|
||||||
unless @charEncoding == 'utf-8'
|
unless @char_encoding == 'utf-8'
|
||||||
begin
|
begin
|
||||||
require 'iconv'
|
require 'iconv'
|
||||||
uString = Iconv.iconv('utf-8', @encoding, uString)[0]
|
uString = Iconv.iconv('utf-8', @encoding, uString)[0]
|
||||||
|
@ -68,7 +68,7 @@ class HTMLInputStream
|
||||||
uString.gsub!("\x00", [0xFFFD].pack('U'))
|
uString.gsub!("\x00", [0xFFFD].pack('U'))
|
||||||
|
|
||||||
# Convert the unicode string into a list to be used as the data stream
|
# Convert the unicode string into a list to be used as the data stream
|
||||||
@dataStream = uString
|
@data_stream = uString
|
||||||
|
|
||||||
@queue = []
|
@queue = []
|
||||||
|
|
||||||
|
@ -79,7 +79,7 @@ class HTMLInputStream
|
||||||
# Produces a file object from source.
|
# Produces a file object from source.
|
||||||
#
|
#
|
||||||
# source can be either a file object, local filename or a string.
|
# source can be either a file object, local filename or a string.
|
||||||
def openStream(source)
|
def open_stream(source)
|
||||||
# Already an IO like object
|
# Already an IO like object
|
||||||
if source.respond_to?(:read)
|
if source.respond_to?(:read)
|
||||||
@stream = source
|
@stream = source
|
||||||
|
@ -90,24 +90,24 @@ class HTMLInputStream
|
||||||
return @stream
|
return @stream
|
||||||
end
|
end
|
||||||
|
|
||||||
def detectEncoding
|
def detect_encoding
|
||||||
|
|
||||||
#First look for a BOM
|
#First look for a BOM
|
||||||
#This will also read past the BOM if present
|
#This will also read past the BOM if present
|
||||||
encoding = detectBOM
|
encoding = detect_bom
|
||||||
#If there is no BOM need to look for meta elements with encoding
|
#If there is no BOM need to look for meta elements with encoding
|
||||||
#information
|
#information
|
||||||
if encoding.nil? and @parseMeta
|
if encoding.nil? and @parse_meta
|
||||||
encoding = detectEncodingMeta
|
encoding = detect_encoding_meta
|
||||||
end
|
end
|
||||||
#Guess with chardet, if available
|
#Guess with chardet, if available
|
||||||
if encoding.nil? and @chardet
|
if encoding.nil? and @chardet
|
||||||
begin
|
begin
|
||||||
require 'rubygems'
|
require 'rubygems'
|
||||||
require 'UniversalDetector' # gem install chardet
|
require 'UniversalDetector' # gem install chardet
|
||||||
buffer = @rawStream.read
|
buffer = @raw_stream.read
|
||||||
encoding = UniversalDetector::chardet(buffer)['encoding']
|
encoding = UniversalDetector::chardet(buffer)['encoding']
|
||||||
@rawStream = openStream(buffer)
|
@raw_stream = open_stream(buffer)
|
||||||
rescue LoadError
|
rescue LoadError
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -117,10 +117,10 @@ class HTMLInputStream
|
||||||
end
|
end
|
||||||
|
|
||||||
#Substitute for equivalent encodings:
|
#Substitute for equivalent encodings:
|
||||||
encodingSub = {'ascii' => 'windows-1252', 'iso-8859-1' => 'windows-1252'}
|
encoding_sub = {'ascii' => 'windows-1252', 'iso-8859-1' => 'windows-1252'}
|
||||||
|
|
||||||
if encodingSub.has_key?(encoding.downcase)
|
if encoding_sub.has_key?(encoding.downcase)
|
||||||
encoding = encodingSub[encoding.downcase]
|
encoding = encoding_sub[encoding.downcase]
|
||||||
end
|
end
|
||||||
|
|
||||||
return encoding
|
return encoding
|
||||||
|
@ -129,8 +129,8 @@ class HTMLInputStream
|
||||||
# Attempts to detect a BOM at the start of the stream. If
|
# Attempts to detect a BOM at the start of the stream. If
|
||||||
# an encoding can be determined from the BOM return the name of the
|
# an encoding can be determined from the BOM return the name of the
|
||||||
# encoding otherwise return nil
|
# encoding otherwise return nil
|
||||||
def detectBOM
|
def detect_bom
|
||||||
bomDict = {
|
bom_dict = {
|
||||||
"\xef\xbb\xbf" => 'utf-8',
|
"\xef\xbb\xbf" => 'utf-8',
|
||||||
"\xff\xfe" => 'utf-16-le',
|
"\xff\xfe" => 'utf-16-le',
|
||||||
"\xfe\xff" => 'utf-16-be',
|
"\xfe\xff" => 'utf-16-be',
|
||||||
|
@ -139,19 +139,19 @@ class HTMLInputStream
|
||||||
}
|
}
|
||||||
|
|
||||||
# Go to beginning of file and read in 4 bytes
|
# Go to beginning of file and read in 4 bytes
|
||||||
@rawStream.seek(0)
|
@raw_stream.seek(0)
|
||||||
string = @rawStream.read(4)
|
string = @raw_stream.read(4)
|
||||||
return nil unless string
|
return nil unless string
|
||||||
|
|
||||||
# Try detecting the BOM using bytes from the string
|
# Try detecting the BOM using bytes from the string
|
||||||
encoding = bomDict[string[0...3]] # UTF-8
|
encoding = bom_dict[string[0...3]] # UTF-8
|
||||||
seek = 3
|
seek = 3
|
||||||
unless encoding
|
unless encoding
|
||||||
# Need to detect UTF-32 before UTF-16
|
# Need to detect UTF-32 before UTF-16
|
||||||
encoding = bomDict[string] # UTF-32
|
encoding = bom_dict[string] # UTF-32
|
||||||
seek = 4
|
seek = 4
|
||||||
unless encoding
|
unless encoding
|
||||||
encoding = bomDict[string[0...2]] # UTF-16
|
encoding = bom_dict[string[0...2]] # UTF-16
|
||||||
seek = 2
|
seek = 2
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -159,36 +159,36 @@ class HTMLInputStream
|
||||||
#AT - move this to the caller?
|
#AT - move this to the caller?
|
||||||
# Set the read position past the BOM if one was found, otherwise
|
# Set the read position past the BOM if one was found, otherwise
|
||||||
# set it to the start of the stream
|
# set it to the start of the stream
|
||||||
@rawStream.seek(encoding ? seek : 0)
|
@raw_stream.seek(encoding ? seek : 0)
|
||||||
|
|
||||||
return encoding
|
return encoding
|
||||||
end
|
end
|
||||||
|
|
||||||
# Report the encoding declared by the meta element
|
# Report the encoding declared by the meta element
|
||||||
def detectEncodingMeta
|
def detect_encoding_meta
|
||||||
parser = EncodingParser.new(@rawStream.read(@NUM_BYTES_META))
|
parser = EncodingParser.new(@raw_stream.read(@NUM_BYTES_META))
|
||||||
@rawStream.seek(0)
|
@raw_stream.seek(0)
|
||||||
return parser.getEncoding
|
return parser.get_encoding
|
||||||
end
|
end
|
||||||
|
|
||||||
def determineNewLines
|
def determine_new_lines
|
||||||
# Looks through the stream to find where new lines occur so
|
# Looks through the stream to find where new lines occur so
|
||||||
# the position method can tell where it is.
|
# the position method can tell where it is.
|
||||||
@newLines.push(0)
|
@new_lines.push(0)
|
||||||
(0...@dataStream.length).each { |i| @newLines.push(i) if @dataStream[i] == ?\n }
|
(0...@data_stream.length).each { |i| @new_lines.push(i) if @data_stream[i] == ?\n }
|
||||||
end
|
end
|
||||||
|
|
||||||
# Returns (line, col) of the current position in the stream.
|
# Returns (line, col) of the current position in the stream.
|
||||||
def position
|
def position
|
||||||
# Generate list of new lines first time around
|
# Generate list of new lines first time around
|
||||||
determineNewLines if @newLines.empty?
|
determine_new_lines if @new_lines.empty?
|
||||||
line = 0
|
line = 0
|
||||||
tell = @tell
|
tell = @tell
|
||||||
@newLines.each do |pos|
|
@new_lines.each do |pos|
|
||||||
break unless pos < tell
|
break unless pos < tell
|
||||||
line += 1
|
line += 1
|
||||||
end
|
end
|
||||||
col = tell - @newLines[line-1] - 1
|
col = tell - @new_lines[line-1] - 1
|
||||||
return [line, col]
|
return [line, col]
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -205,7 +205,7 @@ class HTMLInputStream
|
||||||
else
|
else
|
||||||
begin
|
begin
|
||||||
@tell += 1
|
@tell += 1
|
||||||
return @dataStream[@tell - 1].chr
|
return @data_stream[@tell - 1].chr
|
||||||
rescue
|
rescue
|
||||||
return :EOF
|
return :EOF
|
||||||
end
|
end
|
||||||
|
@ -215,22 +215,22 @@ class HTMLInputStream
|
||||||
# Returns a string of characters from the stream up to but not
|
# Returns a string of characters from the stream up to but not
|
||||||
# including any character in characters or EOF. characters can be
|
# including any character in characters or EOF. characters can be
|
||||||
# any container that supports the in method being called on it.
|
# any container that supports the in method being called on it.
|
||||||
def charsUntil(characters, opposite = false)
|
def chars_until(characters, opposite=false)
|
||||||
charStack = [char]
|
char_stack = [char]
|
||||||
|
|
||||||
unless charStack[0] == :EOF
|
unless char_stack[0] == :EOF
|
||||||
while (characters.include? charStack[-1]) == opposite
|
while (characters.include? char_stack[-1]) == opposite
|
||||||
unless @queue.empty?
|
unless @queue.empty?
|
||||||
# First from the queue
|
# First from the queue
|
||||||
charStack.push(@queue.shift)
|
char_stack.push(@queue.shift)
|
||||||
break if charStack[-1] == :EOF
|
break if char_stack[-1] == :EOF
|
||||||
else
|
else
|
||||||
# Then the rest
|
# Then the rest
|
||||||
begin
|
begin
|
||||||
charStack.push(@dataStream[@tell].chr)
|
char_stack.push(@data_stream[@tell].chr)
|
||||||
@tell += 1
|
@tell += 1
|
||||||
rescue
|
rescue
|
||||||
charStack.push(:EOF)
|
char_stack.push(:EOF)
|
||||||
break
|
break
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -239,8 +239,8 @@ class HTMLInputStream
|
||||||
|
|
||||||
# Put the character stopped on back to the front of the queue
|
# Put the character stopped on back to the front of the queue
|
||||||
# from where it came.
|
# from where it came.
|
||||||
@queue.insert(0, charStack.pop)
|
@queue.insert(0, char_stack.pop)
|
||||||
return charStack.join('')
|
return char_stack.join('')
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -263,14 +263,14 @@ class EncodingBytes < String
|
||||||
rescue EOF
|
rescue EOF
|
||||||
end
|
end
|
||||||
|
|
||||||
def currentByte
|
def current_byte
|
||||||
raise EOF if @position >= length
|
raise EOF if @position >= length
|
||||||
return self[@position].chr
|
return self[@position].chr
|
||||||
end
|
end
|
||||||
|
|
||||||
# Skip past a list of characters
|
# Skip past a list of characters
|
||||||
def skip(chars=SPACE_CHARACTERS)
|
def skip(chars=SPACE_CHARACTERS)
|
||||||
while chars.include?(currentByte)
|
while chars.include?(current_byte)
|
||||||
@position += 1
|
@position += 1
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -278,7 +278,7 @@ class EncodingBytes < String
|
||||||
# Look for a sequence of bytes at the start of a string. If the bytes
|
# Look for a sequence of bytes at the start of a string. If the bytes
|
||||||
# are found return true and advance the position to the byte after the
|
# are found return true and advance the position to the byte after the
|
||||||
# match. Otherwise return false and leave the position alone
|
# match. Otherwise return false and leave the position alone
|
||||||
def matchBytes(bytes, lower = false)
|
def match_bytes(bytes, lower=false)
|
||||||
data = self[position ... position+bytes.length]
|
data = self[position ... position+bytes.length]
|
||||||
data.downcase! if lower
|
data.downcase! if lower
|
||||||
rv = (data == bytes)
|
rv = (data == bytes)
|
||||||
|
@ -288,10 +288,10 @@ class EncodingBytes < String
|
||||||
|
|
||||||
# Look for the next sequence of bytes matching a given sequence. If
|
# Look for the next sequence of bytes matching a given sequence. If
|
||||||
# a match is found advance the position to the last byte of the match
|
# a match is found advance the position to the last byte of the match
|
||||||
def jumpTo(bytes)
|
def jump_to(bytes)
|
||||||
newPosition = self[position .. -1].index(bytes)
|
new_position = self[position .. -1].index(bytes)
|
||||||
if newPosition
|
if new_position
|
||||||
@position += (newPosition + bytes.length-1)
|
@position += (new_position + bytes.length-1)
|
||||||
return true
|
return true
|
||||||
else
|
else
|
||||||
raise EOF
|
raise EOF
|
||||||
|
@ -300,8 +300,8 @@ class EncodingBytes < String
|
||||||
|
|
||||||
# Move the pointer so it points to the next byte in a set of possible
|
# Move the pointer so it points to the next byte in a set of possible
|
||||||
# bytes
|
# bytes
|
||||||
def findNext(byteList)
|
def find_next(byte_list)
|
||||||
until byteList.include?(currentByte)
|
until byte_list.include?(current_byte)
|
||||||
@position += 1
|
@position += 1
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -317,139 +317,139 @@ class EncodingParser
|
||||||
end
|
end
|
||||||
|
|
||||||
@@method_dispatch = [
|
@@method_dispatch = [
|
||||||
['<!--', :handleComment],
|
['<!--', :handle_comment],
|
||||||
['<meta', :handleMeta],
|
['<meta', :handle_meta],
|
||||||
['</', :handlePossibleEndTag],
|
['</', :handle_possible_end_tag],
|
||||||
['<!', :handleOther],
|
['<!', :handle_other],
|
||||||
['<?', :handleOther],
|
['<?', :handle_other],
|
||||||
['<', :handlePossibleStartTag]
|
['<', :handle_possible_start_tag]
|
||||||
]
|
]
|
||||||
|
|
||||||
def getEncoding
|
def get_encoding
|
||||||
@data.each do |byte|
|
@data.each do |byte|
|
||||||
keepParsing = true
|
keep_parsing = true
|
||||||
@@method_dispatch.each do |(key, method)|
|
@@method_dispatch.each do |(key, method)|
|
||||||
if @data.matchBytes(key, lower = true)
|
if @data.match_bytes(key, lower = true)
|
||||||
keepParsing = send(method)
|
keep_parsing = send(method)
|
||||||
break
|
break
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
break unless keepParsing
|
break unless keep_parsing
|
||||||
end
|
end
|
||||||
@encoding = @encoding.strip unless @encoding.nil?
|
@encoding = @encoding.strip unless @encoding.nil?
|
||||||
return @encoding
|
return @encoding
|
||||||
end
|
end
|
||||||
|
|
||||||
# Skip over comments
|
# Skip over comments
|
||||||
def handleComment
|
def handle_comment
|
||||||
return @data.jumpTo('-->')
|
return @data.jump_to('-->')
|
||||||
end
|
end
|
||||||
|
|
||||||
def handleMeta
|
def handle_meta
|
||||||
# if we have <meta not followed by a space, just keep going
|
# if we have <meta not followed by a space, just keep going
|
||||||
return true unless SPACE_CHARACTERS.include?(@data.currentByte)
|
return true unless SPACE_CHARACTERS.include?(@data.current_byte)
|
||||||
|
|
||||||
#We have a valid meta element we want to search for attributes
|
#We have a valid meta element we want to search for attributes
|
||||||
while true
|
while true
|
||||||
#Try to find the next attribute after the current position
|
#Try to find the next attribute after the current position
|
||||||
attr = getAttribute
|
attr = get_attribute
|
||||||
|
|
||||||
return true if attr.nil?
|
return true if attr.nil?
|
||||||
|
|
||||||
if attr[0] == 'charset'
|
if attr[0] == 'charset'
|
||||||
tentativeEncoding = attr[1]
|
tentative_encoding = attr[1]
|
||||||
if HTML5lib.isValidEncoding(tentativeEncoding)
|
if HTML5lib.is_valid_encoding(tentative_encoding)
|
||||||
@encoding = tentativeEncoding
|
@encoding = tentative_encoding
|
||||||
return false
|
return false
|
||||||
end
|
end
|
||||||
elsif attr[0] == 'content'
|
elsif attr[0] == 'content'
|
||||||
contentParser = ContentAttrParser.new(EncodingBytes.new(attr[1]))
|
content_parser = ContentAttrParser.new(EncodingBytes.new(attr[1]))
|
||||||
tentativeEncoding = contentParser.parse
|
tentative_encoding = content_parser.parse
|
||||||
if HTML5lib.isValidEncoding(tentativeEncoding)
|
if HTML5lib.is_valid_encoding(tentative_encoding)
|
||||||
@encoding = tentativeEncoding
|
@encoding = tentative_encoding
|
||||||
return false
|
return false
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def handlePossibleStartTag
|
def handle_possible_start_tag
|
||||||
return handlePossibleTag(false)
|
return handle_possible_tag(false)
|
||||||
end
|
end
|
||||||
|
|
||||||
def handlePossibleEndTag
|
def handle_possible_end_tag
|
||||||
@data.position += 1
|
@data.position += 1
|
||||||
return handlePossibleTag(true)
|
return handle_possible_tag(true)
|
||||||
end
|
end
|
||||||
|
|
||||||
def handlePossibleTag(endTag)
|
def handle_possible_tag(end_tag)
|
||||||
unless ASCII_LETTERS.include?(@data.currentByte)
|
unless ASCII_LETTERS.include?(@data.current_byte)
|
||||||
#If the next byte is not an ascii letter either ignore this
|
#If the next byte is not an ascii letter either ignore this
|
||||||
#fragment (possible start tag case) or treat it according to
|
#fragment (possible start tag case) or treat it according to
|
||||||
#handleOther
|
#handleOther
|
||||||
if endTag
|
if end_tag
|
||||||
@data.position -= 1
|
@data.position -= 1
|
||||||
handleOther
|
handle_other
|
||||||
end
|
end
|
||||||
return true
|
return true
|
||||||
end
|
end
|
||||||
|
|
||||||
@data.findNext(SPACE_CHARACTERS + ['<', '>'])
|
@data.find_next(SPACE_CHARACTERS + ['<', '>'])
|
||||||
|
|
||||||
if @data.currentByte == '<'
|
if @data.current_byte == '<'
|
||||||
#return to the first step in the overall "two step" algorithm
|
#return to the first step in the overall "two step" algorithm
|
||||||
#reprocessing the < byte
|
#reprocessing the < byte
|
||||||
@data.position -= 1
|
@data.position -= 1
|
||||||
else
|
else
|
||||||
#Read all attributes
|
#Read all attributes
|
||||||
{} until getAttribute.nil?
|
{} until get_attribute.nil?
|
||||||
end
|
end
|
||||||
return true
|
return true
|
||||||
end
|
end
|
||||||
|
|
||||||
def handleOther
|
def handle_other
|
||||||
return @data.jumpTo('>')
|
return @data.jump_to('>')
|
||||||
end
|
end
|
||||||
|
|
||||||
# Return a name,value pair for the next attribute in the stream,
|
# Return a name,value pair for the next attribute in the stream,
|
||||||
# if one is found, or nil
|
# if one is found, or nil
|
||||||
def getAttribute
|
def get_attribute
|
||||||
@data.skip(SPACE_CHARACTERS + ['/'])
|
@data.skip(SPACE_CHARACTERS + ['/'])
|
||||||
|
|
||||||
if @data.currentByte == '<'
|
if @data.current_byte == '<'
|
||||||
@data.position -= 1
|
@data.position -= 1
|
||||||
return nil
|
return nil
|
||||||
elsif @data.currentByte == '>'
|
elsif @data.current_byte == '>'
|
||||||
return nil
|
return nil
|
||||||
end
|
end
|
||||||
|
|
||||||
attrName = []
|
attr_name = []
|
||||||
attrValue = []
|
attr_value = []
|
||||||
spaceFound = false
|
space_found = false
|
||||||
#Step 5 attribute name
|
#Step 5 attribute name
|
||||||
while true
|
while true
|
||||||
if @data.currentByte == '=' and attrName:
|
if @data.current_byte == '=' and attr_name:
|
||||||
break
|
break
|
||||||
elsif SPACE_CHARACTERS.include?(@data.currentByte)
|
elsif SPACE_CHARACTERS.include?(@data.current_byte)
|
||||||
spaceFound = true
|
space_found = true
|
||||||
break
|
break
|
||||||
elsif ['/', '<', '>'].include?(@data.currentByte)
|
elsif ['/', '<', '>'].include?(@data.current_byte)
|
||||||
return [attrName.join(''), '']
|
return [attr_name.join(''), '']
|
||||||
elsif ASCII_UPPERCASE.include?(@data.currentByte)
|
elsif ASCII_UPPERCASE.include?(@data.current_byte)
|
||||||
attrName.push(@data.currentByte.downcase)
|
attr_name.push(@data.current_byte.downcase)
|
||||||
else
|
else
|
||||||
attrName.push(@data.currentByte)
|
attr_name.push(@data.current_byte)
|
||||||
end
|
end
|
||||||
#Step 6
|
#Step 6
|
||||||
@data.position += 1
|
@data.position += 1
|
||||||
end
|
end
|
||||||
#Step 7
|
#Step 7
|
||||||
if spaceFound
|
if space_found
|
||||||
@data.skip
|
@data.skip
|
||||||
#Step 8
|
#Step 8
|
||||||
unless @data.currentByte == '='
|
unless @data.current_byte == '='
|
||||||
@data.position -= 1
|
@data.position -= 1
|
||||||
return [attrName.join(''), '']
|
return [attr_name.join(''), '']
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
#XXX need to advance position in both spaces and value case
|
#XXX need to advance position in both spaces and value case
|
||||||
|
@ -458,38 +458,38 @@ class EncodingParser
|
||||||
#Step 10
|
#Step 10
|
||||||
@data.skip
|
@data.skip
|
||||||
#Step 11
|
#Step 11
|
||||||
if ["'", '"'].include?(@data.currentByte)
|
if ["'", '"'].include?(@data.current_byte)
|
||||||
#11.1
|
#11.1
|
||||||
quoteChar = @data.currentByte
|
quote_char = @data.current_byte
|
||||||
while true
|
while true
|
||||||
@data.position+=1
|
@data.position+=1
|
||||||
#11.3
|
#11.3
|
||||||
if @data.currentByte == quoteChar
|
if @data.current_byte == quote_char
|
||||||
@data.position += 1
|
@data.position += 1
|
||||||
return [attrName.join(''), attrValue.join('')]
|
return [attr_name.join(''), attr_value.join('')]
|
||||||
#11.4
|
#11.4
|
||||||
elsif ASCII_UPPERCASE.include?(@data.currentByte)
|
elsif ASCII_UPPERCASE.include?(@data.current_byte)
|
||||||
attrValue.push(@data.currentByte.downcase)
|
attr_value.push(@data.current_byte.downcase)
|
||||||
#11.5
|
#11.5
|
||||||
else
|
else
|
||||||
attrValue.push(@data.currentByte)
|
attr_value.push(@data.current_byte)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
elsif ['>', '<'].include?(@data.currentByte)
|
elsif ['>', '<'].include?(@data.current_byte)
|
||||||
return [attrName.join(''), '']
|
return [attr_name.join(''), '']
|
||||||
elsif ASCII_UPPERCASE.include?(@data.currentByte)
|
elsif ASCII_UPPERCASE.include?(@data.current_byte)
|
||||||
attrValue.push(@data.currentByte.downcase)
|
attr_value.push(@data.current_byte.downcase)
|
||||||
else
|
else
|
||||||
attrValue.push(@data.currentByte)
|
attr_value.push(@data.current_byte)
|
||||||
end
|
end
|
||||||
while true
|
while true
|
||||||
@data.position += 1
|
@data.position += 1
|
||||||
if (SPACE_CHARACTERS + ['>', '<']).include?(@data.currentByte)
|
if (SPACE_CHARACTERS + ['>', '<']).include?(@data.current_byte)
|
||||||
return [attrName.join(''), attrValue.join('')]
|
return [attr_name.join(''), attr_value.join('')]
|
||||||
elsif ASCII_UPPERCASE.include?(@data.currentByte)
|
elsif ASCII_UPPERCASE.include?(@data.current_byte)
|
||||||
attrValue.push(@data.currentByte.downcase)
|
attr_value.push(@data.current_byte.downcase)
|
||||||
else
|
else
|
||||||
attrValue.push(@data.currentByte)
|
attr_value.push(@data.current_byte)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -499,40 +499,41 @@ class ContentAttrParser
|
||||||
def initialize(data)
|
def initialize(data)
|
||||||
@data = data
|
@data = data
|
||||||
end
|
end
|
||||||
|
|
||||||
def parse
|
def parse
|
||||||
begin
|
begin
|
||||||
#Skip to the first ";"
|
#Skip to the first ";"
|
||||||
@data.position = 0
|
@data.position = 0
|
||||||
@data.jumpTo(';')
|
@data.jump_to(';')
|
||||||
@data.position += 1
|
@data.position += 1
|
||||||
@data.skip
|
@data.skip
|
||||||
#Check if the attr name is charset
|
#Check if the attr name is charset
|
||||||
#otherwise return
|
#otherwise return
|
||||||
@data.jumpTo('charset')
|
@data.jump_to('charset')
|
||||||
@data.position += 1
|
@data.position += 1
|
||||||
@data.skip
|
@data.skip
|
||||||
unless @data.currentByte == '='
|
unless @data.current_byte == '='
|
||||||
#If there is no = sign keep looking for attrs
|
#If there is no = sign keep looking for attrs
|
||||||
return nil
|
return nil
|
||||||
end
|
end
|
||||||
@data.position += 1
|
@data.position += 1
|
||||||
@data.skip
|
@data.skip
|
||||||
#Look for an encoding between matching quote marks
|
#Look for an encoding between matching quote marks
|
||||||
if ['"', "'"].include?(@data.currentByte)
|
if ['"', "'"].include?(@data.current_byte)
|
||||||
quoteMark = @data.currentByte
|
quote_mark = @data.current_byte
|
||||||
@data.position += 1
|
@data.position += 1
|
||||||
oldPosition = @data.position
|
old_position = @data.position
|
||||||
@data.jumpTo(quoteMark)
|
@data.jump_to(quote_mark)
|
||||||
return @data[oldPosition ... @data.position]
|
return @data[old_position ... @data.position]
|
||||||
else
|
else
|
||||||
#Unquoted value
|
#Unquoted value
|
||||||
oldPosition = @data.position
|
old_position = @data.position
|
||||||
begin
|
begin
|
||||||
@data.findNext(SPACE_CHARACTERS)
|
@data.find_next(SPACE_CHARACTERS)
|
||||||
return @data[oldPosition ... @data.position]
|
return @data[old_position ... @data.position]
|
||||||
rescue EOF
|
rescue EOF
|
||||||
#Return the whole remaining value
|
#Return the whole remaining value
|
||||||
return @data[oldPosition .. -1]
|
return @data[old_position .. -1]
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
rescue EOF
|
rescue EOF
|
||||||
|
@ -542,7 +543,7 @@ class ContentAttrParser
|
||||||
end
|
end
|
||||||
|
|
||||||
# Determine if a string is a supported encoding
|
# Determine if a string is a supported encoding
|
||||||
def self.isValidEncoding(encoding)
|
def self.is_valid_encoding(encoding)
|
||||||
(not encoding.nil? and encoding.kind_of?(String) and ENCODINGS.include?(encoding.downcase.strip))
|
(not encoding.nil? and encoding.kind_of?(String) and ENCODINGS.include?(encoding.downcase.strip))
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -144,7 +144,6 @@ class HTMLSanitizer < HTMLTokenizer
|
||||||
else
|
else
|
||||||
yield token
|
yield token
|
||||||
end
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -147,7 +147,7 @@ class HTMLTokenizer
|
||||||
charAsInt = 65533
|
charAsInt = 65533
|
||||||
end
|
end
|
||||||
|
|
||||||
if charAsInt <= 0x10FFF
|
if charAsInt <= 0x10FFFF
|
||||||
char = [charAsInt].pack('U')
|
char = [charAsInt].pack('U')
|
||||||
else
|
else
|
||||||
@tokenQueue.push({:type => :ParseError, :data =>
|
@tokenQueue.push({:type => :ParseError, :data =>
|
||||||
|
@ -261,13 +261,11 @@ class HTMLTokenizer
|
||||||
@state = @states[:data]
|
@state = @states[:data]
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
# Below are the various tokenizer states worked out.
|
# Below are the various tokenizer states worked out.
|
||||||
|
|
||||||
# XXX AT Perhaps we should have Hixie run some evaluation on billions of
|
# XXX AT Perhaps we should have Hixie run some evaluation on billions of
|
||||||
# documents to figure out what the order of the various if and elsif
|
# documents to figure out what the order of the various if and elsif
|
||||||
# statements should be.
|
# statements should be.
|
||||||
|
|
||||||
def dataState
|
def dataState
|
||||||
data = @stream.char
|
data = @stream.char
|
||||||
if data == "&" and (@contentModelFlag == :PCDATA or
|
if data == "&" and (@contentModelFlag == :PCDATA or
|
||||||
|
@ -285,10 +283,10 @@ class HTMLTokenizer
|
||||||
# XXX need to check if we don't need a special "spaces" flag on
|
# XXX need to check if we don't need a special "spaces" flag on
|
||||||
# characters.
|
# characters.
|
||||||
@tokenQueue.push({:type => :SpaceCharacters, :data =>
|
@tokenQueue.push({:type => :SpaceCharacters, :data =>
|
||||||
data + @stream.charsUntil(SPACE_CHARACTERS, true)})
|
data + @stream.chars_until(SPACE_CHARACTERS, true)})
|
||||||
else
|
else
|
||||||
@tokenQueue.push({:type => :Characters, :data =>
|
@tokenQueue.push({:type => :Characters, :data =>
|
||||||
data + @stream.charsUntil(["&", "<"])})
|
data + @stream.chars_until(["&", "<"])})
|
||||||
end
|
end
|
||||||
return true
|
return true
|
||||||
end
|
end
|
||||||
|
@ -430,7 +428,7 @@ class HTMLTokenizer
|
||||||
emitCurrentToken
|
emitCurrentToken
|
||||||
elsif ASCII_LETTERS.include? data
|
elsif ASCII_LETTERS.include? data
|
||||||
@currentToken[:name] += data +\
|
@currentToken[:name] += data +\
|
||||||
@stream.charsUntil(ASCII_LETTERS, true)
|
@stream.chars_until(ASCII_LETTERS, true)
|
||||||
elsif data == ">"
|
elsif data == ">"
|
||||||
emitCurrentToken
|
emitCurrentToken
|
||||||
elsif data == "<"
|
elsif data == "<"
|
||||||
|
@ -450,7 +448,7 @@ class HTMLTokenizer
|
||||||
def beforeAttributeNameState
|
def beforeAttributeNameState
|
||||||
data = @stream.char
|
data = @stream.char
|
||||||
if SPACE_CHARACTERS.include? data
|
if SPACE_CHARACTERS.include? data
|
||||||
@stream.charsUntil(SPACE_CHARACTERS, true)
|
@stream.chars_until(SPACE_CHARACTERS, true)
|
||||||
elsif data == :EOF
|
elsif data == :EOF
|
||||||
@tokenQueue.push({:type => :ParseError, :data =>
|
@tokenQueue.push({:type => :ParseError, :data =>
|
||||||
_("Unexpected end of file. Expected attribute name instead.")})
|
_("Unexpected end of file. Expected attribute name instead.")})
|
||||||
|
@ -486,7 +484,7 @@ class HTMLTokenizer
|
||||||
leavingThisState = false
|
leavingThisState = false
|
||||||
elsif ASCII_LETTERS.include? data
|
elsif ASCII_LETTERS.include? data
|
||||||
@currentToken[:data][-1][0] += data +\
|
@currentToken[:data][-1][0] += data +\
|
||||||
@stream.charsUntil(ASCII_LETTERS, true)
|
@stream.chars_until(ASCII_LETTERS, true)
|
||||||
leavingThisState = false
|
leavingThisState = false
|
||||||
elsif data == ">"
|
elsif data == ">"
|
||||||
# XXX If we emit here the attributes are converted to a dict
|
# XXX If we emit here the attributes are converted to a dict
|
||||||
|
@ -529,7 +527,7 @@ class HTMLTokenizer
|
||||||
def afterAttributeNameState
|
def afterAttributeNameState
|
||||||
data = @stream.char
|
data = @stream.char
|
||||||
if SPACE_CHARACTERS.include? data
|
if SPACE_CHARACTERS.include? data
|
||||||
@stream.charsUntil(SPACE_CHARACTERS, true)
|
@stream.chars_until(SPACE_CHARACTERS, true)
|
||||||
elsif data == "="
|
elsif data == "="
|
||||||
@state = @states[:beforeAttributeValue]
|
@state = @states[:beforeAttributeValue]
|
||||||
elsif data == ">"
|
elsif data == ">"
|
||||||
|
@ -559,7 +557,7 @@ class HTMLTokenizer
|
||||||
def beforeAttributeValueState
|
def beforeAttributeValueState
|
||||||
data = @stream.char
|
data = @stream.char
|
||||||
if SPACE_CHARACTERS.include? data
|
if SPACE_CHARACTERS.include? data
|
||||||
@stream.charsUntil(SPACE_CHARACTERS, true)
|
@stream.chars_until(SPACE_CHARACTERS, true)
|
||||||
elsif data == "\""
|
elsif data == "\""
|
||||||
@state = @states[:attributeValueDoubleQuoted]
|
@state = @states[:attributeValueDoubleQuoted]
|
||||||
elsif data == "&"
|
elsif data == "&"
|
||||||
|
@ -597,7 +595,7 @@ class HTMLTokenizer
|
||||||
emitCurrentToken
|
emitCurrentToken
|
||||||
else
|
else
|
||||||
@currentToken[:data][-1][1] += data +\
|
@currentToken[:data][-1][1] += data +\
|
||||||
@stream.charsUntil(["\"", "&"])
|
@stream.chars_until(["\"", "&"])
|
||||||
end
|
end
|
||||||
return true
|
return true
|
||||||
end
|
end
|
||||||
|
@ -614,7 +612,7 @@ class HTMLTokenizer
|
||||||
emitCurrentToken
|
emitCurrentToken
|
||||||
else
|
else
|
||||||
@currentToken[:data][-1][1] += data +\
|
@currentToken[:data][-1][1] += data +\
|
||||||
@stream.charsUntil(["'", "&"])
|
@stream.chars_until(["'", "&"])
|
||||||
end
|
end
|
||||||
return true
|
return true
|
||||||
end
|
end
|
||||||
|
@ -638,17 +636,17 @@ class HTMLTokenizer
|
||||||
emitCurrentToken
|
emitCurrentToken
|
||||||
else
|
else
|
||||||
@currentToken[:data][-1][1] += data +
|
@currentToken[:data][-1][1] += data +
|
||||||
@stream.charsUntil(["&", ">","<"] + SPACE_CHARACTERS)
|
@stream.chars_until(["&", ">","<"] + SPACE_CHARACTERS)
|
||||||
end
|
end
|
||||||
return true
|
return true
|
||||||
end
|
end
|
||||||
|
|
||||||
def bogusCommentState
|
def bogusCommentState
|
||||||
# Make a new comment token and give it as value all the characters
|
# Make a new comment token and give it as value all the characters
|
||||||
# until the first > or :EOF (charsUntil checks for :EOF automatically)
|
# until the first > or :EOF (chars_until checks for :EOF automatically)
|
||||||
# and emit it.
|
# and emit it.
|
||||||
@tokenQueue.push(
|
@tokenQueue.push(
|
||||||
{:type => :Comment, :data => @stream.charsUntil((">"))})
|
{:type => :Comment, :data => @stream.chars_until((">"))})
|
||||||
|
|
||||||
# Eat the character directly after the bogus comment which is either a
|
# Eat the character directly after the bogus comment which is either a
|
||||||
# ">" or an :EOF.
|
# ">" or an :EOF.
|
||||||
|
@ -690,7 +688,7 @@ class HTMLTokenizer
|
||||||
@tokenQueue.push(@currentToken)
|
@tokenQueue.push(@currentToken)
|
||||||
@state = @states[:data]
|
@state = @states[:data]
|
||||||
else
|
else
|
||||||
@currentToken[:data] += data + @stream.charsUntil("-")
|
@currentToken[:data] += data + @stream.chars_until("-")
|
||||||
end
|
end
|
||||||
return true
|
return true
|
||||||
end
|
end
|
||||||
|
@ -706,7 +704,7 @@ class HTMLTokenizer
|
||||||
@state = @states[:data]
|
@state = @states[:data]
|
||||||
else
|
else
|
||||||
@currentToken[:data] += "-" + data +\
|
@currentToken[:data] += "-" + data +\
|
||||||
@stream.charsUntil("-")
|
@stream.chars_until("-")
|
||||||
# Consume the next character which is either a "-" or an :EOF as
|
# Consume the next character which is either a "-" or an :EOF as
|
||||||
# well so if there's a "-" directly after the "-" we go nicely to
|
# well so if there's a "-" directly after the "-" we go nicely to
|
||||||
# the "comment end state" without emitting a ParseError there.
|
# the "comment end state" without emitting a ParseError there.
|
||||||
|
|
|
@ -89,13 +89,16 @@ class Element < Node
|
||||||
def initialize(hpricot)
|
def initialize(hpricot)
|
||||||
@hpricot = hpricot
|
@hpricot = hpricot
|
||||||
end
|
end
|
||||||
|
|
||||||
def []=(k, v)
|
def []=(k, v)
|
||||||
@hpricot.stag.send(stag_attributes_method)[k] = v
|
@hpricot.stag.send(stag_attributes_method)[k] = v
|
||||||
end
|
end
|
||||||
|
|
||||||
def stag_attributes_method
|
def stag_attributes_method
|
||||||
# STag#attributes changed to STag#raw_attributes after Hpricot 0.5
|
# STag#attributes changed to STag#raw_attributes after Hpricot 0.5
|
||||||
@hpricot.stag.respond_to?(:raw_attributes) ? :raw_attributes : :attributes
|
@hpricot.stag.respond_to?(:raw_attributes) ? :raw_attributes : :attributes
|
||||||
end
|
end
|
||||||
|
|
||||||
def method_missing(*a, &b)
|
def method_missing(*a, &b)
|
||||||
@hpricot.attributes.send(*a, &b)
|
@hpricot.attributes.send(*a, &b)
|
||||||
end
|
end
|
||||||
|
|
12
vendor/plugins/HTML5lib/tests/preamble.rb
vendored
12
vendor/plugins/HTML5lib/tests/preamble.rb
vendored
|
@ -9,3 +9,15 @@ $:.unshift File.dirname(__FILE__)
|
||||||
def html5lib_test_files(subdirectory)
|
def html5lib_test_files(subdirectory)
|
||||||
Dir[File.join(HTML5LIB_BASE, 'tests', subdirectory, '*.*')]
|
Dir[File.join(HTML5LIB_BASE, 'tests', subdirectory, '*.*')]
|
||||||
end
|
end
|
||||||
|
|
||||||
|
begin
|
||||||
|
require 'jsonx'
|
||||||
|
rescue LoadError
|
||||||
|
class JSON
|
||||||
|
def self.parse json
|
||||||
|
json.gsub! /"\s*:/, '"=>'
|
||||||
|
json.gsub!(/\\u[0-9a-fA-F]{4}/) {|x| [x[2..-1].to_i(16)].pack('U')}
|
||||||
|
eval json
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
|
@ -11,7 +11,7 @@ begin
|
||||||
def test_chardet
|
def test_chardet
|
||||||
File.open(File.join(HTML5LIB_BASE, 'tests', 'encoding', 'chardet', 'test_big5.txt')) do |file|
|
File.open(File.join(HTML5LIB_BASE, 'tests', 'encoding', 'chardet', 'test_big5.txt')) do |file|
|
||||||
stream = HTML5lib::HTMLInputStream.new(file, :chardet => true)
|
stream = HTML5lib::HTMLInputStream.new(file, :chardet => true)
|
||||||
assert_equal 'big5', stream.charEncoding.downcase
|
assert_equal 'big5', stream.char_encoding.downcase
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
rescue LoadError
|
rescue LoadError
|
||||||
|
@ -28,7 +28,7 @@ end
|
||||||
|
|
||||||
define_method 'test_%s_%d' % [ test_name, index + 1 ] do
|
define_method 'test_%s_%d' % [ test_name, index + 1 ] do
|
||||||
stream = HTML5lib::HTMLInputStream.new(input, :chardet => false)
|
stream = HTML5lib::HTMLInputStream.new(input, :chardet => false)
|
||||||
assert_equal encoding.downcase, stream.charEncoding.downcase, input
|
assert_equal encoding.downcase, stream.char_encoding.downcase, input
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -203,4 +203,8 @@ class SanitizeTest < Test::Unit::TestCase
|
||||||
sanitize_html(%(<img src='vbscript:msgbox("XSS")' />))
|
sanitize_html(%(<img src='vbscript:msgbox("XSS")' />))
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def test_should_handle_astral_plane_characters
|
||||||
|
assert_equal "<p>\360\235\222\265 \360\235\224\270</p>",
|
||||||
|
sanitize_html("<p>𝒵 𝔸</p>")
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
12
vendor/plugins/HTML5lib/tests/test_tokenizer.rb
vendored
12
vendor/plugins/HTML5lib/tests/test_tokenizer.rb
vendored
|
@ -4,18 +4,6 @@ require 'html5lib/tokenizer'
|
||||||
|
|
||||||
require 'tokenizer_test_parser'
|
require 'tokenizer_test_parser'
|
||||||
|
|
||||||
begin
|
|
||||||
require 'jsonx'
|
|
||||||
rescue LoadError
|
|
||||||
class JSON
|
|
||||||
def self.parse json
|
|
||||||
json.gsub! /"\s*:/, '"=>'
|
|
||||||
json.gsub!(/\\u[0-9a-fA-F]{4}/) {|x| [x[2..-1].to_i(16)].pack('U')}
|
|
||||||
eval json
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
class Html5TokenizerTestCase < Test::Unit::TestCase
|
class Html5TokenizerTestCase < Test::Unit::TestCase
|
||||||
|
|
||||||
def type_of?(token_name, token)
|
def type_of?(token_name, token)
|
||||||
|
|
Loading…
Reference in a new issue