Sync with latest HTML5lib
This commit is contained in:
parent
bf572e295f
commit
8e92e4a3ab
41 changed files with 1334 additions and 564 deletions
|
@ -20,20 +20,43 @@ module HTML5lib
|
|||
|
||||
when :EmptyTag
|
||||
if token[:name].downcase == "meta"
|
||||
if token[:data].any? {|name,value| name=='charset'}
|
||||
# replace charset with actual encoding
|
||||
attrs=Hash[*token[:data].flatten]
|
||||
attrs['charset'] = @encoding
|
||||
token[:data] = attrs.to_a.sort
|
||||
meta_found = true
|
||||
# replace charset with actual encoding
|
||||
token[:data].each_with_index do |(name,value),index|
|
||||
if name == 'charset'
|
||||
token[:data][index][1]=@encoding
|
||||
meta_found = true
|
||||
end
|
||||
end
|
||||
|
||||
# replace charset with actual encoding
|
||||
has_http_equiv_content_type = false
|
||||
content_index = -1
|
||||
token[:data].each_with_index do |(name,value),i|
|
||||
if name.downcase == 'charset'
|
||||
token[:data][i] = ['charset', @encoding]
|
||||
meta_found = true
|
||||
break
|
||||
elsif name == 'http-equiv' and value.downcase == 'content-type'
|
||||
has_http_equiv_content_type = true
|
||||
elsif name == 'content'
|
||||
content_index = i
|
||||
end
|
||||
end
|
||||
|
||||
if not meta_found
|
||||
if has_http_equiv_content_type and content_index >= 0
|
||||
token[:data][content_index][1] =
|
||||
'text/html; charset=%s' % @encoding
|
||||
meta_found = true
|
||||
end
|
||||
end
|
||||
|
||||
elsif token[:name].downcase == "head" and not meta_found
|
||||
# insert meta into empty head
|
||||
yield({:type => :StartTag, :name => "head", :data => {}})
|
||||
yield({:type => :EmptyTag, :name => "meta",
|
||||
:data => {"charset" => @encoding}})
|
||||
yield({:type => :EndTag, :name => "head"})
|
||||
yield(:type => :StartTag, :name => "head", :data => token[:data])
|
||||
yield(:type => :EmptyTag, :name => "meta",
|
||||
:data => [["charset", @encoding]])
|
||||
yield(:type => :EndTag, :name => "head")
|
||||
meta_found = true
|
||||
next
|
||||
end
|
||||
|
@ -42,8 +65,8 @@ module HTML5lib
|
|||
if token[:name].downcase == "head" and pending.any?
|
||||
# insert meta into head (if necessary) and flush pending queue
|
||||
yield pending.shift
|
||||
yield({:type => :EmptyTag, :name => "meta",
|
||||
:data => {"charset" => @encoding}}) if not meta_found
|
||||
yield(:type => :EmptyTag, :name => "meta",
|
||||
:data => [["charset", @encoding]]) if not meta_found
|
||||
yield pending.shift while pending.any?
|
||||
meta_found = true
|
||||
state = :post_head
|
||||
|
|
|
@ -62,7 +62,8 @@ module HTML5lib
|
|||
@errors = []
|
||||
|
||||
@tokenizer = @tokenizer.class unless Class === @tokenizer
|
||||
@tokenizer = @tokenizer.new(stream, :encoding => encoding, :parseMeta => innerHTML)
|
||||
@tokenizer = @tokenizer.new(stream, :encoding => encoding,
|
||||
:parseMeta => !innerHTML)
|
||||
|
||||
if innerHTML
|
||||
case @innerHTML = container.downcase
|
||||
|
@ -99,10 +100,13 @@ module HTML5lib
|
|||
case token[:type]
|
||||
when :Characters, :SpaceCharacters, :Comment
|
||||
@phase.send method, token[:data]
|
||||
when :StartTag, :Doctype
|
||||
when :StartTag
|
||||
@phase.send method, token[:name], token[:data]
|
||||
when :EndTag
|
||||
@phase.send method, token[:name]
|
||||
when :Doctype
|
||||
@phase.send method, token[:name], token[:publicId],
|
||||
token[:systemId], token[:correct]
|
||||
else
|
||||
parseError(token[:data])
|
||||
end
|
||||
|
@ -147,10 +151,6 @@ module HTML5lib
|
|||
raise ParseError if @strict
|
||||
end
|
||||
|
||||
# This error is not an error
|
||||
def atheistParseError
|
||||
end
|
||||
|
||||
# HTML5 specific normalizations to the token stream
|
||||
def normalizeToken(token)
|
||||
|
||||
|
@ -160,9 +160,7 @@ module HTML5lib
|
|||
# element. If it matches a void element atheists did the wrong
|
||||
# thing and if it doesn't it's wrong for everyone.
|
||||
|
||||
if VOID_ELEMENTS.include?(token[:name])
|
||||
atheistParseError
|
||||
else
|
||||
unless VOID_ELEMENTS.include?(token[:name])
|
||||
parseError(_('Solidus (/) incorrectly placed in tag.'))
|
||||
end
|
||||
|
||||
|
|
|
@ -5,7 +5,7 @@ module HTML5lib
|
|||
|
||||
handle_start 'html', 'head'
|
||||
|
||||
handle_end 'html'
|
||||
handle_end %w( html head body br ) => 'ImplyHead'
|
||||
|
||||
def processEOF
|
||||
startTagHead('head', {})
|
||||
|
@ -28,7 +28,7 @@ module HTML5lib
|
|||
@parser.phase.processStartTag(name, attributes)
|
||||
end
|
||||
|
||||
def endTagHtml(name)
|
||||
# End-tag handler for the tags mapped to 'ImplyHead' by handle_end.
# Synthesizes a <head> start tag with no attributes via startTagHead, then
# re-dispatches the end tag +name+ to whatever phase the parser reports as
# current after that call.
def endTagImplyHead(name)
|
||||
startTagHead('head', {})
|
||||
@parser.phase.processEndTag(name)
|
||||
end
|
||||
|
@ -38,4 +38,4 @@ module HTML5lib
|
|||
end
|
||||
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -5,15 +5,20 @@ module HTML5lib
|
|||
|
||||
# http://www.whatwg.org/specs/web-apps/current-work/#in-body
|
||||
|
||||
handle_start 'html', 'body', 'form', 'plaintext', 'a', 'button', 'xmp', 'table', 'hr', 'image'
|
||||
handle_start 'html'
|
||||
handle_start %w( base link meta script style ) => 'ProcessInHead'
|
||||
handle_start 'title'
|
||||
|
||||
handle_start 'input', 'textarea', 'select', 'isindex', %w( script style ), %w( marquee object )
|
||||
handle_start 'body', 'form', 'plaintext', 'a', 'button', 'xmp', 'table', 'hr', 'image'
|
||||
|
||||
handle_start %w( li dd dt ) => 'ListItem', %w( base link meta title ) => 'FromHead'
|
||||
handle_start 'input', 'textarea', 'select', 'isindex', %w( marquee object )
|
||||
|
||||
handle_start %w( li dd dt ) => 'ListItem'
|
||||
|
||||
handle_start %w( address blockquote center dir div dl fieldset listing menu ol p pre ul ) => 'CloseP'
|
||||
|
||||
handle_start %w( b big em font i nobr s small strike strong tt u ) => 'Formatting'
|
||||
handle_start %w( b big em font i s small strike strong tt u ) => 'Formatting'
|
||||
handle_start 'nobr'
|
||||
|
||||
handle_start %w( area basefont bgsound br embed img param spacer wbr ) => 'VoidFormatting'
|
||||
|
||||
|
@ -33,7 +38,9 @@ module HTML5lib
|
|||
|
||||
handle_end %w( head frameset select optgroup option table caption colgroup col thead tfoot tbody tr td th ) => 'Misplaced'
|
||||
|
||||
handle_end %w( area basefont bgsound br embed hr image img input isindex param spacer wbr frame ) => 'None'
|
||||
handle_end 'br'
|
||||
|
||||
handle_end %w( area basefont bgsound embed hr image img input isindex param spacer wbr frame ) => 'None'
|
||||
|
||||
handle_end %w( noframes noscript noembed textarea xmp iframe ) => 'CdataTextAreaXmp'
|
||||
|
||||
|
@ -73,11 +80,11 @@ module HTML5lib
|
|||
@tree.insertText(data)
|
||||
end
|
||||
|
||||
def startTagScriptStyle(name, attributes)
|
||||
# Start-tag handler for the tags mapped to 'ProcessInHead' by handle_start.
# Delegates the tag unchanged (+name+ and +attributes+) to the parser's
# :inHead phase; no parse error is reported here.
def startTagProcessInHead(name, attributes)
|
||||
@parser.phases[:inHead].processStartTag(name, attributes)
|
||||
end
|
||||
|
||||
def startTagFromHead(name, attributes)
|
||||
# Start-tag handler for <title> seen in this phase.
# Records a parse error saying the tag belongs in the head, then hands the
# tag (+name+, +attributes+) to the parser's :inHead phase for processing.
def startTagTitle(name, attributes)
|
||||
@parser.parseError(_("Unexpected start tag (#{name}) that belongs in the head. Moved."))
|
||||
@parser.phases[:inHead].processStartTag(name, attributes)
|
||||
end
|
||||
|
@ -120,7 +127,12 @@ module HTML5lib
|
|||
|
||||
@tree.openElements.reverse.each_with_index do |node, i|
|
||||
if stopName.include?(node.name)
|
||||
(i + 1).times { @tree.openElements.pop }
|
||||
poppedNodes = (0..i).collect { @tree.openElements.pop }
|
||||
if i >= 1
|
||||
@parser.parseError("Missing end tag%s (%s)" % [
|
||||
(i>1 ? 's' : ''),
|
||||
poppedNodes.reverse.map {|item| item.name}.join(', ')])
|
||||
end
|
||||
break
|
||||
end
|
||||
|
||||
|
@ -142,15 +154,19 @@ module HTML5lib
|
|||
|
||||
def startTagHeading(name, attributes)
|
||||
endTagP('p') if in_scope?('p')
|
||||
HEADING_ELEMENTS.each do |element|
|
||||
if in_scope?(element)
|
||||
@parser.parseError(_("Unexpected start tag (#{name})."))
|
||||
|
||||
remove_open_elements_until { |element| HEADING_ELEMENTS.include?(element.name) }
|
||||
|
||||
break
|
||||
end
|
||||
end
|
||||
# Uncomment the following for IE7 behavior:
|
||||
# HEADING_ELEMENTS.each do |element|
|
||||
# if in_scope?(element)
|
||||
# @parser.parseError(_("Unexpected start tag (#{name})."))
|
||||
#
|
||||
# remove_open_elements_until do |element|
|
||||
# HEADING_ELEMENTS.include?(element.name)
|
||||
# end
|
||||
#
|
||||
# break
|
||||
# end
|
||||
# end
|
||||
@tree.insertElement(name, attributes)
|
||||
end
|
||||
|
||||
|
@ -170,6 +186,12 @@ module HTML5lib
|
|||
addFormattingElement(name, attributes)
|
||||
end
|
||||
|
||||
# Start-tag handler for <nobr>.
# Asks the tree to reconstruct the active formatting elements, then, if a
# 'nobr' element is already in scope, processes an end tag for it first so
# the new element is not nested inside the old one. Finally registers the
# new element through addFormattingElement.
def startTagNobr(name, attributes)
|
||||
@tree.reconstructActiveFormattingElements
|
||||
processEndTag('nobr') if in_scope?('nobr')
|
||||
addFormattingElement(name, attributes)
|
||||
end
|
||||
|
||||
def startTagButton(name, attributes)
|
||||
if in_scope?('button')
|
||||
@parser.parseError(_('Unexpected start tag (button) implied end tag (button).'))
|
||||
|
@ -497,6 +519,13 @@ module HTML5lib
|
|||
@parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
|
||||
end
|
||||
|
||||
# End-tag handler for </br>.
# Reports a parse error and then treats the end tag as if it were a <br>
# element: the active formatting elements are reconstructed, an element
# called +name+ with no attributes is inserted, and the top of the
# open-elements stack is popped straight away.
def endTagBr(name)
|
||||
@parser.parseError(_("Unexpected end tag (br). Treated as br element."))
|
||||
@tree.reconstructActiveFormattingElements
|
||||
@tree.insertElement(name, {})
|
||||
@tree.openElements.pop()
|
||||
end
|
||||
|
||||
def endTagNone(name)
|
||||
# This handles elements with no end tag.
|
||||
@parser.parseError(_("This tag (#{name}) has no end tag"))
|
||||
|
|
|
@ -5,7 +5,9 @@ module HTML5lib
|
|||
|
||||
handle_start 'html', 'head', 'title', 'style', 'script', %w( base link meta )
|
||||
|
||||
handle_end 'head', 'html', %w( title style script )
|
||||
handle_end 'head'
|
||||
handle_end %w( html body br ) => 'ImplyAfterHead'
|
||||
handle_end %w( title style script )
|
||||
|
||||
def processEOF
|
||||
if ['title', 'style', 'script'].include?(name = @tree.openElements[-1].name)
|
||||
|
@ -63,7 +65,11 @@ module HTML5lib
|
|||
|
||||
def startTagBaseLinkMeta(name, attributes)
|
||||
element = @tree.createElement(name, attributes)
|
||||
appendToHead(element)
|
||||
if @tree.headPointer != nil and @parser.phase == @parser.phases[:inHead]
|
||||
appendToHead(element)
|
||||
else
|
||||
@tree.openElements[-1].appendChild(element)
|
||||
end
|
||||
end
|
||||
|
||||
def startTagOther(name, attributes)
|
||||
|
@ -80,7 +86,7 @@ module HTML5lib
|
|||
@parser.phase = @parser.phases[:afterHead]
|
||||
end
|
||||
|
||||
def endTagHtml(name)
|
||||
# End-tag handler for the tags mapped to 'ImplyAfterHead' by handle_end.
# Runs this phase's anythingElse fallback and then re-dispatches the end
# tag +name+ to whatever phase the parser reports as current afterwards.
def endTagImplyAfterHead(name)
|
||||
anythingElse
|
||||
@parser.phase.processEndTag(name)
|
||||
end
|
||||
|
@ -117,4 +123,4 @@ module HTML5lib
|
|||
end
|
||||
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -89,10 +89,10 @@ module HTML5lib
|
|||
def endTagOther(name)
|
||||
@parser.parseError(_("Unexpected end tag (#{name}) in table context caused voodoo mode."))
|
||||
# Make all the special element rearranging voodoo kick in
|
||||
@parser.insertFromTable = true
|
||||
@tree.insertFromTable = true
|
||||
# Process the end tag in the "in body" mode
|
||||
@parser.phases[:inBody].processEndTag(name)
|
||||
@parser.insertFromTable = false
|
||||
@tree.insertFromTable = false
|
||||
end
|
||||
|
||||
protected
|
||||
|
|
|
@ -17,9 +17,95 @@ module HTML5lib
|
|||
@tree.insertComment(data, @tree.document)
|
||||
end
|
||||
|
||||
def processDoctype(name, error)
|
||||
@parser.parseError(_('Erroneous DOCTYPE.')) if error
|
||||
# Handles a DOCTYPE token in this phase.
#
# name     - doctype name as tokenized (compared case-insensitively to 'html')
# publicId - public identifier, or nil when absent
# systemId - system identifier, or nil when absent
# correct  - correctness flag supplied with the token (not consulted here)
#
# A parse error is reported unless the doctype is a bare "html" doctype with
# neither identifier. The doctype is inserted into the tree and the parser is
# moved on to the :rootElement phase. The quirks-mode decision is still a
# placeholder: the conditions are evaluated but nothing is done with them.
def processDoctype(name, publicId, systemId, correct)
  if name.downcase != 'html' or publicId or systemId
    @parser.parseError(_('Erroneous DOCTYPE.'))
  end
  # XXX need to update DOCTYPE tokens
  @tree.insertDoctype(name)

  # The identifiers below are all lower-case, so the public identifier has
  # to be folded to lower case (not upper case) for any of them to match.
  publicId = publicId.to_s.downcase

  if name.downcase != 'html'
    # XXX quirks mode
  else
    if ["+//silmaril//dtd html pro v0r11 19970101//en",
        "-//advasoft ltd//dtd html 3.0 aswedit + extensions//en",
        "-//as//dtd html 3.0 aswedit + extensions//en",
        "-//ietf//dtd html 2.0 level 1//en",
        "-//ietf//dtd html 2.0 level 2//en",
        "-//ietf//dtd html 2.0 strict level 1//en",
        "-//ietf//dtd html 2.0 strict level 2//en",
        "-//ietf//dtd html 2.0 strict//en",
        "-//ietf//dtd html 2.0//en",
        "-//ietf//dtd html 2.1e//en",
        "-//ietf//dtd html 3.0//en",
        "-//ietf//dtd html 3.0//en//",
        "-//ietf//dtd html 3.2 final//en",
        "-//ietf//dtd html 3.2//en",
        "-//ietf//dtd html 3//en",
        "-//ietf//dtd html level 0//en",
        "-//ietf//dtd html level 0//en//2.0",
        "-//ietf//dtd html level 1//en",
        "-//ietf//dtd html level 1//en//2.0",
        "-//ietf//dtd html level 2//en",
        "-//ietf//dtd html level 2//en//2.0",
        "-//ietf//dtd html level 3//en",
        "-//ietf//dtd html level 3//en//3.0",
        "-//ietf//dtd html strict level 0//en",
        "-//ietf//dtd html strict level 0//en//2.0",
        "-//ietf//dtd html strict level 1//en",
        "-//ietf//dtd html strict level 1//en//2.0",
        "-//ietf//dtd html strict level 2//en",
        "-//ietf//dtd html strict level 2//en//2.0",
        "-//ietf//dtd html strict level 3//en",
        "-//ietf//dtd html strict level 3//en//3.0",
        "-//ietf//dtd html strict//en",
        "-//ietf//dtd html strict//en//2.0",
        "-//ietf//dtd html strict//en//3.0",
        "-//ietf//dtd html//en",
        "-//ietf//dtd html//en//2.0",
        "-//ietf//dtd html//en//3.0",
        "-//metrius//dtd metrius presentational//en",
        "-//microsoft//dtd internet explorer 2.0 html strict//en",
        "-//microsoft//dtd internet explorer 2.0 html//en",
        "-//microsoft//dtd internet explorer 2.0 tables//en",
        "-//microsoft//dtd internet explorer 3.0 html strict//en",
        "-//microsoft//dtd internet explorer 3.0 html//en",
        "-//microsoft//dtd internet explorer 3.0 tables//en",
        "-//netscape comm. corp.//dtd html//en",
        "-//netscape comm. corp.//dtd strict html//en",
        "-//o'reilly and associates//dtd html 2.0//en",
        "-//o'reilly and associates//dtd html extended 1.0//en",
        "-//spyglass//dtd html 2.0 extended//en",
        "-//sq//dtd html 2.0 hotmetal + extensions//en",
        "-//sun microsystems corp.//dtd hotjava html//en",
        "-//sun microsystems corp.//dtd hotjava strict html//en",
        "-//w3c//dtd html 3 1995-03-24//en",
        "-//w3c//dtd html 3.2 draft//en",
        "-//w3c//dtd html 3.2 final//en",
        "-//w3c//dtd html 3.2//en",
        "-//w3c//dtd html 3.2s draft//en",
        "-//w3c//dtd html 4.0 frameset//en",
        "-//w3c//dtd html 4.0 transitional//en",
        "-//w3c//dtd html experimental 19960712//en",
        "-//w3c//dtd html experimental 970421//en",
        "-//w3c//dtd w3 html//en",
        "-//w3o//dtd w3 html 3.0//en",
        "-//w3o//dtd w3 html 3.0//en//",
        "-//w3o//dtd w3 html strict 3.0//en//",
        "-//webtechs//dtd mozilla html 2.0//en",
        "-//webtechs//dtd mozilla html//en",
        "-/w3c/dtd html 4.0 transitional/en",
        "html"].include?(publicId) or
        (systemId == nil and
          ["-//w3c//dtd html 4.01 frameset//en",
           "-//w3c//dtd html 4.01 transitional//en"].include?(publicId)) or
        (systemId == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")
      #XXX quirks mode
    end
  end

  @parser.phase = @parser.phases[:rootElement]
end
|
||||
|
||||
|
@ -46,4 +132,4 @@ module HTML5lib
|
|||
end
|
||||
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -101,7 +101,7 @@ module HTML5lib
|
|||
@tree.insertComment(data, @tree.openElements[-1])
|
||||
end
|
||||
|
||||
def processDoctype(name, error)
|
||||
# Handles a DOCTYPE token arriving in this phase: the token is dropped and
# only a parse error is recorded. The four arguments are accepted to match
# the DOCTYPE dispatch signature but none of them is used.
def processDoctype(name, publicId, systemId, correct)
|
||||
@parser.parseError(_('Unexpected DOCTYPE. Ignored.'))
|
||||
end
|
||||
|
||||
|
|
|
@ -33,9 +33,6 @@ module HTML5lib
|
|||
|
||||
options.each { |name, value| instance_variable_set("@#{name}", value) }
|
||||
|
||||
# List of where new lines occur
|
||||
@new_lines = [0]
|
||||
|
||||
# Raw Stream
|
||||
@raw_stream = open_stream(source)
|
||||
|
||||
|
@ -77,6 +74,8 @@ module HTML5lib
|
|||
|
||||
# Reset position in the list to read from
|
||||
@tell = 0
|
||||
@line = @col = 0
|
||||
@line_lengths = []
|
||||
end
|
||||
|
||||
# Produces a file object from source.
|
||||
|
@ -112,7 +111,7 @@ module HTML5lib
|
|||
require 'UniversalDetector' # gem install chardet
|
||||
buffer = @raw_stream.read
|
||||
encoding = UniversalDetector::chardet(buffer)['encoding']
|
||||
@raw_stream = open_stream(buffer)
|
||||
seek(buffer, 0)
|
||||
rescue LoadError
|
||||
end
|
||||
end
|
||||
|
@ -122,7 +121,7 @@ module HTML5lib
|
|||
encoding = @DEFAULT_ENCODING
|
||||
end
|
||||
|
||||
#Substitute for equivalent encodings:
|
||||
#Substitute for equivalent encodings
|
||||
encoding_sub = {'iso-8859-1' => 'windows-1252'}
|
||||
|
||||
if encoding_sub.has_key?(encoding.downcase)
|
||||
|
@ -145,7 +144,6 @@ module HTML5lib
|
|||
}
|
||||
|
||||
# Go to beginning of file and read in 4 bytes
|
||||
@raw_stream.seek(0)
|
||||
string = @raw_stream.read(4)
|
||||
return nil unless string
|
||||
|
||||
|
@ -162,30 +160,80 @@ module HTML5lib
|
|||
end
|
||||
end
|
||||
|
||||
#AT - move this to the caller?
|
||||
# Set the read position past the BOM if one was found, otherwise
|
||||
# set it to the start of the stream
|
||||
@raw_stream.seek(encoding ? seek : 0)
|
||||
seek(string, encoding ? seek : 0)
|
||||
|
||||
return encoding
|
||||
end
|
||||
|
||||
# Rewinds @raw_stream so that the next read starts at offset +n+ of the data
# that has already been consumed.
#
# buffer - the bytes previously read from the stream (nil is treated as "")
# n      - offset into +buffer+ at which reading should resume
#
# Three strategies are tried in order:
# 1. the stream already supports +unget+: push back buffer[n..-1];
# 2. the stream supports +seek+: seek to absolute position +n+ (falls
#    through when the stream raises Errno::ESPIPE);
# 3. otherwise wrap the stream in a SimpleDelegator that keeps a push-back
#    buffer and serves reads from it before touching the wrapped stream.
def seek(buffer, n)
  if @raw_stream.respond_to?(:unget)
    @raw_stream.unget(buffer.to_s[n..-1] || '')
    return
  end

  if @raw_stream.respond_to?(:seek)
    begin
      @raw_stream.seek(n)
      return
    rescue Errno::ESPIPE
      # Not seekable (pipe, socket, ...): fall back to the push-back wrapper.
    end
  end

  # Loaded lazily: only needed for streams that can neither unget nor seek.
  require 'delegate'
  @raw_stream = SimpleDelegator.new(@raw_stream)

  class << @raw_stream
    # Reads +chars+ bytes (everything when +chars+ is -1 or nil), draining
    # the push-back buffer before reading from the wrapped stream.
    def read(chars=-1)
      @data ||= ''
      read_all = chars.nil? || chars == -1
      if read_all || chars > @data.length
        result = @data
        @data = ''
        return result if __getobj__.eof?
        return result + __getobj__.read.to_s if read_all
        return result + __getobj__.read(chars - result.length).to_s
      elsif @data.empty?
        return __getobj__.read(chars)
      else
        # Serve the request from the start of the push-back buffer.
        result = @data[0...chars]
        @data = @data[chars..-1]
        return result
      end
    end

    # Pushes +data+ back so it is returned by the next read. Pushed-back
    # bytes precede anything still buffered, hence the prepend.
    def unget(data)
      if !@data or @data.empty?
        @data = data
      else
        @data = data + @data
      end
    end
  end

  @raw_stream.unget(buffer.to_s[n..-1] || '')
end
|
||||
|
||||
# Report the encoding declared by the meta element
|
||||
def detect_encoding_meta
|
||||
parser = EncodingParser.new(@raw_stream.read(@NUM_BYTES_META))
|
||||
@raw_stream.seek(0)
|
||||
buffer = @raw_stream.read(@NUM_BYTES_META)
|
||||
parser = EncodingParser.new(buffer)
|
||||
seek(buffer, 0)
|
||||
return parser.get_encoding
|
||||
end
|
||||
|
||||
# Returns (line, col) of the current position in the stream.
|
||||
def position
|
||||
line = 0
|
||||
@new_lines.each do |pos|
|
||||
break unless pos < @tell
|
||||
line += 1
|
||||
line, col = @line, @col
|
||||
@queue.reverse.each do |c|
|
||||
if c == "\n"
|
||||
line -= 1
|
||||
raise RuntimeError.new("col=#{col}") unless col == 0
|
||||
col = @line_lengths[line]
|
||||
else
|
||||
col -= 1
|
||||
end
|
||||
end
|
||||
col = @tell - @new_lines[line-1] - 1
|
||||
return [line, col]
|
||||
return [line+1, col]
|
||||
end
|
||||
|
||||
# Read one character from the stream or queue if available. Return
|
||||
|
@ -205,9 +253,14 @@ module HTML5lib
|
|||
c = 0x0A
|
||||
end
|
||||
|
||||
# record where newlines occur so that the position method
|
||||
# can tell where it is
|
||||
@new_lines << @tell-1 if c == 0x0A
|
||||
# update position in stream
|
||||
if c == 0x0a
|
||||
@line_lengths << @col
|
||||
@line += 1
|
||||
@col = 0
|
||||
else
|
||||
@col += 1
|
||||
end
|
||||
|
||||
c.chr
|
||||
|
||||
|
@ -261,11 +314,7 @@ module HTML5lib
|
|||
# Put the character stopped on back to the front of the queue
|
||||
# from where it came.
|
||||
c = char_stack.pop
|
||||
if c == :EOF or @data_stream[@tell-1] == c[0]
|
||||
@tell -= 1
|
||||
else
|
||||
@queue.insert(0, c)
|
||||
end
|
||||
@queue.insert(0, c) unless c == :EOF
|
||||
return char_stack.join('')
|
||||
end
|
||||
end
|
||||
|
@ -454,7 +503,7 @@ module HTML5lib
|
|||
space_found = false
|
||||
#Step 5 attribute name
|
||||
while true
|
||||
if @data.current_byte == '=' and attr_name:
|
||||
if @data.current_byte == '=' and attr_name
|
||||
break
|
||||
elsif SPACE_CHARACTERS.include?(@data.current_byte)
|
||||
space_found = true
|
||||
|
|
|
@ -69,15 +69,22 @@ module HTML5lib
|
|||
|
||||
# ensure that non-void XHTML elements have content so that separate
|
||||
# open and close tags are emitted
|
||||
if token[:type] == :EndTag and \
|
||||
not VOID_ELEMENTS.include? token[:name] and \
|
||||
token[:name] == @tree.openElements[-1].name and \
|
||||
not @tree.openElements[-1].hasContent
|
||||
@tree.insertText('') unless
|
||||
@tree.openElements.any? {|e|
|
||||
e.attributes.keys.include? 'xmlns' and
|
||||
e.attributes['xmlns'] != 'http://www.w3.org/1999/xhtml'
|
||||
}
|
||||
if token[:type] == :EndTag
|
||||
if VOID_ELEMENTS.include? token[:name]
|
||||
if @tree.openElements[-1].name != token["name"]:
|
||||
token[:type] = :EmptyTag
|
||||
token["data"] ||= {}
|
||||
end
|
||||
else
|
||||
if token[:name] == @tree.openElements[-1].name and \
|
||||
not @tree.openElements[-1].hasContent
|
||||
@tree.insertText('') unless
|
||||
@tree.openElements.any? {|e|
|
||||
e.attributes.keys.include? 'xmlns' and
|
||||
e.attributes['xmlns'] != 'http://www.w3.org/1999/xhtml'
|
||||
}
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
return token
|
||||
|
|
180
vendor/plugins/HTML5lib/lib/html5lib/serializer.rb
vendored
180
vendor/plugins/HTML5lib/lib/html5lib/serializer.rb
vendored
|
@ -1,178 +1,2 @@
|
|||
require 'html5lib/constants'
|
||||
|
||||
module HTML5lib
|
||||
|
||||
class HTMLSerializer
|
||||
CDATA_ELEMENTS = %w[style script xmp iframe noembed noframes noscript]
|
||||
|
||||
def self.serialize(stream, options = {})
|
||||
new(options).serialize(stream, options[:encoding])
|
||||
end
|
||||
|
||||
def initialize(options={})
|
||||
@quote_attr_values = false
|
||||
@quote_char = '"'
|
||||
@use_best_quote_char = true
|
||||
@minimize_boolean_attributes = true
|
||||
|
||||
@use_trailing_solidus = false
|
||||
@space_before_trailing_solidus = true
|
||||
|
||||
@omit_optional_tags = true
|
||||
@sanitize = false
|
||||
|
||||
@strip_whitespace = false
|
||||
|
||||
@inject_meta_charset = true
|
||||
|
||||
options.each do |name, value|
|
||||
next unless %w(quote_attr_values quote_char use_best_quote_char
|
||||
minimize_boolean_attributes use_trailing_solidus
|
||||
space_before_trailing_solidus omit_optional_tags sanitize
|
||||
strip_whitespace inject_meta_charset).include? name.to_s
|
||||
@use_best_quote_char = false if name.to_s == 'quote_char'
|
||||
instance_variable_set("@#{name}", value)
|
||||
end
|
||||
|
||||
@errors = []
|
||||
end
|
||||
|
||||
def serialize(treewalker, encoding=nil)
|
||||
in_cdata = false
|
||||
@errors = []
|
||||
|
||||
if encoding and @inject_meta_charset
|
||||
require 'html5lib/filters/inject_meta_charset'
|
||||
treewalker = Filters::InjectMetaCharset.new(treewalker, encoding)
|
||||
end
|
||||
|
||||
if @strip_whitespace
|
||||
require 'html5lib/filters/whitespace'
|
||||
treewalker = Filters::WhitespaceFilter.new(treewalker)
|
||||
end
|
||||
|
||||
if @sanitize
|
||||
require 'html5lib/filters/sanitizer'
|
||||
treewalker = Filters::HTMLSanitizeFilter.new(treewalker)
|
||||
end
|
||||
|
||||
if @omit_optional_tags
|
||||
require 'html5lib/filters/optionaltags'
|
||||
treewalker = Filters::OptionalTagFilter.new(treewalker)
|
||||
end
|
||||
|
||||
result = []
|
||||
treewalker.each do |token|
|
||||
type = token[:type]
|
||||
if type == :Doctype
|
||||
doctype = "<!DOCTYPE %s>" % token[:name]
|
||||
result << doctype
|
||||
|
||||
elsif [:Characters, :SpaceCharacters].include? type
|
||||
if type == :SpaceCharacters or in_cdata
|
||||
if in_cdata and token[:data].include?("</")
|
||||
serializeError(_("Unexpected </ in CDATA"))
|
||||
end
|
||||
result << token[:data]
|
||||
else
|
||||
result << token[:data].
|
||||
gsub("&", "&").
|
||||
gsub("<", "<").
|
||||
gsub(">", ">")
|
||||
end
|
||||
|
||||
elsif [:StartTag, :EmptyTag].include? type
|
||||
name = token[:name]
|
||||
if CDATA_ELEMENTS.include?(name)
|
||||
in_cdata = true
|
||||
elsif in_cdata
|
||||
serializeError(_("Unexpected child element of a CDATA element"))
|
||||
end
|
||||
attributes = []
|
||||
for k,v in attrs = token[:data].to_a.sort
|
||||
attributes << ' '
|
||||
|
||||
attributes << k
|
||||
if not @minimize_boolean_attributes or \
|
||||
(!(BOOLEAN_ATTRIBUTES[name]||[]).include?(k) \
|
||||
and !BOOLEAN_ATTRIBUTES[:global].include?(k))
|
||||
attributes << "="
|
||||
if @quote_attr_values or v.empty?
|
||||
quote_attr = true
|
||||
else
|
||||
quote_attr = (SPACE_CHARACTERS + %w(< > " ')).any? {|c| v.include?(c)}
|
||||
end
|
||||
v = v.gsub("&", "&")
|
||||
if quote_attr
|
||||
quote_char = @quote_char
|
||||
if @use_best_quote_char
|
||||
if v.index("'") and !v.index('"')
|
||||
quote_char = '"'
|
||||
elsif v.index('"') and !v.index("'")
|
||||
quote_char = "'"
|
||||
end
|
||||
end
|
||||
if quote_char == "'"
|
||||
v = v.gsub("'", "'")
|
||||
else
|
||||
v = v.gsub('"', """)
|
||||
end
|
||||
attributes << quote_char << v << quote_char
|
||||
else
|
||||
attributes << v
|
||||
end
|
||||
end
|
||||
end
|
||||
if VOID_ELEMENTS.include?(name) and @use_trailing_solidus
|
||||
if @space_before_trailing_solidus
|
||||
attributes << " /"
|
||||
else
|
||||
attributes << "/"
|
||||
end
|
||||
end
|
||||
result << "<%s%s>" % [name, attributes.join('')]
|
||||
|
||||
elsif type == :EndTag
|
||||
name = token[:name]
|
||||
if CDATA_ELEMENTS.include?(name)
|
||||
in_cdata = false
|
||||
elsif in_cdata
|
||||
serializeError(_("Unexpected child element of a CDATA element"))
|
||||
end
|
||||
end_tag = "</#{name}>"
|
||||
result << end_tag
|
||||
|
||||
elsif type == :Comment
|
||||
data = token[:data]
|
||||
serializeError(_("Comment contains --")) if data.index("--")
|
||||
comment = "<!--%s-->" % token[:data]
|
||||
result << comment
|
||||
|
||||
else
|
||||
serializeError(token[:data])
|
||||
end
|
||||
end
|
||||
|
||||
if encoding and encoding != 'utf-8'
|
||||
require 'iconv'
|
||||
Iconv.iconv(encoding, 'utf-8', result.join('')).first
|
||||
else
|
||||
result.join('')
|
||||
end
|
||||
end
|
||||
|
||||
alias :render :serialize
|
||||
|
||||
def serializeError(data="XXX ERROR MESSAGE NEEDED")
|
||||
# XXX The idea is to make data mandatory.
|
||||
@errors.push(data)
|
||||
if @strict
|
||||
raise SerializeError
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
# Error in serialized tree
|
||||
class SerializeError < Exception
|
||||
end
|
||||
end
|
||||
require 'html5lib/serializer/htmlserializer'
|
||||
require 'html5lib/serializer/xhtmlserializer'
|
||||
|
|
177
vendor/plugins/HTML5lib/lib/html5lib/serializer/htmlserializer.rb
vendored
Normal file
177
vendor/plugins/HTML5lib/lib/html5lib/serializer/htmlserializer.rb
vendored
Normal file
|
@ -0,0 +1,177 @@
|
|||
require 'html5lib/constants'
|
||||
|
||||
module HTML5lib
|
||||
|
||||
# Serializes a stream of tree-walker tokens (hashes with a :type key and,
# depending on the type, :name and :data) into an HTML string.
#
# Options passed to +new+ override the defaults set in #initialize; an
# option is honoured only when its name matches one of those instance
# variables, anything else is silently ignored.
class HTMLSerializer

  # Builds a serializer from +options+ and serializes +stream+ with it,
  # using options[:encoding] as the output encoding.
  def self.serialize(stream, options = {})
    new(options).serialize(stream, options[:encoding])
  end

  # Escapes the characters that are significant in HTML text content.
  # "&" is handled first so the entities emitted for "<" and ">" are not
  # escaped a second time.
  def escape(string)
    string.gsub("&", "&amp;").gsub("<", "&lt;").gsub(">", "&gt;")
  end

  # options - Hash of overrides for the defaults below. Supplying
  #           :quote_char switches :use_best_quote_char off.
  def initialize(options={})
    @quote_attr_values = false
    @quote_char = '"'
    @use_best_quote_char = true
    @minimize_boolean_attributes = true

    @use_trailing_solidus = false
    @space_before_trailing_solidus = true
    @escape_lt_in_attrs = false

    @omit_optional_tags = true
    @sanitize = false

    @strip_whitespace = false

    @inject_meta_charset = true

    # instance_variables yields Strings on Ruby 1.8 and Symbols on 1.9+;
    # normalise to Strings so recognised options are applied on both.
    known = instance_variables.map { |ivar| ivar.to_s }
    options.each do |name, value|
      next unless known.include?("@#{name}")
      @use_best_quote_char = false if name.to_s == 'quote_char'
      instance_variable_set("@#{name}", value)
    end

    @errors = []
  end

  # Serializes +treewalker+ (anything whose +each+ yields token hashes).
  #
  # encoding - optional output encoding. When given, a meta charset is
  #            injected (if enabled) and the result is converted from
  #            UTF-8 unless the encoding is exactly 'utf-8'.
  #
  # Returns the serialized markup as a String. Problems found on the way
  # are collected in @errors via #serializeError.
  def serialize(treewalker, encoding=nil)
    in_cdata = false
    @errors = []

    # Filters are loaded lazily, only when the matching option is enabled.
    if encoding && @inject_meta_charset
      require 'html5lib/filters/inject_meta_charset'
      treewalker = Filters::InjectMetaCharset.new(treewalker, encoding)
    end

    if @strip_whitespace
      require 'html5lib/filters/whitespace'
      treewalker = Filters::WhitespaceFilter.new(treewalker)
    end

    if @sanitize
      require 'html5lib/filters/sanitizer'
      treewalker = Filters::HTMLSanitizeFilter.new(treewalker)
    end

    if @omit_optional_tags
      require 'html5lib/filters/optionaltags'
      treewalker = Filters::OptionalTagFilter.new(treewalker)
    end

    result = []
    treewalker.each do |token|
      type = token[:type]
      if type == :Doctype
        result << ("<!DOCTYPE %s>" % token[:name])

      elsif [:Characters, :SpaceCharacters].include? type
        if type == :SpaceCharacters || in_cdata
          # Raw text: emitted verbatim, but a "</" would end the element early.
          if in_cdata && token[:data].include?("</")
            serializeError(_("Unexpected </ in CDATA"))
          end
          result << token[:data]
        else
          result << escape(token[:data])
        end

      elsif [:StartTag, :EmptyTag].include? type
        name = token[:name]
        if RCDATA_ELEMENTS.include?(name)
          in_cdata = true
        elsif in_cdata
          serializeError(_("Unexpected child element of a CDATA element"))
        end
        attributes = []
        # Attributes are emitted in sorted order for stable output.
        token[:data].to_a.sort.each do |k, v|
          attributes << ' '

          attributes << k
          # A boolean attribute is written as a bare name when minimizing.
          if !@minimize_boolean_attributes ||
              (!(BOOLEAN_ATTRIBUTES[name]||[]).include?(k) &&
               !BOOLEAN_ATTRIBUTES[:global].include?(k))
            attributes << "="
            if @quote_attr_values || v.empty?
              quote_attr = true
            else
              quote_attr = (SPACE_CHARACTERS + %w(< > " ')).any? {|c| v.include?(c)}
            end
            v = v.gsub("&", "&amp;")
            v = v.gsub("<", "&lt;") if @escape_lt_in_attrs
            if quote_attr
              quote_char = @quote_char
              if @use_best_quote_char
                # Pick the quote character that needs no escaping, if any.
                if v.index("'") && !v.index('"')
                  quote_char = '"'
                elsif v.index('"') && !v.index("'")
                  quote_char = "'"
                end
              end
              if quote_char == "'"
                v = v.gsub("'", "&#39;")
              else
                v = v.gsub('"', "&quot;")
              end
              attributes << quote_char << v << quote_char
            else
              attributes << v
            end
          end
        end
        if VOID_ELEMENTS.include?(name) && @use_trailing_solidus
          if @space_before_trailing_solidus
            attributes << " /"
          else
            attributes << "/"
          end
        end
        result << ("<%s%s>" % [name, attributes.join('')])

      elsif type == :EndTag
        name = token[:name]
        if RCDATA_ELEMENTS.include?(name)
          in_cdata = false
        elsif in_cdata
          serializeError(_("Unexpected child element of a CDATA element"))
        end
        result << "</#{name}>"

      elsif type == :Comment
        data = token[:data]
        serializeError(_("Comment contains --")) if data.index("--")
        result << ("<!--%s-->" % data)

      else
        serializeError(token[:data])
      end
    end

    output = result.join('')
    return output unless encoding && encoding != 'utf-8'

    begin
      require 'iconv'
    rescue LoadError
      # Iconv is not bundled with Ruby 2.0 and later; String#encode performs
      # the same UTF-8 -> +encoding+ conversion there.
      return output.encode(encoding, 'utf-8')
    end
    Iconv.iconv(encoding, 'utf-8', output).first
  end

  alias :render :serialize

  # Records a serialization problem in @errors and raises SerializeError
  # when @strict is set.
  def serializeError(data="XXX ERROR MESSAGE NEEDED")
    # XXX The idea is to make data mandatory.
    @errors.push(data)
    if @strict
      raise SerializeError
    end
  end
end
|
||||
|
||||
# Raised by HTMLSerializer#serializeError when the serializer's @strict flag
# is set and a problem is found in the token stream being serialized.
# NOTE(review): this inherits from Exception rather than StandardError, so a
# bare `rescue` will not catch it - confirm that is intended.
|
||||
class SerializeError < Exception
|
||||
end
|
||||
end
|
19
vendor/plugins/HTML5lib/lib/html5lib/serializer/xhtmlserializer.rb
vendored
Normal file
19
vendor/plugins/HTML5lib/lib/html5lib/serializer/xhtmlserializer.rb
vendored
Normal file
|
@ -0,0 +1,19 @@
|
|||
require 'html5lib/serializer/htmlserializer'
|
||||
|
||||
module HTML5lib

  # An HTMLSerializer whose option defaults are switched to XHTML-friendly
  # values. Any option passed by the caller overrides the matching default.
  class XHTMLSerializer < HTMLSerializer
    # Option values applied unless the caller supplies its own.
    DEFAULTS = {
      :quote_attr_values           => true,
      :minimize_boolean_attributes => false,
      :use_trailing_solidus        => true,
      :escape_lt_in_attrs          => true,
      :omit_optional_tags          => false
    }

    # options - Hash of serializer options; entries here win over DEFAULTS.
    #           The merged hash is handed to HTMLSerializer#initialize.
    def initialize(options={})
      merged = DEFAULTS.merge(options)
      super(merged)
    end
  end

end
|
401
vendor/plugins/HTML5lib/lib/html5lib/tokenizer.rb
vendored
401
vendor/plugins/HTML5lib/lib/html5lib/tokenizer.rb
vendored
|
@ -41,19 +41,31 @@ module HTML5lib
|
|||
:attributeValueUnQuoted => :attributeValueUnQuotedState,
|
||||
:bogusComment => :bogusCommentState,
|
||||
:markupDeclarationOpen => :markupDeclarationOpenState,
|
||||
:commentStart => :commentStartState,
|
||||
:commentStartDash => :commentStartDashState,
|
||||
:comment => :commentState,
|
||||
:commentDash => :commentDashState,
|
||||
:commentEndDash => :commentEndDashState,
|
||||
:commentEnd => :commentEndState,
|
||||
:doctype => :doctypeState,
|
||||
:beforeDoctypeName => :beforeDoctypeNameState,
|
||||
:doctypeName => :doctypeNameState,
|
||||
:afterDoctypeName => :afterDoctypeNameState,
|
||||
:beforeDoctypePublicIdentifier => :beforeDoctypePublicIdentifierState,
|
||||
:doctypePublicIdentifierDoubleQuoted => :doctypePublicIdentifierDoubleQuotedState,
|
||||
:doctypePublicIdentifierSingleQuoted => :doctypePublicIdentifierSingleQuotedState,
|
||||
:afterDoctypePublicIdentifier => :afterDoctypePublicIdentifierState,
|
||||
:beforeDoctypeSystemIdentifier => :beforeDoctypeSystemIdentifierState,
|
||||
:doctypeSystemIdentifierDoubleQuoted => :doctypeSystemIdentifierDoubleQuotedState,
|
||||
:doctypeSystemIdentifierSingleQuoted => :doctypeSystemIdentifierSingleQuotedState,
|
||||
:afterDoctypeSystemIdentifier => :afterDoctypeSystemIdentifierState,
|
||||
:bogusDoctype => :bogusDoctypeState
|
||||
}
|
||||
|
||||
# Setup the initial tokenizer state
|
||||
@contentModelFlag = :PCDATA
|
||||
@state = @states[:data]
|
||||
@escapeFlag = false
|
||||
@lastFourChars = []
|
||||
|
||||
# The current token being created
|
||||
@currentToken = nil
|
||||
|
@ -133,24 +145,14 @@ module HTML5lib
|
|||
# If the integer is between 127 and 160 (so 128 and bigger and 159 and
|
||||
# smaller) we need to do the "windows trick".
|
||||
if (127...160).include? charAsInt
|
||||
#XXX - removed parse error from windows 1252 entity for now
|
||||
#we may want to reenable this later
|
||||
#@tokenQueue.push({:type => :ParseError, :data =>
|
||||
# _("Entity used with illegal number (windows-1252 reference).")})
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Entity used with illegal number (windows-1252 reference).")})
|
||||
|
||||
charAsInt = ENTITIES_WINDOWS1252[charAsInt - 128]
|
||||
end
|
||||
|
||||
# 0 is not a good number.
|
||||
if charAsInt == 0
|
||||
charAsInt = 65533
|
||||
end
|
||||
|
||||
if charAsInt <= 0x10FFFF
|
||||
if charAsInt > 0 and charAsInt <= 1114111
|
||||
char = [charAsInt].pack('U')
|
||||
else
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Numeric entity couldn't be converted to character.")})
|
||||
end
|
||||
|
||||
# Discard the ; if present. Otherwise, put it back on the queue and
|
||||
|
@ -167,7 +169,10 @@ module HTML5lib
|
|||
def consumeEntity
|
||||
char = nil
|
||||
charStack = [@stream.char]
|
||||
if charStack[0] == "#"
|
||||
if SPACE_CHARACTERS.include?(charStack[0]) or
|
||||
[:EOF, '<', '&'].include?(charStack[0])
|
||||
@stream.queue+= charStack
|
||||
elsif charStack[0] == "#"
|
||||
# We might have a number entity here.
|
||||
charStack += [@stream.char, @stream.char]
|
||||
if charStack.include? :EOF
|
||||
|
@ -194,10 +199,6 @@ module HTML5lib
|
|||
_("Numeric entity expected but none found.")})
|
||||
end
|
||||
end
|
||||
# Break out if we reach the end of the file
|
||||
elsif charStack[0] == :EOF
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Entity expected. Got end of file instead.")})
|
||||
else
|
||||
# At this point in the process might have named entity. Entities
|
||||
# are stored in the global variable "entities".
|
||||
|
@ -267,14 +268,33 @@ module HTML5lib
|
|||
# statements should be.
|
||||
def dataState
|
||||
data = @stream.char
|
||||
if data == "&" and (@contentModelFlag == :PCDATA or
|
||||
@contentModelFlag == :RCDATA)
|
||||
|
||||
if @contentModelFlag == :CDATA or @contentModelFlag == :RCDATA
|
||||
@lastFourChars << data
|
||||
@lastFourChars.shift if @lastFourChars.length > 4
|
||||
end
|
||||
|
||||
if data == "&" and [:PCDATA,:RCDATA].include?(@contentModelFlag)
|
||||
@state = @states[:entityData]
|
||||
elsif data == "<" and @contentModelFlag != :PLAINTEXT
|
||||
@state = @states[:tagOpen]
|
||||
|
||||
elsif data == "-" and [:CDATA,:RCDATA].include?(@contentModelFlag) and
|
||||
@escapeFlag == false and @lastFourChars.join('') == "<!--"
|
||||
@escapeFlag = true
|
||||
@tokenQueue.push({:type => :Characters, :data => data})
|
||||
|
||||
elsif data == "<" and @escapeFlag == false and
|
||||
[:PCDATA,:CDATA,:RCDATA].include?(@contentModelFlag)
|
||||
@state = @states[:tagOpen]
|
||||
|
||||
elsif data == ">" and [:CDATA,:RCDATA].include?(@contentModelFlag) and
|
||||
@escapeFlag == true and @lastFourChars[1..-1].join('') == "-->"
|
||||
@escapeFlag = false
|
||||
@tokenQueue.push({:type => :Characters, :data => data})
|
||||
|
||||
elsif data == :EOF
|
||||
# Tokenization ends.
|
||||
return false
|
||||
|
||||
elsif SPACE_CHARACTERS.include? data
|
||||
# Directly after emitting a token you switch back to the "data
|
||||
# state". At that point SPACE_CHARACTERS are important so they are
|
||||
|
@ -285,7 +305,7 @@ module HTML5lib
|
|||
data + @stream.chars_until(SPACE_CHARACTERS, true)})
|
||||
else
|
||||
@tokenQueue.push({:type => :Characters, :data =>
|
||||
data + @stream.chars_until(["&", "<"])})
|
||||
data + @stream.chars_until(%w[& < > -])})
|
||||
end
|
||||
return true
|
||||
end
|
||||
|
@ -380,8 +400,6 @@ module HTML5lib
|
|||
# emitting the end tag token.
|
||||
@contentModelFlag = :PCDATA
|
||||
else
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Expected closing tag after seeing '</'. None found.")})
|
||||
@tokenQueue.push({:type => :Characters, :data => "</"})
|
||||
@state = @states[:data]
|
||||
|
||||
|
@ -391,29 +409,27 @@ module HTML5lib
|
|||
end
|
||||
end
|
||||
|
||||
if @contentModelFlag == :PCDATA
|
||||
data = @stream.char
|
||||
if data == :EOF
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Expected closing tag. Unexpected end of file.")})
|
||||
@tokenQueue.push({:type => :Characters, :data => "</"})
|
||||
@state = @states[:data]
|
||||
elsif ASCII_LETTERS.include? data
|
||||
@currentToken =\
|
||||
{:type => :EndTag, :name => data, :data => []}
|
||||
@state = @states[:tagName]
|
||||
elsif data == ">"
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Expected closing tag. Got '>' instead. Ignoring '</>'.")})
|
||||
@state = @states[:data]
|
||||
else
|
||||
# XXX data can be _'_...
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Expected closing tag. Unexpected character '" + data + "' found.")})
|
||||
@stream.queue.push(data)
|
||||
@state = @states[:bogusComment]
|
||||
end
|
||||
data = @stream.char
|
||||
if data == :EOF
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Expected closing tag. Unexpected end of file.")})
|
||||
@tokenQueue.push({:type => :Characters, :data => "</"})
|
||||
@state = @states[:data]
|
||||
elsif ASCII_LETTERS.include? data
|
||||
@currentToken = {:type => :EndTag, :name => data, :data => []}
|
||||
@state = @states[:tagName]
|
||||
elsif data == ">"
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Expected closing tag. Got '>' instead. Ignoring '</>'.")})
|
||||
@state = @states[:data]
|
||||
else
|
||||
# XXX data can be _'_...
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Expected closing tag. Unexpected character '#{data}' found.")})
|
||||
@stream.queue.push(data)
|
||||
@state = @states[:bogusComment]
|
||||
end
|
||||
|
||||
return true
|
||||
end
|
||||
|
||||
|
@ -430,11 +446,6 @@ module HTML5lib
|
|||
@stream.chars_until(ASCII_LETTERS, true)
|
||||
elsif data == ">"
|
||||
emitCurrentToken
|
||||
elsif data == "<"
|
||||
@stream.queue.push(data)
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected < character when getting the tag name.")})
|
||||
emitCurrentToken
|
||||
elsif data == "/"
|
||||
processSolidusInTag
|
||||
@state = @states[:beforeAttributeName]
|
||||
|
@ -459,11 +470,6 @@ module HTML5lib
|
|||
emitCurrentToken
|
||||
elsif data == "/"
|
||||
processSolidusInTag
|
||||
elsif data == "<"
|
||||
@stream.queue.push(data)
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected < character. Expected attribute name instead.")})
|
||||
emitCurrentToken
|
||||
else
|
||||
@currentToken[:data].push([data, ""])
|
||||
@state = @states[:attributeName]
|
||||
|
@ -494,12 +500,6 @@ module HTML5lib
|
|||
elsif data == "/"
|
||||
processSolidusInTag
|
||||
@state = @states[:beforeAttributeName]
|
||||
elsif data == "<"
|
||||
@stream.queue.push(data)
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected < character in attribute name.")})
|
||||
emitCurrentToken
|
||||
leavingThisState = false
|
||||
else
|
||||
@currentToken[:data][-1][0] += data
|
||||
leavingThisState = false
|
||||
|
@ -537,11 +537,6 @@ module HTML5lib
|
|||
elsif data == "/"
|
||||
processSolidusInTag
|
||||
@state = @states[:beforeAttributeName]
|
||||
elsif data == "<"
|
||||
@stream.queue.push(data)
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected < character. Expected = or end of tag.")})
|
||||
emitCurrentToken
|
||||
elsif data == :EOF
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected end of file. Expected = or end of tag.")})
|
||||
|
@ -566,11 +561,6 @@ module HTML5lib
|
|||
@state = @states[:attributeValueSingleQuoted]
|
||||
elsif data == ">"
|
||||
emitCurrentToken
|
||||
elsif data == "<"
|
||||
@stream.queue.push(data)
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected < character. Expected attribute value.")})
|
||||
emitCurrentToken
|
||||
elsif data == :EOF
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected end of file. Expected attribute value.")})
|
||||
|
@ -624,11 +614,6 @@ module HTML5lib
|
|||
processEntityInAttribute
|
||||
elsif data == ">"
|
||||
emitCurrentToken
|
||||
elsif data == "<"
|
||||
@stream.queue.push(data)
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected < character in attribute value.")})
|
||||
emitCurrentToken
|
||||
elsif data == :EOF
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected end of file in attribute value.")})
|
||||
|
@ -658,14 +643,15 @@ module HTML5lib
|
|||
charStack = [@stream.char, @stream.char]
|
||||
if charStack == ["-", "-"]
|
||||
@currentToken = {:type => :Comment, :data => ""}
|
||||
@state = @states[:comment]
|
||||
@state = @states[:commentStart]
|
||||
else
|
||||
5.times { charStack.push(@stream.char) }
|
||||
# Put in explicit :EOF check
|
||||
if ((not charStack.include? :EOF) and
|
||||
charStack.join("").upcase == "DOCTYPE")
|
||||
@currentToken =\
|
||||
{:type => :Doctype, :name => "", :data => true}
|
||||
{:type => :Doctype, :name => "",
|
||||
:publicId => nil, :systemId => nil, :correct => true}
|
||||
@state = @states[:doctype]
|
||||
else
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
|
@ -677,10 +663,52 @@ module HTML5lib
|
|||
return true
|
||||
end
|
||||
|
||||
# Tokenizer state: first character of a comment's content.
#
# Expects @currentToken to be the :Comment token being built. Consumes one
# character from @stream:
# * "-"       -> switch to commentStartDash;
# * ">"       -> queue a ParseError, emit the comment token as it stands
#                and return to the data state;
# * :EOF      -> queue a ParseError, emit the comment token and return to
#                the data state;
# * otherwise -> append the character plus everything up to the next "-"
#                to the comment data and switch to the comment state.
#
# Always returns true.
def commentStartState
  data = @stream.char
  if data == "-"
    @state = @states[:commentStartDash]
  elsif data == ">"
    @tokenQueue.push({:type => :ParseError, :data =>
      _("Incorrect comment.")})
    @tokenQueue.push(@currentToken)
    @state = @states[:data]
  elsif data == :EOF
    # End of input is signalled with the :EOF symbol (as in every other
    # state), not a constant named EOF.
    @tokenQueue.push({:type => :ParseError, :data =>
      _("Unexpected end of file in comment.")})
    @tokenQueue.push(@currentToken)
    @state = @states[:data]
  else
    @currentToken[:data] += data + @stream.chars_until("-")
    @state = @states[:comment]
  end
  return true
end
|
||||
|
||||
# Tokenizer state: a "-" has been seen as the first character of a
# comment's content (commentStartState switches here on "-").
#
# Consumes one character from @stream:
# * "-"       -> switch to commentEnd;
# * ">"       -> queue a ParseError, emit the comment token as it stands
#                and return to the data state;
# * :EOF      -> queue a ParseError, emit the comment token and return to
#                the data state;
# * otherwise -> the earlier "-" was ordinary comment text: append it, the
#                current character and everything up to the next "-" to
#                the comment data, then switch to the comment state.
#
# Always returns true.
def commentStartDashState
  data = @stream.char
  if data == "-"
    @state = @states[:commentEnd]
  elsif data == ">"
    @tokenQueue.push({:type => :ParseError, :data =>
      _("Incorrect comment.")})
    @tokenQueue.push(@currentToken)
    @state = @states[:data]
  elsif data == :EOF
    # End of input is signalled with the :EOF symbol (as in every other
    # state), not a constant named EOF.
    @tokenQueue.push({:type => :ParseError, :data =>
      _("Unexpected end of file in comment.")})
    @tokenQueue.push(@currentToken)
    @state = @states[:data]
  else
    # The "-" consumed by the previous state has not been stored anywhere
    # yet, so it must be re-added here or it would be lost.
    @currentToken[:data] += "-" + data + @stream.chars_until("-")
    @state = @states[:comment]
  end
  return true
end
|
||||
|
||||
def commentState
|
||||
data = @stream.char
|
||||
if data == "-"
|
||||
@state = @states[:commentDash]
|
||||
@state = @states[:commentEndDash]
|
||||
elsif data == :EOF
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected end of file in comment.")})
|
||||
|
@ -692,7 +720,7 @@ module HTML5lib
|
|||
return true
|
||||
end
|
||||
|
||||
def commentDashState
|
||||
def commentEndDashState
|
||||
data = @stream.char
|
||||
if data == "-"
|
||||
@state = @states[:commentEnd]
|
||||
|
@ -752,19 +780,16 @@ module HTML5lib
|
|||
def beforeDoctypeNameState
|
||||
data = @stream.char
|
||||
if SPACE_CHARACTERS.include? data
|
||||
elsif ASCII_LOWERCASE.include? data
|
||||
@currentToken[:name] = data.upcase
|
||||
@state = @states[:doctypeName]
|
||||
elsif data == ">"
|
||||
# Character needs to be consumed per the specification so don't
|
||||
# invoke emitCurrentTokenWithParseError with :data as argument.
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected > character. Expected DOCTYPE name.")})
|
||||
@currentToken[:correct] = false
|
||||
@tokenQueue.push(@currentToken)
|
||||
@state = @states[:data]
|
||||
elsif data == :EOF
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected end of file. Expected DOCTYPE name.")})
|
||||
@currentToken[:correct] = false
|
||||
@tokenQueue.push(@currentToken)
|
||||
@state = @states[:data]
|
||||
else
|
||||
|
@ -776,33 +801,21 @@ module HTML5lib
|
|||
|
||||
# Tokenizer state: reading the name of a DOCTYPE.
#
# Consumes one character from @stream:
# * whitespace -> the name is complete; switch to afterDoctypeName;
# * ">"        -> emit the DOCTYPE token and return to the data state;
# * :EOF       -> queue a ParseError, flag the token as not :correct, emit
#                 it and return to the data state;
# * otherwise  -> append the character, unchanged, to the token's :name.
#
# Validity of the DOCTYPE is tracked only through the token's :correct
# flag; the token has no :data flag to maintain here.
#
# Always returns true.
def doctypeNameState
  data = @stream.char
  if SPACE_CHARACTERS.include? data
    @state = @states[:afterDoctypeName]
  elsif data == ">"
    @tokenQueue.push(@currentToken)
    @state = @states[:data]
  elsif data == :EOF
    @tokenQueue.push({:type => :ParseError, :data =>
      _("Unexpected end of file in DOCTYPE name.")})
    @currentToken[:correct] = false
    @tokenQueue.push(@currentToken)
    @state = @states[:data]
  else
    @currentToken[:name] += data
  end

  return true
end
|
||||
|
||||
|
@ -814,16 +827,195 @@ module HTML5lib
|
|||
@state = @states[:data]
|
||||
elsif data == :EOF
|
||||
@currentToken[:data] = true
|
||||
# XXX EMIT
|
||||
@stream.queue.push(data)
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected end of file in DOCTYPE.")})
|
||||
@currentToken[:correct] = false
|
||||
@tokenQueue.push(@currentToken)
|
||||
@state = @states[:data]
|
||||
else
|
||||
charStack = [data]
|
||||
5.times { charStack << stream.char }
|
||||
token = charStack.join('').tr(ASCII_UPPERCASE,ASCII_LOWERCASE)
|
||||
if token == "public"
|
||||
@state = @states[:beforeDoctypePublicIdentifier]
|
||||
elsif token == "system"
|
||||
@state = @states[:beforeDoctypeSystemIdentifier]
|
||||
else
|
||||
@stream.queue += charStack
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Expected 'public' or 'system'. Got '#{charStack.join('')}'")})
|
||||
@state = @states[:bogusDoctype]
|
||||
end
|
||||
end
|
||||
return true
|
||||
end
|
||||
|
||||
# Tokenizer state: the "public" keyword of a DOCTYPE has been read and the
# opening quote of the public identifier is expected.
#
# Consumes one character from @stream:
# * whitespace -> ignored;
# * '"' / "'"  -> start an empty :publicId and switch to the matching
#                 double-/single-quoted public identifier state;
# * ">"        -> queue a ParseError, flag the token as not :correct, emit
#                 it and return to the data state;
# * :EOF       -> same handling as ">", with an end-of-file message;
# * otherwise  -> queue a ParseError and switch to bogusDoctype.
#
# Always returns true.
def beforeDoctypePublicIdentifierState
  data = @stream.char

  if SPACE_CHARACTERS.include?(data)
    # Skip whitespace between the keyword and the identifier.
  elsif data == "\""
    @currentToken[:publicId] = ""
    @state = @states[:doctypePublicIdentifierDoubleQuoted]
  elsif data == "'"
    @currentToken[:publicId] = ""
    @state = @states[:doctypePublicIdentifierSingleQuoted]
  elsif data == ">"
    @tokenQueue.push({:type => :ParseError, :data =>
      _("Unexpected end of DOCTYPE.")})
    @currentToken[:correct] = false
    @tokenQueue.push(@currentToken)
    @state = @states[:data]
  elsif data == :EOF
    @tokenQueue.push({:type => :ParseError, :data =>
      _("Unexpected end of file in DOCTYPE.")})
    @currentToken[:correct] = false
    @tokenQueue.push(@currentToken)
    @state = @states[:data]
  else
    @tokenQueue.push({:type => :ParseError, :data =>
      _("Unexpected character in DOCTYPE.")})
    @state = @states[:bogusDoctype]
  end

  return true
end
|
||||
|
||||
# Tokenizer state: inside a double-quoted DOCTYPE public identifier.
#
# Consumes one character from @stream:
# * '"'       -> identifier finished; switch to afterDoctypePublicIdentifier;
# * :EOF      -> queue a ParseError, flag the token as not :correct, emit
#                it and return to the data state;
# * otherwise -> append the character to the token's :publicId.
#
# Always returns true.
def doctypePublicIdentifierDoubleQuotedState
  case (ch = @stream.char)
  when "\""
    @state = @states[:afterDoctypePublicIdentifier]
  when :EOF
    eof_error = {:type => :ParseError,
                 :data => _("Unexpected end of file in DOCTYPE.")}
    @tokenQueue.push(eof_error)
    @currentToken[:correct] = false
    @tokenQueue.push(@currentToken)
    @state = @states[:data]
  else
    @currentToken[:publicId] += ch
  end
  true
end
|
||||
|
||||
# Tokenizer state: inside a single-quoted DOCTYPE public identifier.
#
# Consumes one character from @stream:
# * "'"       -> identifier finished; switch to afterDoctypePublicIdentifier;
# * :EOF      -> queue a ParseError, flag the token as not :correct, emit
#                it and return to the data state;
# * otherwise -> append the character to the token's :publicId.
#
# Always returns true.
def doctypePublicIdentifierSingleQuotedState
  case (ch = @stream.char)
  when "'"
    @state = @states[:afterDoctypePublicIdentifier]
  when :EOF
    eof_error = {:type => :ParseError,
                 :data => _("Unexpected end of file in DOCTYPE.")}
    @tokenQueue.push(eof_error)
    @currentToken[:correct] = false
    @tokenQueue.push(@currentToken)
    @state = @states[:data]
  else
    @currentToken[:publicId] += ch
  end
  true
end
|
||||
|
||||
# Tokenizer state: the DOCTYPE public identifier has been closed.
#
# Consumes one character from @stream:
# * whitespace -> ignored;
# * '"' / "'"  -> start an empty :systemId and switch to the matching
#                 double-/single-quoted system identifier state;
# * ">"        -> emit the DOCTYPE token and return to the data state;
# * :EOF       -> queue a ParseError, flag the token as not :correct, emit
#                 it and return to the data state;
# * otherwise  -> queue a ParseError and switch to bogusDoctype.
#
# Always returns true.
def afterDoctypePublicIdentifierState
  ch = @stream.char
  unless SPACE_CHARACTERS.include?(ch)
    case ch
    when "\""
      @currentToken[:systemId] = ""
      @state = @states[:doctypeSystemIdentifierDoubleQuoted]
    when "'"
      @currentToken[:systemId] = ""
      @state = @states[:doctypeSystemIdentifierSingleQuoted]
    when ">"
      @tokenQueue.push(@currentToken)
      @state = @states[:data]
    when :EOF
      eof_error = {:type => :ParseError,
                   :data => _("Unexpected end of file in DOCTYPE.")}
      @tokenQueue.push(eof_error)
      @currentToken[:correct] = false
      @tokenQueue.push(@currentToken)
      @state = @states[:data]
    else
      stray_error = {:type => :ParseError,
                     :data => _("Unexpected character in DOCTYPE.")}
      @tokenQueue.push(stray_error)
      @state = @states[:bogusDoctype]
    end
  end
  true
end
|
||||
|
||||
# Tokenizer state: the opening quote of a DOCTYPE system identifier is
# expected.
#
# Consumes one character from @stream:
# * whitespace -> ignored;
# * '"' / "'"  -> start an empty :systemId and switch to the matching
#                 double-/single-quoted system identifier state;
# * ">"        -> queue a ParseError, flag the token as not :correct, emit
#                 it and return to the data state;
# * :EOF       -> same handling as ">", with an end-of-file message;
# * otherwise  -> queue a ParseError and switch to bogusDoctype.
#
# Always returns true.
def beforeDoctypeSystemIdentifierState
  ch = @stream.char
  unless SPACE_CHARACTERS.include?(ch)
    case ch
    when "\""
      @currentToken[:systemId] = ""
      @state = @states[:doctypeSystemIdentifierDoubleQuoted]
    when "'"
      @currentToken[:systemId] = ""
      @state = @states[:doctypeSystemIdentifierSingleQuoted]
    when ">"
      early_end = {:type => :ParseError,
                   :data => _("Unexpected character in DOCTYPE.")}
      @tokenQueue.push(early_end)
      @currentToken[:correct] = false
      @tokenQueue.push(@currentToken)
      @state = @states[:data]
    when :EOF
      eof_error = {:type => :ParseError,
                   :data => _("Unexpected end of file in DOCTYPE.")}
      @tokenQueue.push(eof_error)
      @currentToken[:correct] = false
      @tokenQueue.push(@currentToken)
      @state = @states[:data]
    else
      stray_error = {:type => :ParseError,
                     :data => _("Unexpected character in DOCTYPE.")}
      @tokenQueue.push(stray_error)
      @state = @states[:bogusDoctype]
    end
  end
  true
end
|
||||
|
||||
# Tokenizer state: inside a double-quoted DOCTYPE system identifier.
#
# Consumes one character from @stream:
# * '"'       -> identifier finished; switch to afterDoctypeSystemIdentifier;
# * :EOF      -> queue a ParseError, flag the token as not :correct, emit
#                it and return to the data state;
# * otherwise -> append the character to the token's :systemId.
#
# Always returns true.
def doctypeSystemIdentifierDoubleQuotedState
  case (ch = @stream.char)
  when "\""
    @state = @states[:afterDoctypeSystemIdentifier]
  when :EOF
    eof_error = {:type => :ParseError,
                 :data => _("Unexpected end of file in DOCTYPE.")}
    @tokenQueue.push(eof_error)
    @currentToken[:correct] = false
    @tokenQueue.push(@currentToken)
    @state = @states[:data]
  else
    @currentToken[:systemId] += ch
  end
  true
end
|
||||
|
||||
# Tokenizer state: inside a single-quoted DOCTYPE system identifier.
#
# Consumes one character from @stream:
# * "'"       -> identifier finished; switch to afterDoctypeSystemIdentifier;
# * :EOF      -> queue a ParseError, flag the token as not :correct, emit
#                it and return to the data state;
# * otherwise -> append the character to the token's :systemId.
#
# Always returns true.
def doctypeSystemIdentifierSingleQuotedState
  case (ch = @stream.char)
  when "'"
    @state = @states[:afterDoctypeSystemIdentifier]
  when :EOF
    eof_error = {:type => :ParseError,
                 :data => _("Unexpected end of file in DOCTYPE.")}
    @tokenQueue.push(eof_error)
    @currentToken[:correct] = false
    @tokenQueue.push(@currentToken)
    @state = @states[:data]
  else
    @currentToken[:systemId] += ch
  end
  true
end
|
||||
|
||||
def afterDoctypeSystemIdentifierState
|
||||
data = @stream.char
|
||||
if SPACE_CHARACTERS.include?(data)
|
||||
elsif data == ">"
|
||||
@tokenQueue.push(@currentToken)
|
||||
@state = @states[:data]
|
||||
elsif data == :EOF
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected end of file in DOCTYPE.")})
|
||||
@currentToken[:correct] = false
|
||||
@tokenQueue.push(@currentToken)
|
||||
@state = @states[:data]
|
||||
else
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected character in DOCTYPE.")})
|
||||
@state = @states[:bogusDoctype]
|
||||
end
|
||||
return true
|
||||
|
@ -839,6 +1031,7 @@ module HTML5lib
|
|||
@stream.queue.push(data)
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected end of file in bogus doctype.")})
|
||||
@currentToken[:correct] = false
|
||||
@tokenQueue.push(@currentToken)
|
||||
@state = @states[:data]
|
||||
end
|
||||
|
|
|
@ -27,6 +27,9 @@ module HTML5lib
|
|||
childNodes << node
|
||||
hpricot.children << node.hpricot
|
||||
end
|
||||
if (oldparent = node.hpricot.parent) != nil
|
||||
oldparent.children.delete_at(oldparent.children.index(node.hpricot))
|
||||
end
|
||||
node.hpricot.parent = hpricot
|
||||
node.parent = self
|
||||
end
|
||||
|
|
|
@ -9,7 +9,7 @@ module HTML5lib
|
|||
def node_details(node)
|
||||
case node
|
||||
when ::Hpricot::Elem
|
||||
if !node.name
|
||||
if node.name.empty?
|
||||
[:DOCUMENT_FRAGMENT]
|
||||
else
|
||||
[:ELEMENT, node.name,
|
||||
|
|
115
vendor/plugins/HTML5lib/parse.rb
vendored
115
vendor/plugins/HTML5lib/parse.rb
vendored
|
@ -5,12 +5,20 @@
|
|||
$:.unshift File.dirname(__FILE__),'lib'
|
||||
|
||||
def parse(opts, args)
|
||||
encoding = nil
|
||||
|
||||
f = args[-1]
|
||||
if f
|
||||
begin
|
||||
require 'open-uri' if f[0..6] == 'http://'
|
||||
f = open(f)
|
||||
if f[0..6] == 'http://'
|
||||
require 'open-uri'
|
||||
f = URI.parse(f).open
|
||||
encoding = f.charset
|
||||
elsif f == '-'
|
||||
f = $stdin
|
||||
else
|
||||
f = open(f)
|
||||
end
|
||||
rescue
|
||||
end
|
||||
else
|
||||
|
@ -29,22 +37,28 @@ def parse(opts, args)
|
|||
p = HTML5lib::HTMLParser.new(:tree=>treebuilder)
|
||||
end
|
||||
|
||||
if opts.parsemethod == :parse
|
||||
args = [f, encoding]
|
||||
else
|
||||
args = [f, 'div', encoding]
|
||||
end
|
||||
|
||||
if opts.profile
|
||||
require 'profiler'
|
||||
Profiler__::start_profile
|
||||
p.send(opts.parsemethod,f)
|
||||
p.send(opts.parsemethod, *args)
|
||||
Profiler__::stop_profile
|
||||
Profiler__::print_profile($stderr)
|
||||
elsif opts.time
|
||||
require 'time'
|
||||
t0 = Time.new
|
||||
document = p.send(opts.parsemethod,f)
|
||||
document = p.send(opts.parsemethod, *args)
|
||||
t1 = Time.new
|
||||
printOutput(p, document, opts)
|
||||
t2 = Time.new
|
||||
puts "\n\nRun took: %fs (plus %fs to print the output)"%[t1-t0, t2-t1]
|
||||
else
|
||||
document = p.send(opts.parsemethod,f)
|
||||
document = p.send(opts.parsemethod, *args)
|
||||
printOutput(p, document, opts)
|
||||
end
|
||||
end
|
||||
|
@ -59,7 +73,7 @@ def printOutput(parser, document, opts)
|
|||
require 'html5lib/treewalkers'
|
||||
tokens = HTML5lib::TreeWalkers[opts.treebuilder].new(document)
|
||||
require 'html5lib/serializer'
|
||||
print HTML5lib::HTMLSerializer.serialize(tokens, opts.serializer)
|
||||
puts HTML5lib::HTMLSerializer.serialize(tokens, opts.serializer)
|
||||
when :hilite
|
||||
print document.hilite
|
||||
when :tree
|
||||
|
@ -93,26 +107,35 @@ options.serializer = {
|
|||
|
||||
require 'optparse'
|
||||
opts = OptionParser.new do |opts|
|
||||
opts.on("-p", "--[no-]profile", "Profile the run") do |profile|
|
||||
options.profile = profile
|
||||
end
|
||||
|
||||
opts.on("-t", "--[no-]time", "Time the run") do |time|
|
||||
options.time = time
|
||||
end
|
||||
|
||||
opts.separator ""
|
||||
opts.separator "Parse Options:"
|
||||
|
||||
opts.on("-b", "--treebuilder NAME") do |treebuilder|
|
||||
options.treebuilder = treebuilder
|
||||
end
|
||||
|
||||
opts.on("-e", "--error", "Print a list of parse errors") do |error|
|
||||
options.error = error
|
||||
end
|
||||
|
||||
opts.on("-f", "--fragment", "Parse as a fragment") do |parse|
|
||||
options.parsemethod = :parseFragment
|
||||
end
|
||||
|
||||
opts.separator ""
|
||||
opts.separator "Filter Options:"
|
||||
|
||||
opts.on("--[no-]inject-meta-charset", "inject <meta charset>") do |inject|
|
||||
options.serializer[:inject_meta_charset] = inject
|
||||
end
|
||||
|
||||
opts.on("--[no-]strip-whitespace", "strip unnecessary whitespace") do |strip|
|
||||
options.serializer[:strip_whitespace] = strip
|
||||
end
|
||||
|
||||
opts.on("--[no-]sanitize", "escape unsafe tags") do |sanitize|
|
||||
options.serializer[:sanitize] = sanitize
|
||||
end
|
||||
|
||||
opts.separator ""
|
||||
opts.separator "Output Options:"
|
||||
|
||||
opts.on("--tree", "output as debug tree") do |tree|
|
||||
options.output = :tree
|
||||
end
|
||||
|
@ -130,26 +153,56 @@ opts = OptionParser.new do |opts|
|
|||
options.output = :hilite
|
||||
end
|
||||
|
||||
opts.on("-c", "--[no-]encoding", "Print character encoding used") do |encoding|
|
||||
options.encoding = encoding
|
||||
opts.on("-e", "--error", "Print a list of parse errors") do |error|
|
||||
options.error = error
|
||||
end
|
||||
|
||||
opts.on("--[no-]inject-meta-charset", "inject <meta charset>") do |inject|
|
||||
options.serializer[:inject_meta_charset] = inject
|
||||
end
|
||||
|
||||
opts.on("--[no-]strip-whitespace", "strip unnecessary whitespace") do |strip|
|
||||
options.serializer[:strip_whitespace] = strip
|
||||
end
|
||||
|
||||
opts.on("--[no-]sanitize", "escape unsafe tags") do |sanitize|
|
||||
options.serializer[:sanitize] = sanitize
|
||||
end
|
||||
opts.separator ""
|
||||
opts.separator "Serialization Options:"
|
||||
|
||||
opts.on("--[no-]omit-optional-tags", "Omit optional tags") do |omit|
|
||||
options.serializer[:omit_optional_tags] = omit
|
||||
end
|
||||
|
||||
opts.on("--[no-]quote-attr-values", "Quote attribute values") do |quote|
|
||||
options.serializer[:quote_attr_values] = quote
|
||||
end
|
||||
|
||||
opts.on("--[no-]use-best-quote-char", "Use best quote character") do |best|
|
||||
options.serializer[:use_best_quote_char] = best
|
||||
end
|
||||
|
||||
opts.on("--quote-char C", "Use specified quote character") do |c|
|
||||
options.serializer[:quote_char] = c
|
||||
end
|
||||
|
||||
opts.on("--[no-]minimize-boolean-attributes", "Minimize boolean attributes") do |min|
|
||||
options.serializer[:minimize_boolean_attributes] = min
|
||||
end
|
||||
|
||||
opts.on("--[no-]use-trailing-solidus", "Use trailing solidus") do |slash|
|
||||
options.serializer[:use_trailing_solidus] = slash
|
||||
end
|
||||
|
||||
opts.on("--[no-]escape-lt-in-attrs", "Escape less than signs in attribute values") do |lt|
|
||||
options.serializer[:escape_lt_in_attrs] = lt
|
||||
end
|
||||
|
||||
opts.separator ""
|
||||
opts.separator "Other Options:"
|
||||
|
||||
opts.on("-p", "--[no-]profile", "Profile the run") do |profile|
|
||||
options.profile = profile
|
||||
end
|
||||
|
||||
opts.on("-t", "--[no-]time", "Time the run") do |time|
|
||||
options.time = time
|
||||
end
|
||||
|
||||
opts.on("-c", "--[no-]encoding", "Print character encoding used") do |encoding|
|
||||
options.encoding = encoding
|
||||
end
|
||||
|
||||
opts.on_tail("-h", "--help", "Show this message") do
|
||||
puts opts
|
||||
exit
|
||||
|
|
|
@ -7,4 +7,4 @@
|
|||
<meta name="description" content="日本最大級のポータルサイト。検索、オークション、ニュース、メール、コミュニティ、ショッピング、など80以上のサービスを展開。あなたの生活をより豊かにする「ライフ・エンジン」を目指していきます。">
|
||||
<style type="text/css" media="all">
|
||||
#encoding
|
||||
euc-jp
|
||||
euc-jp
|
||||
|
|
|
@ -322,12 +322,14 @@ Windows-1252
|
|||
#encoding
|
||||
Windows-1252
|
||||
|
||||
#data <!-- 4096 characters xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx-->
|
||||
#data
|
||||
<!-- 4096 characters xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx-->
|
||||
<meta charset="ISO-8859-9">
|
||||
#encoding
|
||||
Windows-1252
|
||||
|
||||
#data <!-- 4097 characters xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz-->
|
||||
#data
|
||||
<!-- 4097 characters xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz-->
|
||||
<meta charset="ISO-8859-9">
|
||||
#encoding
|
||||
Windows-1252
|
||||
|
|
|
@ -35,7 +35,7 @@
|
|||
|
||||
{
|
||||
"name": "div_background_image_unicode_encoded",
|
||||
"input": "<div style=\"background-image:\a5\a2\006C\0028'\006a\0061\a6\0061\a3\0063\a2\0069\a0\a4\003a\0061\006c\0065\a2\a4\0028.1027\0058.1053\0053\0027\0029'\0029\">foo</div>",
|
||||
"input": "<div style=\"background-image:\u00a5\u00a2\u006C\u0028'\u006a\u0061\u00a6\u0061\u00a3\u0063\u00a2\u0069\u00a0\u00a4\u003a\u0061\u006c\u0065\u00a2\u00a4\u0028.1027\u0058.1053\u0053\u0027\u0029'\u0029\">foo</div>",
|
||||
"output": "<div style=''>foo</div>"
|
||||
},
|
||||
|
||||
|
@ -48,14 +48,14 @@
|
|||
{
|
||||
"name": "double_open_angle_brackets",
|
||||
"input": "<img src=http://ha.ckers.org/scriptlet.html <",
|
||||
"output": "<img src='http://ha.ckers.org/scriptlet.html'/><",
|
||||
"output": "<img src='http://ha.ckers.org/scriptlet.html'/>",
|
||||
"rexml": "Ill-formed XHTML!"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "double_open_angle_brackets_2",
|
||||
"input": "<script src=http://ha.ckers.org/scriptlet.html <",
|
||||
"output": "<script src=\"http://ha.ckers.org/scriptlet.html\"><",
|
||||
"output": "<script src=\"http://ha.ckers.org/scriptlet.html\" <=\"\">",
|
||||
"rexml": "Ill-formed XHTML!"
|
||||
},
|
||||
|
||||
|
@ -110,7 +110,7 @@
|
|||
{
|
||||
"name": "no_closing_script_tags",
|
||||
"input": "<script src=http://ha.ckers.org/xss.js?<b>",
|
||||
"output": "<script src=\"http://ha.ckers.org/xss.js?\"><b/>",
|
||||
"output": "<script src=\"http://ha.ckers.org/xss.js?&lt;b\">",
|
||||
"rexml": "Ill-formed XHTML!"
|
||||
},
|
||||
|
||||
|
@ -123,7 +123,7 @@
|
|||
|
||||
{
|
||||
"name": "non_alpha_non_digit_2",
|
||||
"input": "<a onclick!\#$%&()*~+-_.,:;?@[/|\\]^`=alert(\"XSS\")>foo</a>",
|
||||
"input": "<a onclick!\\#$%&()*~+-_.,:;?@[/|\\]^`=alert(\"XSS\")>foo</a>",
|
||||
"output": "<a>foo</a>",
|
||||
"rexml": "Ill-formed XHTML!"
|
||||
},
|
||||
|
@ -137,7 +137,7 @@
|
|||
|
||||
{
|
||||
"name": "non_alpha_non_digit_II",
|
||||
"input": "<a href!\#$%&()*~+-_.,:;?@[/|]^`=alert('XSS')>foo</a>",
|
||||
"input": "<a href!\\#$%&()*~+-_.,:;?@[/|]^`=alert('XSS')>foo</a>",
|
||||
"output": "<a>foo</a>",
|
||||
"rexml": "Ill-formed XHTML!"
|
||||
},
|
||||
|
@ -351,7 +351,7 @@
|
|||
{
|
||||
"name": "should_sanitize_script_tag_with_multiple_open_brackets_2",
|
||||
"input": "<iframe src=http://ha.ckers.org/scriptlet.html\n<",
|
||||
"output": "<iframe src=\"http://ha.ckers.org/scriptlet.html\"><",
|
||||
"output": "<iframe src=\"http://ha.ckers.org/scriptlet.html\" <=\"\">",
|
||||
"rexml": "Ill-formed XHTML!"
|
||||
},
|
||||
|
||||
|
@ -365,7 +365,7 @@
|
|||
{
|
||||
"name": "should_sanitize_unclosed_script",
|
||||
"input": "<script src=http://ha.ckers.org/xss.js?<b>",
|
||||
"output": "<script src=\"http://ha.ckers.org/xss.js?\"><b/>",
|
||||
"output": "<script src=\"http://ha.ckers.org/xss.js?&lt;b\">",
|
||||
"rexml": "Ill-formed XHTML!"
|
||||
},
|
||||
|
||||
|
|
|
@ -7,7 +7,8 @@
|
|||
|
||||
{"description": "proper attribute value non-quoting",
|
||||
"input": [["StartTag", "span", {"title": "foo"}]],
|
||||
"expected": ["<span title=foo>"]
|
||||
"expected": ["<span title=foo>"],
|
||||
"xhtml": ["<span title=\"foo\">"]
|
||||
},
|
||||
|
||||
{"description": "proper attribute value quoting (with >)",
|
||||
|
@ -17,7 +18,8 @@
|
|||
|
||||
{"description": "proper attribute value quoting (with <)",
|
||||
"input": [["StartTag", "span", {"title": "foo<bar"}]],
|
||||
"expected": ["<span title=\"foo<bar\">"]
|
||||
"expected": ["<span title=\"foo<bar\">"],
|
||||
"xhtml": ["<span title=\"foo<bar\">"]
|
||||
},
|
||||
|
||||
{"description": "proper attribute value quoting (with \")",
|
||||
|
@ -67,12 +69,14 @@
|
|||
|
||||
{"description": "void element (as EmptyTag token)",
|
||||
"input": [["EmptyTag", "img", {}]],
|
||||
"expected": ["<img>"]
|
||||
"expected": ["<img>"],
|
||||
"xhtml": ["<img />"]
|
||||
},
|
||||
|
||||
{"description": "void element (as StartTag token)",
|
||||
"input": [["StartTag", "img", {}]],
|
||||
"expected": ["<img>"]
|
||||
"expected": ["<img>"],
|
||||
"xhtml": ["<img />"]
|
||||
},
|
||||
|
||||
{"description": "doctype in error",
|
||||
|
@ -80,6 +84,17 @@
|
|||
"expected": ["<!DOCTYPE foo>"]
|
||||
},
|
||||
|
||||
{"description": "character data",
|
||||
"options": {"encoding":"utf-8"},
|
||||
"input": [["Characters", "a<b>c&d"]],
|
||||
"expected": ["a<b>c&d"]
|
||||
},
|
||||
|
||||
{"description": "rcdata",
|
||||
"input": [["StartTag", "script", {}], ["Characters", "a<b>c&d"]],
|
||||
"expected": ["<script>a<b>c&d"]
|
||||
},
|
||||
|
||||
{"description": "doctype",
|
||||
"input": [["Doctype", "HTML"]],
|
||||
"expected": ["<!DOCTYPE HTML>"]
|
||||
|
|
|
@ -9,31 +9,57 @@
|
|||
{"description": "empytag head",
|
||||
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
||||
"input": [["EmptyTag", "head", {}]],
|
||||
"expected": ["<head><meta charset=utf-8>"]
|
||||
"expected": ["<head><meta charset=utf-8>"],
|
||||
"xhtml": ["<head><meta charset=\"utf-8\" /></head>"]
|
||||
},
|
||||
|
||||
{"description": "head w/title",
|
||||
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
||||
"input": [["StartTag", "head", {}], ["StartTag","title",{}], ["Characters", "foo"],["EndTag", "title"], ["EndTag", "head"]],
|
||||
"expected": ["<head><meta charset=utf-8><title>foo</title>"]
|
||||
"expected": ["<head><meta charset=utf-8><title>foo</title>"],
|
||||
"xhtml": ["<head><meta charset=\"utf-8\" /><title>foo</title></head>"]
|
||||
},
|
||||
|
||||
{"description": "head w/meta-charset",
|
||||
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
||||
"input": [["StartTag", "head", {}], ["EmptyTag","meta",{"charset":"ascii"}], ["EndTag", "head"]],
|
||||
"expected": ["<head><meta charset=utf-8>"]
|
||||
"expected": ["<head><meta charset=utf-8>"],
|
||||
"xhtml": ["<head><meta charset=\"utf-8\" /></head>"]
|
||||
},
|
||||
|
||||
{"description": "head w/ two meta-charset",
|
||||
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
||||
"input": [["StartTag", "head", {}], ["EmptyTag","meta",{"charset":"ascii"}], ["EmptyTag","meta",{"charset":"ascii"}], ["EndTag", "head"]],
|
||||
"expected": ["<head><meta charset=utf-8><meta charset=utf-8>", "<head><meta charset=utf-8><meta charset=ascii>"],
|
||||
"xhtml": ["<head><meta charset=\"utf-8\" /><meta charset=\"utf-8\" /></head>", "<head><meta charset=\"utf-8\" /><meta charset=\"ascii\" /></head>"]
|
||||
},
|
||||
|
||||
{"description": "head w/robots",
|
||||
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
||||
"input": [["StartTag", "head", {}], ["EmptyTag","meta",{"name":"robots","content":"noindex"}], ["EndTag", "head"]],
|
||||
"expected": ["<head><meta charset=utf-8><meta content=noindex name=robots>"]
|
||||
"expected": ["<head><meta charset=utf-8><meta content=noindex name=robots>"],
|
||||
"xhtml": ["<head><meta charset=\"utf-8\" /><meta content=\"noindex\" name=\"robots\" /></head>"]
|
||||
},
|
||||
|
||||
{"description": "head w/robots & charset",
|
||||
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
||||
"input": [["StartTag", "head", {}], ["EmptyTag","meta",{"name":"robots","content":"noindex"}], ["EmptyTag","meta",{"charset":"ascii"}], ["EndTag", "head"]],
|
||||
"expected": ["<head><meta content=noindex name=robots><meta charset=utf-8>"]
|
||||
"expected": ["<head><meta content=noindex name=robots><meta charset=utf-8>"],
|
||||
"xhtml": ["<head><meta content=\"noindex\" name=\"robots\" /><meta charset=\"utf-8\" /></head>"]
|
||||
},
|
||||
|
||||
{"description": "head w/ charset in http-equiv content-type",
|
||||
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
||||
"input": [["StartTag", "head", {}], ["EmptyTag","meta",{"http-equiv":"content-type", "content":"text/html; charset=ascii"}], ["EndTag", "head"]],
|
||||
"expected": ["<head><meta content=\"text/html; charset=utf-8\" http-equiv=content-type>"],
|
||||
"xhtml": ["<head><meta content=\"text/html; charset=utf-8\" http-equiv=\"content-type\" /></head>"]
|
||||
},
|
||||
|
||||
{"description": "head w/robots & charset in http-equiv content-type",
|
||||
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
||||
"input": [["StartTag", "head", {}], ["EmptyTag","meta",{"name":"robots","content":"noindex"}], ["EmptyTag","meta",{"http-equiv":"content-type", "content":"text/html; charset=ascii"}], ["EndTag", "head"]],
|
||||
"expected": ["<head><meta content=noindex name=robots><meta content=\"text/html; charset=utf-8\" http-equiv=content-type>"],
|
||||
"xhtml": ["<head><meta content=\"noindex\" name=\"robots\" /><meta content=\"text/html; charset=utf-8\" http-equiv=\"content-type\" /></head>"]
|
||||
}
|
||||
|
||||
]}
|
||||
|
|
|
@ -9,13 +9,15 @@
|
|||
{"description": "quote_attr_values=true",
|
||||
"options": {"quote_attr_values": true},
|
||||
"input": [["StartTag", "button", {"disabled": "disabled"}]],
|
||||
"expected": ["<button disabled>"]
|
||||
"expected": ["<button disabled>"],
|
||||
"xhtml": ["<button disabled=\"disabled\">"]
|
||||
},
|
||||
|
||||
{"description": "quote_attr_values=true with irrelevant",
|
||||
"options": {"quote_attr_values": true},
|
||||
"input": [["StartTag", "div", {"irrelevant": "irrelevant"}]],
|
||||
"expected": ["<div irrelevant>"]
|
||||
"expected": ["<div irrelevant>"],
|
||||
"xhtml": ["<div irrelevant=\"irrelevant\">"]
|
||||
},
|
||||
|
||||
{"description": "use_trailing_solidus=true with void element",
|
||||
|
@ -33,13 +35,20 @@
|
|||
{"description": "minimize_boolean_attributes=false",
|
||||
"options": {"minimize_boolean_attributes": false},
|
||||
"input": [["StartTag", "div", {"irrelevant": "irrelevant"}]],
|
||||
"expected": ["<div irrelevant=irrelevant>"]
|
||||
"expected": ["<div irrelevant=irrelevant>"],
|
||||
"xhtml": ["<div irrelevant=\"irrelevant\">"]
|
||||
},
|
||||
|
||||
{"description": "minimize_boolean_attributes=false with empty value",
|
||||
"options": {"minimize_boolean_attributes": false},
|
||||
"input": [["StartTag", "div", {"irrelevant": ""}]],
|
||||
"expected": ["<div irrelevant=\"\">"]
|
||||
},
|
||||
|
||||
{"description": "escape less than signs in attribute values",
|
||||
"options": {"escape_lt_in_attrs": true},
|
||||
"input": [["StartTag", "a", {"title": "a<b>c&d"}]],
|
||||
"expected": ["<a title=\"a<b>c&d\">"]
|
||||
}
|
||||
|
||||
]}
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
"contentModelFlags":["RCDATA", "CDATA"],
|
||||
"lastStartTag":"baz",
|
||||
"input":"</foo>bar</baz>",
|
||||
"output":["ParseError", ["Character", "</foo>bar"], ["EndTag", "baz"]]},
|
||||
"output":[["Character", "</foo>bar"], ["EndTag", "baz"]]},
|
||||
|
||||
{"description":"End tag closing RCDATA or CDATA, switching back to PCDATA",
|
||||
"contentModelFlags":["RCDATA", "CDATA"],
|
||||
|
|
21
vendor/plugins/HTML5lib/testdata/tokenizer/escapeFlag.test
vendored
Normal file
21
vendor/plugins/HTML5lib/testdata/tokenizer/escapeFlag.test
vendored
Normal file
|
@ -0,0 +1,21 @@
|
|||
{"tests": [
|
||||
|
||||
{"description":"Commented close tag in [R]CDATA",
|
||||
"contentModelFlags":["RCDATA", "CDATA"],
|
||||
"lastStartTag":"bar",
|
||||
"input":"foo<!--</bar>--></bar>",
|
||||
"output":[["Character", "foo<!--</bar>-->"], ["EndTag", "bar"]]},
|
||||
|
||||
{"description":"Bogus comment in [R]CDATA",
|
||||
"contentModelFlags":["RCDATA", "CDATA"],
|
||||
"lastStartTag":"bar",
|
||||
"input":"foo<!-->baz</bar>",
|
||||
"output":[["Character", "foo<!-->baz"], ["EndTag", "bar"]]},
|
||||
|
||||
{"description":"End tag surrounded by bogus comment in [R]CDATA",
|
||||
"contentModelFlags":["RCDATA", "CDATA"],
|
||||
"lastStartTag":"bar",
|
||||
"input":"foo<!--></bar><!-->baz</bar>",
|
||||
"output":[["Character", "foo<!-->"], ["EndTag", "bar"], "ParseError", ["Comment", ""], ["Character", "baz"], ["EndTag", "bar"]]}
|
||||
|
||||
]}
|
|
@ -2,15 +2,15 @@
|
|||
|
||||
{"description":"Correct Doctype lowercase",
|
||||
"input":"<!DOCTYPE html>",
|
||||
"output":[["DOCTYPE", "HTML", false]]},
|
||||
"output":[["DOCTYPE", "html", null, null, true]]},
|
||||
|
||||
{"description":"Correct Doctype uppercase",
|
||||
"input":"<!DOCTYPE HTML>",
|
||||
"output":[["DOCTYPE", "HTML", false]]},
|
||||
"input":"<!DOCTYPE HTML>",
|
||||
"output":[["DOCTYPE", "HTML", null, null, true]]},
|
||||
|
||||
{"description":"Correct Doctype mixed case",
|
||||
"input":"<!DOCTYPE HtMl>",
|
||||
"output":[["DOCTYPE", "HTML", false]]},
|
||||
"output":[["DOCTYPE", "HtMl", null, null, true]]},
|
||||
|
||||
{"description":"Truncated doctype start",
|
||||
"input":"<!DOC>",
|
||||
|
@ -18,7 +18,7 @@
|
|||
|
||||
{"description":"Doctype in error",
|
||||
"input":"<!DOCTYPE foo>",
|
||||
"output":[["DOCTYPE", "FOO", true]]},
|
||||
"output":[["DOCTYPE", "foo", null, null, true]]},
|
||||
|
||||
{"description":"Single Start Tag",
|
||||
"input":"<h>",
|
||||
|
@ -84,17 +84,38 @@
|
|||
"input":"<!-",
|
||||
"output":["ParseError", ["Comment", "-"]]},
|
||||
|
||||
{"description":"Ampersand only",
|
||||
{"description":"Short comment",
|
||||
"input":"<!-->",
|
||||
"output":["ParseError", ["Comment", ""]]},
|
||||
|
||||
{"description":"Short comment two",
|
||||
"input":"<!--->",
|
||||
"output":["ParseError", ["Comment", ""]]},
|
||||
|
||||
{"description":"Short comment three",
|
||||
"input":"<!---->",
|
||||
"output":[["Comment", ""]]},
|
||||
|
||||
|
||||
{"description":"Ampersand EOF",
|
||||
"input":"&",
|
||||
"output":["ParseError", ["Character", "&"]]},
|
||||
"output":[["Character", "&"]]},
|
||||
|
||||
{"description":"Ampersand ampersand EOF",
|
||||
"input":"&&",
|
||||
"output":[["Character", "&&"]]},
|
||||
|
||||
{"description":"Ampersand space EOF",
|
||||
"input":"& ",
|
||||
"output":[["Character", "& "]]},
|
||||
|
||||
{"description":"Unfinished entity",
|
||||
"input":"&f",
|
||||
"output":["ParseError", ["Character", "&"], ["Character", "f"]]},
|
||||
"output":["ParseError", ["Character", "&f"]]},
|
||||
|
||||
{"description":"Ampersand, number sign",
|
||||
"input":"&#",
|
||||
"output":["ParseError", ["Character", "&"], ["Character", "#"]]},
|
||||
"output":["ParseError", ["Character", "&#"]]},
|
||||
|
||||
{"description":"Unfinished numeric entity",
|
||||
"input":"&#x",
|
||||
|
@ -110,8 +131,7 @@
|
|||
|
||||
{"description":"Entity without trailing semicolon (1)",
|
||||
"input":"I'm ¬it",
|
||||
"output":[["Character","I'm "], "ParseError", ["Character", "¬"],
|
||||
["Character", "it"]]},
|
||||
"output":[["Character","I'm "], "ParseError", ["Character", "¬it"]]},
|
||||
|
||||
{"description":"Entity without trailing semicolon (2)",
|
||||
"input":"I'm ¬in",
|
||||
|
|
|
@ -1,24 +1,44 @@
|
|||
{"tests": [
|
||||
|
||||
{"description":"Doctype without a name",
|
||||
{"description":"DOCTYPE without name",
|
||||
"input":"<!DOCTYPE>",
|
||||
"output":["ParseError", "ParseError", ["DOCTYPE", "", true]]},
|
||||
"output":["ParseError", "ParseError", ["DOCTYPE", "", null, null, false]]},
|
||||
|
||||
{"description":"Correct doctype without a space before name",
|
||||
{"description":"DOCTYPE without space before name",
|
||||
"input":"<!DOCTYPEhtml>",
|
||||
"output":["ParseError", ["DOCTYPE", "HTML", false]]},
|
||||
"output":["ParseError", ["DOCTYPE", "html", null, null, true]]},
|
||||
|
||||
{"description":"Incorrect doctype without a space before name",
|
||||
{"description":"Incorrect DOCTYPE without a space before name",
|
||||
"input":"<!DOCTYPEfoo>",
|
||||
"output":["ParseError", ["DOCTYPE", "FOO", true]]},
|
||||
"output":["ParseError", ["DOCTYPE", "foo", null, null, true]]},
|
||||
|
||||
{"description":"Bogus doctype",
|
||||
{"description":"DOCTYPE with publicId",
|
||||
"input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\">",
|
||||
"output":["ParseError", ["DOCTYPE", "HTML", true]]},
|
||||
"output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", null, true]]},
|
||||
|
||||
{"description":"DOCTYPE with EOF after PUBLIC",
|
||||
"input":"<!DOCTYPE html PUBLIC",
|
||||
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
|
||||
|
||||
{"description":"DOCTYPE with EOF after PUBLIC '",
|
||||
"input":"<!DOCTYPE html PUBLIC '",
|
||||
"output":["ParseError", ["DOCTYPE", "html", "", null, false]]},
|
||||
|
||||
{"description":"DOCTYPE with EOF after PUBLIC 'x",
|
||||
"input":"<!DOCTYPE html PUBLIC 'x",
|
||||
"output":["ParseError", ["DOCTYPE", "html", "x", null, false]]},
|
||||
|
||||
{"description":"DOCTYPE with systemId",
|
||||
"input":"<!DOCTYPE html SYSTEM \"-//W3C//DTD HTML Transitional 4.01//EN\">",
|
||||
"output":[["DOCTYPE", "html", null, "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
|
||||
|
||||
{"description":"DOCTYPE with publicId and systemId",
|
||||
"input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\" \"-//W3C//DTD HTML Transitional 4.01//EN\">",
|
||||
"output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
|
||||
|
||||
{"description":"Incomplete doctype",
|
||||
"input":"<!DOCTYPE html ",
|
||||
"output":["ParseError", ["DOCTYPE", "HTML", true]]},
|
||||
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
|
||||
|
||||
{"description":"Numeric entity representing the NUL character",
|
||||
"input":"�",
|
||||
|
@ -30,19 +50,19 @@
|
|||
|
||||
{"description":"Numeric entity representing a codepoint after 1114111 (U+10FFFF)",
|
||||
"input":"�",
|
||||
"output":["ParseError", ["Character", "\uFFFD"]]},
|
||||
"output":[["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"Hexadecimal entity representing a codepoint after 1114111 (U+10FFFF)",
|
||||
"input":"�",
|
||||
"output":["ParseError", ["Character", "\uFFFD"]]},
|
||||
"output":[["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"Numeric entity representing a Windows-1252 'codepoint'",
|
||||
"input":"‰",
|
||||
"output":[["Character", "\u2030"]]},
|
||||
"output":["ParseError", ["Character", "\u2030"]]},
|
||||
|
||||
{"description":"Hexadecimal entity representing a Windows-1252 'codepoint'",
|
||||
"input":"‰",
|
||||
"output":[["Character", "\u2030"]]},
|
||||
"output":["ParseError", ["Character", "\u2030"]]},
|
||||
|
||||
{"description":"Hexadecimal entity with mixed uppercase and lowercase",
|
||||
"input":"ꯍ",
|
||||
|
@ -58,7 +78,7 @@
|
|||
|
||||
{"description":"StartTag containing <",
|
||||
"input":"<a<b>",
|
||||
"output":["ParseError", ["StartTag", "a", { }], ["StartTag", "b", { }]]},
|
||||
"output":[["StartTag", "a<b", { }]]},
|
||||
|
||||
{"description":"Non-void element containing trailing /",
|
||||
"input":"<h/>",
|
||||
|
|
|
@ -226,7 +226,6 @@ Line1<br>Line2<br>Line3<br>Line4
|
|||
<h1>Hello<h2>World
|
||||
#errors
|
||||
4: missing document type declaration
|
||||
13: h2 element start tag implying h1 element end tag
|
||||
19: mismatched body element end tag (premature end of file?)
|
||||
#document
|
||||
| <html>
|
||||
|
@ -234,8 +233,8 @@ Line1<br>Line2<br>Line3<br>Line4
|
|||
| <body>
|
||||
| <h1>
|
||||
| "Hello"
|
||||
| <h2>
|
||||
| "World"
|
||||
| <h2>
|
||||
| "World"
|
||||
|
||||
#data
|
||||
<a><p>X<a>Y</a>Z</p></a>
|
||||
|
@ -307,13 +306,18 @@ Line1<br>Line2<br>Line3<br>Line4
|
|||
#data
|
||||
<!--><div>--<!-->
|
||||
#errors
|
||||
13: unexpected character after two '-' characters while parsing comment
|
||||
18: missing document type declaration
|
||||
Incorrect comment token
|
||||
Missing document type declaration
|
||||
Incorrect comment token
|
||||
Unexpected end of file
|
||||
#document
|
||||
| <!-- ><div>--<! -->
|
||||
| <!-- -->
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| "--"
|
||||
| <!-- -->
|
||||
|
||||
#data
|
||||
<p><hr></p>
|
||||
|
@ -638,10 +642,10 @@ Line1<br>Line2<br>Line3<br>Line4
|
|||
| <html>
|
||||
| <head>
|
||||
| <script>
|
||||
| " <!-- "
|
||||
| " <!-- </script> --> "
|
||||
| " "
|
||||
| <body>
|
||||
| "--> EOF"
|
||||
| "EOF"
|
||||
|
||||
#data
|
||||
<b><p></b>TEST
|
||||
|
@ -1248,15 +1252,13 @@ Line1<br>Line2<br>Line3<br>Line4
|
|||
#data
|
||||
<style><!--</style><meta><script>--><link></script>
|
||||
#errors
|
||||
7: missing document type declaration
|
||||
missing document type declaration
|
||||
unexpected EOF
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <style>
|
||||
| "<!--"
|
||||
| <meta>
|
||||
| <script>
|
||||
| "--><link>"
|
||||
| "<!--</style><meta><script>--><link></script>"
|
||||
| <body>
|
||||
|
||||
#data
|
||||
|
@ -1305,12 +1307,12 @@ Line1<br>Line2<br>Line3<br>Line4
|
|||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <base>
|
||||
| <link>
|
||||
| <meta>
|
||||
| <title>
|
||||
| "<p>"
|
||||
| <body>
|
||||
| <base>
|
||||
| <link>
|
||||
| <meta>
|
||||
| <p>
|
||||
|
||||
#data
|
||||
|
@ -1381,12 +1383,11 @@ Line1<br>Line2<br>Line3<br>Line4
|
|||
6: missing document type declaration
|
||||
19: unexpected node at end of document
|
||||
19: unexpected node after body element end tag
|
||||
19: meta element start tag out of place
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <meta>
|
||||
| <body>
|
||||
| <meta>
|
||||
| <p>
|
||||
|
||||
#data
|
||||
|
@ -1430,14 +1431,13 @@ Line1<br>Line2<br>Line3<br>Line4
|
|||
<h1><h2>
|
||||
#errors
|
||||
4: missing document type declaration
|
||||
8: h2 element start tag implying h1 element end tag
|
||||
9: mismatched body element end tag (premature end of file?)
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <h1>
|
||||
| <h2>
|
||||
| <h2>
|
||||
|
||||
#data
|
||||
<a><p><a></a></p></a>
|
||||
|
@ -1630,8 +1630,7 @@ Line1<br>Line2<br>Line3<br>Line4
|
|||
4: missing document type declaration
|
||||
15: required tr element start tag implied by unexpected td element start tag
|
||||
27: unexpected td element end tag implied other end tags
|
||||
31: h3 element start tag implying h1 element end tag
|
||||
36: mismatched h1 element end tag
|
||||
Unexpected EOF
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
|
@ -1642,7 +1641,7 @@ Line1<br>Line2<br>Line3<br>Line4
|
|||
| <tr>
|
||||
| <td>
|
||||
| <h3>
|
||||
| <h3>
|
||||
| <h3>
|
||||
|
||||
#data
|
||||
<table><colgroup><col><colgroup><col><col><col><colgroup><col><col><thead><tr><td></table>
|
||||
|
@ -1807,6 +1806,7 @@ Line1<br>Line2<br>Line3<br>Line4
|
|||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <br>
|
||||
|
||||
#data
|
||||
<table><tr></strong></b></em></i></u></strike></s></blink></tt></pre></big></small></font></select></h1></h2></h3></h4></h5></h6></body></br></a></img></title></span></style></script></table></th></td></tr></frame></area></link></param></hr></input></col></base></meta></basefont></bgsound></embed></spacer></p></dd></dt></caption></colgroup></tbody></tfoot></thead></address></blockquote></center></dir></div></dl></fieldset></listing></menu></ol></ul></li></nobr></wbr></form></button></marquee></object></html></frameset></head></iframe></image></isindex></noembed></noframes></noscript></optgroup></option></plaintext></textarea>
|
||||
|
@ -1924,6 +1924,7 @@ Line1<br>Line2<br>Line3<br>Line4
|
|||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <br>
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
#data
|
||||
<!doctype html>Test
|
||||
<!DOCTYPE HTML>Test
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
|
@ -63,7 +63,7 @@ frame element can't occur here
|
|||
| "test"
|
||||
|
||||
#data
|
||||
<!doctype html><frameset>test
|
||||
<!DOCTYPE HTML><frameset>test
|
||||
#errors
|
||||
frameset can't contain text
|
||||
Unexpected end of file
|
||||
|
@ -74,7 +74,7 @@ Unexpected end of file
|
|||
| <frameset>
|
||||
|
||||
#data
|
||||
<!doctype html><frameset><!doctype html>
|
||||
<!DOCTYPE HTML><frameset><!DOCTYPE HTML>
|
||||
#errors
|
||||
document type declaration can only occur at the start of a document
|
||||
Expected end tag </frameset>
|
||||
|
@ -85,7 +85,7 @@ Expected end tag </frameset>
|
|||
| <frameset>
|
||||
|
||||
#data
|
||||
<!doctype html><font><p><b>test</font>
|
||||
<!DOCTYPE HTML><font><p><b>test</font>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
|
@ -99,7 +99,7 @@ Expected end tag </frameset>
|
|||
| "test"
|
||||
|
||||
#data
|
||||
<!DOCTYPE htmL><dt><div><dd>
|
||||
<!DOCTYPE HTML><dt><div><dd>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
|
@ -151,7 +151,7 @@ Unexpected end of file.
|
|||
| "</plaintext>"
|
||||
|
||||
#data
|
||||
<!doctype html><table><tr>TEST
|
||||
<!DOCTYPE HTML><table><tr>TEST
|
||||
#errors
|
||||
TEST can't occur in <tr>
|
||||
Unexpected end of file.
|
||||
|
@ -166,7 +166,7 @@ Unexpected end of file.
|
|||
| <tr>
|
||||
|
||||
#data
|
||||
<!doctype html><body t1=1><body t2=2><body t3=3 t4=4>
|
||||
<!DOCTYPE HTML><body t1=1><body t2=2><body t3=3 t4=4>
|
||||
#errors
|
||||
Unexpected start tag "body"
|
||||
Unexpected start tag "body"
|
||||
|
@ -193,21 +193,18 @@ Unexpected end tag.
|
|||
| <body>
|
||||
|
||||
#data
|
||||
<!doctype HtML></b test<b &=&>X
|
||||
<!DOCTYPE HTML></b test<b &=&>X
|
||||
#errors
|
||||
Unexpected < in attribute
|
||||
End tag contains attributes.
|
||||
Unexpected end tag.
|
||||
Named entity didn't end with ;
|
||||
Unexpected EOF. Missing closing tag.
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <b>
|
||||
| &="&"
|
||||
| "X"
|
||||
| "X"
|
||||
|
||||
#data
|
||||
<!doctypehtml><scrIPt type=text/x-foobar;baz>X</SCRipt
|
||||
|
@ -215,7 +212,7 @@ Unexpected EOF. Missing closing tag.
|
|||
No space after literal DOCTYPE.
|
||||
Unexpected EOF in (end) tag name
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <script>
|
||||
|
@ -294,7 +291,7 @@ Unfinished named entity.
|
|||
#errors
|
||||
No space after literal DOCTYPE.
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
@ -302,7 +299,7 @@ No space after literal DOCTYPE.
|
|||
| <li>
|
||||
|
||||
#data
|
||||
<!doctypehtml><p><dt>
|
||||
<!doctypeHTML><p><dt>
|
||||
#errors
|
||||
No space after literal DOCTYPE.
|
||||
#document
|
||||
|
@ -314,11 +311,11 @@ No space after literal DOCTYPE.
|
|||
| <dt>
|
||||
|
||||
#data
|
||||
<!doctypehtml><p><dd>
|
||||
<!doctypehtmL><p><dd>
|
||||
#errors
|
||||
No space after literal DOCTYPE.
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <!DOCTYPE htmL>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
@ -331,7 +328,7 @@ No space after literal DOCTYPE.
|
|||
No space after literal DOCTYPE.
|
||||
Unexpected EOF.
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
@ -339,7 +336,7 @@ Unexpected EOF.
|
|||
| <form>
|
||||
|
||||
#data
|
||||
<!doctype html><p><b><i><u></p> <p>X
|
||||
<!DOCTYPE HTML><p><b><i><u></p> <p>X
|
||||
#errors
|
||||
Unexpected end tag </p>.
|
||||
Unexpected end EOF. Missing closing tags.
|
||||
|
@ -360,7 +357,7 @@ Unexpected end EOF. Missing closing tags.
|
|||
| "X"
|
||||
|
||||
#data
|
||||
<!doctype html><p></P>X
|
||||
<!DOCTYPE HTML><p></P>X
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
|
@ -393,7 +390,7 @@ Invalid entity.
|
|||
| "&AMp;"
|
||||
|
||||
#data
|
||||
<!doctype html><html><head></head><body><thisISasillyTESTelementNameToMakeSureCrazyTagNamesArePARSEDcorrectLY>
|
||||
<!DOCTYPE HTML><html><head></head><body><thisISasillyTESTelementNameToMakeSureCrazyTagNamesArePARSEDcorrectLY>
|
||||
#errors
|
||||
Unexpected end of file.
|
||||
#document
|
||||
|
@ -404,7 +401,7 @@ Unexpected end of file.
|
|||
| <thisisasillytestelementnametomakesurecrazytagnamesareparsedcorrectly>
|
||||
|
||||
#data
|
||||
<!doctype html>X</body>X
|
||||
<!DOCTYPE HTML>X</body>X
|
||||
#errors
|
||||
Unexpected non-space characters in the after body phase.
|
||||
#document
|
||||
|
@ -415,7 +412,7 @@ Unexpected non-space characters in the after body phase.
|
|||
| "XX"
|
||||
|
||||
#data
|
||||
<!doctype html><!-- X
|
||||
<!DOCTYPE HTML><!-- X
|
||||
#errors
|
||||
Unexpected end of file in comment.
|
||||
#document
|
||||
|
@ -426,7 +423,7 @@ Unexpected end of file in comment.
|
|||
| <body>
|
||||
|
||||
#data
|
||||
<!doctype html><table><caption>test TEST</caption><td>test
|
||||
<!DOCTYPE HTML><table><caption>test TEST</caption><td>test
|
||||
#errors
|
||||
Unexpected <td> in table body phase.
|
||||
Unexpected end of file.
|
||||
|
@ -444,7 +441,7 @@ Unexpected end of file.
|
|||
| "test"
|
||||
|
||||
#data
|
||||
<!doctype html><select><option><optgroup>
|
||||
<!DOCTYPE HTML><select><option><optgroup>
|
||||
#errors
|
||||
Unexpected end of file. Missing closing tags.
|
||||
#document
|
||||
|
@ -457,7 +454,7 @@ Unexpected end of file. Missing closing tags.
|
|||
| <optgroup>
|
||||
|
||||
#data
|
||||
<!doctype html><select><optgroup><option></optgroup><option><select><option>
|
||||
<!DOCTYPE HTML><select><optgroup><option></optgroup><option><select><option>
|
||||
#errors
|
||||
Unexpected start tag <select> in <select>.
|
||||
Unexpected start tag <option>.
|
||||
|
@ -472,7 +469,7 @@ Unexpected start tag <option>.
|
|||
| <option>
|
||||
|
||||
#data
|
||||
<!doctype html><select><optgroup><option><optgroup>
|
||||
<!DOCTYPE HTML><select><optgroup><option><optgroup>
|
||||
#errors
|
||||
Unexpected end of file. Missing closing tags.
|
||||
#document
|
||||
|
@ -486,7 +483,7 @@ Unexpected end of file. Missing closing tags.
|
|||
| <optgroup>
|
||||
|
||||
#data
|
||||
<!doctype html><font><input><input></font>
|
||||
<!DOCTYPE HTML><font><input><input></font>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
|
@ -498,7 +495,7 @@ Unexpected end of file. Missing closing tags.
|
|||
| <input>
|
||||
|
||||
#data
|
||||
<!DoctypE html><!-- XXX - XXX -->
|
||||
<!DOCTYPE HTML><!-- XXX - XXX -->
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
|
@ -508,7 +505,7 @@ Unexpected end of file. Missing closing tags.
|
|||
| <body>
|
||||
|
||||
#data
|
||||
<!DoctypE html><!-- XXX - XXX
|
||||
<!DOCTYPE HTML><!-- XXX - XXX
|
||||
#errors
|
||||
Unexpected EOF in comment.
|
||||
#document
|
||||
|
@ -519,7 +516,7 @@ Unexpected EOF in comment.
|
|||
| <body>
|
||||
|
||||
#data
|
||||
<!DoctypE html><!-- XXX - XXX - XXX -->
|
||||
<!DOCTYPE HTML><!-- XXX - XXX - XXX -->
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
|
@ -583,7 +580,7 @@ Unexpected EOF.
|
|||
| "X"
|
||||
|
||||
#data
|
||||
<!doctype html><body><title>test</body></title>
|
||||
<!DOCTYPE HTML><body><title>test</body></title>
|
||||
#errors
|
||||
Unexpected start tag that belongs in the head.
|
||||
Expected closing tag after </.
|
||||
|
@ -596,7 +593,7 @@ Expected closing tag after </.
|
|||
| <body>
|
||||
|
||||
#data
|
||||
<!doctype html><body><title>X</title><meta name=z><link rel=foo><style>
|
||||
<!DOCTYPE HTML><body><title>X</title><meta name=z><link rel=foo><style>
|
||||
x { content:"</style" } </style>
|
||||
#errors
|
||||
Unexpected start tag that belongs in head.
|
||||
|
@ -609,17 +606,17 @@ Expected closing tag after </.
|
|||
| <head>
|
||||
| <title>
|
||||
| "X"
|
||||
| <body>
|
||||
| <meta>
|
||||
| name="z"
|
||||
| <link>
|
||||
| rel="foo"
|
||||
| <body>
|
||||
| <style>
|
||||
| "
|
||||
x { content:"</style" } "
|
||||
|
||||
#data
|
||||
<!doctype html><select><optgroup></optgroup></select>
|
||||
<!DOCTYPE HTML><select><optgroup></optgroup></select>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
|
@ -642,7 +639,7 @@ No doctype.
|
|||
| <body>
|
||||
|
||||
#data
|
||||
<!doctype html> <html>
|
||||
<!DOCTYPE HTML> <html>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
|
@ -652,7 +649,7 @@ No doctype.
|
|||
| <body>
|
||||
|
||||
#data
|
||||
<!doctype html><script>
|
||||
<!DOCTYPE HTML><script>
|
||||
</script> <title>x</title> </head>
|
||||
#errors
|
||||
#document
|
||||
|
@ -669,7 +666,7 @@ No doctype.
|
|||
| <body>
|
||||
|
||||
#data
|
||||
<!doctype html><html><body><html id=x>
|
||||
<!DOCTYPE HTML><html><body><html id=x>
|
||||
#errors
|
||||
duplicate html start tag
|
||||
#document
|
||||
|
@ -680,7 +677,7 @@ duplicate html start tag
|
|||
| <body>
|
||||
|
||||
#data
|
||||
<!doctype html>X</body><html id="x">
|
||||
<!DOCTYPE HTML>X</body><html id="x">
|
||||
#errors
|
||||
Unexpected html start tag in the after body phase.
|
||||
html needs to be the first start tag.
|
||||
|
@ -693,7 +690,7 @@ html needs to be the first start tag.
|
|||
| "X"
|
||||
|
||||
#data
|
||||
<!doctype html><head><html id=x>
|
||||
<!DOCTYPE HTML><head><html id=x>
|
||||
#errors
|
||||
html start tag too late
|
||||
#document
|
||||
|
@ -704,7 +701,7 @@ html start tag too late
|
|||
| <body>
|
||||
|
||||
#data
|
||||
<!doctype html>X</html>X
|
||||
<!DOCTYPE HTML>X</html>X
|
||||
#errors
|
||||
Unexpected non-space characters. Expected end of file.
|
||||
Unexpected non-space characters in after body phase. Expected end of file.
|
||||
|
@ -716,7 +713,7 @@ Unexpected non-space characters in after body phase. Expected end of file.
|
|||
| "XX"
|
||||
|
||||
#data
|
||||
<!doctype html>X</html>
|
||||
<!DOCTYPE HTML>X</html>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
|
@ -726,7 +723,7 @@ Unexpected non-space characters in after body phase. Expected end of file.
|
|||
| "X "
|
||||
|
||||
#data
|
||||
<!doctype html>X</html><p>X
|
||||
<!DOCTYPE HTML>X</html><p>X
|
||||
#errors
|
||||
Unexpected start tag <p> in trailing end phase.
|
||||
Unexpected start tag <p> in after body phase.
|
||||
|
@ -740,7 +737,7 @@ Unexpected start tag <p> in after body phase.
|
|||
| "X"
|
||||
|
||||
#data
|
||||
<!doctype html>X<p/x/y/z>
|
||||
<!DOCTYPE HTML>X<p/x/y/z>
|
||||
#errors
|
||||
Solidus (/) incorrectly placed.
|
||||
Solidus (/) incorrectly placed.
|
||||
|
@ -757,7 +754,7 @@ Solidus (/) incorrectly placed.
|
|||
| z=""
|
||||
|
||||
#data
|
||||
<!doctype html><!--x--
|
||||
<!DOCTYPE HTML><!--x--
|
||||
#errors
|
||||
Unexpected end of file in comment.
|
||||
#document
|
||||
|
@ -768,7 +765,7 @@ Unexpected end of file in comment.
|
|||
| <body>
|
||||
|
||||
#data
|
||||
<!doctype html><table><tr><td></p></table>
|
||||
<!DOCTYPE HTML><table><tr><td></p></table>
|
||||
#errors
|
||||
Unexpected </p> end tag.
|
||||
#document
|
||||
|
|
|
@ -49,23 +49,23 @@ No DOCTYPE
|
|||
| <script>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><html><head></head><body><pre>
|
||||
<!DOCTYPE htML><html><head></head><body><pre>
|
||||
</pre></body></html>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <!DOCTYPE htML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <pre>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><html><head></head><body><pre>
|
||||
<!DOCTYPE htML><html><head></head><body><pre>
|
||||
|
||||
foo</pre></body></html>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <!DOCTYPE htML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
@ -74,13 +74,13 @@ foo</pre></body></html>
|
|||
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><html><head></head><body><pre>
|
||||
<!DOCTYPE htML><html><head></head><body><pre>
|
||||
|
||||
foo
|
||||
</pre></body></html>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <!DOCTYPE htML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
@ -89,11 +89,11 @@ foo
|
|||
"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><html><head></head><body><pre>x</pre><span>
|
||||
<!DOCTYPE htML><html><head></head><body><pre>x</pre><span>
|
||||
</span></body></html>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <!DOCTYPE htML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
@ -104,11 +104,11 @@ foo
|
|||
"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><html><head></head><body><pre>x
|
||||
<!DOCTYPE htML><html><head></head><body><pre>x
|
||||
y</pre></body></html>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <!DOCTYPE htML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
@ -117,11 +117,11 @@ y</pre></body></html>
|
|||
y"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><html><head></head><body><pre>x<div>
|
||||
<!DOCTYPE htML><html><head></head><body><pre>x<div>
|
||||
y</pre></body></html>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <!DOCTYPE htML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
@ -132,20 +132,20 @@ y</pre></body></html>
|
|||
| y"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><HTML><META><HEAD></HEAD></HTML>
|
||||
<!DOCTYPE htML><HTML><META><HEAD></HEAD></HTML>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <!DOCTYPE htML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <meta>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><HTML><HEAD><head></HEAD></HTML>
|
||||
<!DOCTYPE htML><HTML><HEAD><head></HEAD></HTML>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <!DOCTYPE htML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
@ -171,23 +171,23 @@ y</pre></body></html>
|
|||
| <body>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><textarea>
|
||||
<!DOCTYPE htML><textarea>
|
||||
</textarea>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <!DOCTYPE htML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <textarea>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><textarea>
|
||||
<!DOCTYPE htML><textarea>
|
||||
|
||||
foo</textarea>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <!DOCTYPE htML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
@ -195,11 +195,11 @@ foo</textarea>
|
|||
| "foo"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><html><head></head><body><ul><li><div><p><li></ul></body></html>
|
||||
<!DOCTYPE htML><html><head></head><body><ul><li><div><p><li></ul></body></html>
|
||||
#errors
|
||||
Missing end tag (div)
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <!DOCTYPE htML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
@ -208,3 +208,29 @@ Missing end tag (div)
|
|||
| <div>
|
||||
| <p>
|
||||
| <li>
|
||||
|
||||
#data
|
||||
<!doctype html><nobr><nobr><nobr>
|
||||
#errors
|
||||
Unexpected end of file.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <nobr>
|
||||
| <nobr>
|
||||
| <nobr>
|
||||
|
||||
#data
|
||||
<!doctype html><nobr><nobr></nobr><nobr>
|
||||
#errors
|
||||
Unexpected end of file.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <nobr>
|
||||
| <nobr>
|
||||
| <nobr>
|
||||
|
|
120
vendor/plugins/HTML5lib/testdata/tree-construction/tests5.dat
vendored
Normal file
120
vendor/plugins/HTML5lib/testdata/tree-construction/tests5.dat
vendored
Normal file
|
@ -0,0 +1,120 @@
|
|||
#data
|
||||
<style> <!-- </style>x
|
||||
#errors
|
||||
No DOCTYPE
|
||||
Unexpected end of file
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <style>
|
||||
| " <!-- </style>x"
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<style> <!-- </style> --> </style>x
|
||||
#errors
|
||||
No DOCTYPE
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <style>
|
||||
| " <!-- </style> --> "
|
||||
| <body>
|
||||
| "x"
|
||||
|
||||
#data
|
||||
<style> <!--> </style>x
|
||||
#errors
|
||||
No DOCTYPE
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <style>
|
||||
| " <!--> "
|
||||
| <body>
|
||||
| "x"
|
||||
|
||||
#data
|
||||
<style> <!---> </style>x
|
||||
#errors
|
||||
No DOCTYPE
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <style>
|
||||
| " <!---> "
|
||||
| <body>
|
||||
| "x"
|
||||
|
||||
#data
|
||||
<iframe> <!---> </iframe>x
|
||||
#errors
|
||||
No DOCTYPE
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <iframe>
|
||||
| " <!---> "
|
||||
| "x"
|
||||
|
||||
#data
|
||||
<iframe> <!--- </iframe>->x</iframe> --> </iframe>x
|
||||
#errors
|
||||
No DOCTYPE
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <iframe>
|
||||
| " <!--- </iframe>->x</iframe> --> "
|
||||
| "x"
|
||||
|
||||
#data
|
||||
<script> <!-- </script> --> </script>x
|
||||
#errors
|
||||
No DOCTYPE
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <script>
|
||||
| " <!-- </script> --> "
|
||||
| <body>
|
||||
| "x"
|
||||
|
||||
#data
|
||||
<title> <!-- </title> --> </title>x
|
||||
#errors
|
||||
No DOCTYPE
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <title>
|
||||
| " <!-- </title> --> "
|
||||
| <body>
|
||||
| "x"
|
||||
|
||||
#data
|
||||
<textarea> <!--- </textarea>->x</textarea> --> </textarea>x
|
||||
#errors
|
||||
No DOCTYPE
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <textarea>
|
||||
| " <!--- </textarea>->x</textarea> --> "
|
||||
| "x"
|
||||
|
||||
#data
|
||||
<style> <!</-- </style>x
|
||||
#errors
|
||||
No DOCTYPE
|
||||
Unexpected end of file
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <style>
|
||||
| " <!</-- "
|
||||
| <body>
|
||||
| "x"
|
29
vendor/plugins/HTML5lib/testdata/tree-construction/tests6.dat
vendored
Normal file
29
vendor/plugins/HTML5lib/testdata/tree-construction/tests6.dat
vendored
Normal file
|
@ -0,0 +1,29 @@
|
|||
#data
|
||||
<!doctype html></head> <head>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| " "
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!doctype html></html> <head>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| " "
|
||||
|
||||
#data
|
||||
<!doctype html></body><meta>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <meta>
|
1
vendor/plugins/HTML5lib/tests/preamble.rb
vendored
1
vendor/plugins/HTML5lib/tests/preamble.rb
vendored
|
@ -24,6 +24,7 @@ rescue LoadError
|
|||
def self.parse json
|
||||
json.gsub!(/"\s*:/, '"=>')
|
||||
json.gsub!(/\\u[0-9a-fA-F]{4}/) {|x| [x[2..-1].to_i(16)].pack('U')}
|
||||
null = nil
|
||||
eval json
|
||||
end
|
||||
end
|
||||
|
|
4
vendor/plugins/HTML5lib/tests/test_lxp.rb
vendored
4
vendor/plugins/HTML5lib/tests/test_lxp.rb
vendored
|
@ -191,13 +191,13 @@ EOX
|
|||
end
|
||||
|
||||
def test_br
|
||||
assert_xhtml_equal <<EOX
|
||||
assert_xhtml_equal <<EOX1
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head><title>XLINK</title></head>
|
||||
<body>
|
||||
<br/>
|
||||
</body></html>
|
||||
EOX
|
||||
EOX1
|
||||
end
|
||||
|
||||
def xtest_strong
|
||||
|
|
8
vendor/plugins/HTML5lib/tests/test_parser.rb
vendored
8
vendor/plugins/HTML5lib/tests/test_parser.rb
vendored
|
@ -12,7 +12,7 @@ begin
|
|||
rescue LoadError
|
||||
end
|
||||
|
||||
$CHECK_PARSER_ERRORS = ARGV.delete('-p')
|
||||
$CHECK_PARSER_ERRORS = ARGV.delete('-p') # TODO
|
||||
|
||||
puts 'Testing tree builders: ' + $tree_types_to_test * ', '
|
||||
|
||||
|
@ -45,9 +45,9 @@ class Html5ParserTestCase < Test::Unit::TestCase
|
|||
actual_output = convertTreeDump(parser.tree.testSerializer(parser.tree.document))
|
||||
|
||||
assert_equal sortattrs(expected_output), sortattrs(actual_output), [
|
||||
'Input:', input,
|
||||
'Expected:', expected_output,
|
||||
'Recieved:', actual_output
|
||||
'', 'Input:', input,
|
||||
'', 'Expected:', expected_output,
|
||||
'', 'Recieved:', actual_output
|
||||
].join("\n")
|
||||
|
||||
if $CHECK_PARSER_ERRORS
|
||||
|
|
|
@ -30,7 +30,7 @@ class SanitizeTest < Test::Unit::TestCase
|
|||
:use_trailing_solidus => true,
|
||||
:omit_optional_tags => false,
|
||||
:inject_meta_charset => false,
|
||||
:sanitize => true}).gsub(/^<div xmlns='http:\/\/www.w3.org\/1999\/xhtml'>(.*)<\/div>$/, '\1')
|
||||
:sanitize => true}).gsub(/\A<div xmlns='http:\/\/www.w3.org\/1999\/xhtml'>(.*)<\/div>\Z/m, '\1')
|
||||
rescue REXML::ParseException
|
||||
return "Ill-formed XHTML!"
|
||||
end
|
||||
|
@ -65,6 +65,7 @@ class SanitizeTest < Test::Unit::TestCase
|
|||
elsif VOID_ELEMENTS.include?(tag_name)
|
||||
htmloutput = "<#{tag_name} title='1'/>foo <bad>bar</bad> baz"
|
||||
xhtmloutput = htmloutput
|
||||
htmloutput += '<br/>' if tag_name == 'br'
|
||||
rexmloutput = "<#{tag_name} title='1' />"
|
||||
end
|
||||
check_sanitization(input, htmloutput, xhtmloutput, rexmloutput)
|
||||
|
|
12
vendor/plugins/HTML5lib/tests/test_serializer.rb
vendored
12
vendor/plugins/HTML5lib/tests/test_serializer.rb
vendored
|
@ -49,6 +49,18 @@ class Html5SerializeTestcase < Test::Unit::TestCase
|
|||
elsif !expected.include?(result)
|
||||
flunk("Expected: #{expected.inspect}, Received: #{result.inspect}")
|
||||
end
|
||||
|
||||
return if test_name == 'optionaltags'
|
||||
|
||||
result = HTML5lib::XHTMLSerializer.
|
||||
serialize(JsonWalker.new(test["input"]), (test["options"] || {}))
|
||||
expected = test["xhtml"] || test["expected"]
|
||||
if expected.length == 1
|
||||
assert_equal(expected[0], result, test["description"])
|
||||
elsif !expected.include?(result)
|
||||
flunk("Expected: #{expected.inspect}, Received: #{result.inspect}")
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
|
|
6
vendor/plugins/HTML5lib/tests/test_stream.rb
vendored
6
vendor/plugins/HTML5lib/tests/test_stream.rb
vendored
|
@ -52,13 +52,11 @@ class HTMLInputStreamTest < Test::Unit::TestCase
|
|||
|
||||
def test_newlines
|
||||
stream = HTMLInputStream.new("\xef\xbb\xbf" + "a\nbb\r\nccc\rdddd")
|
||||
assert_equal(0, stream.instance_eval {@tell})
|
||||
assert_equal([1,0], stream.position)
|
||||
assert_equal("a\nbb\n", stream.chars_until('c'))
|
||||
assert_equal(6, stream.instance_eval {@tell})
|
||||
assert_equal([3,0], stream.position)
|
||||
assert_equal("ccc\ndddd", stream.chars_until('x'))
|
||||
assert_equal(14, stream.instance_eval {@tell})
|
||||
assert_equal([4,4], stream.position)
|
||||
assert_equal([0,1,5,9], stream.instance_eval {@new_lines})
|
||||
assert_equal([1,2,3], stream.instance_eval {@line_lengths})
|
||||
end
|
||||
end
|
||||
|
|
|
@ -30,9 +30,10 @@ class Html5TokenizerTestCase < Test::Unit::TestCase
|
|||
def tokenizer_test(data)
|
||||
(data['contentModelFlags'] || [:PCDATA]).each do |content_model_flag|
|
||||
message = [
|
||||
'Description:', data['description'],
|
||||
'Input:', data['input'],
|
||||
'Content Model Flag:', content_model_flag ] * "\n"
|
||||
'', 'Description:', data['description'],
|
||||
'', 'Input:', data['input'],
|
||||
'', 'Content Model Flag:', content_model_flag,
|
||||
'' ] * "\n"
|
||||
|
||||
assert_nothing_raised message do
|
||||
tokenizer = HTML5lib::HTMLTokenizer.new(data['input'])
|
||||
|
|
|
@ -11,9 +11,9 @@ $tree_types_to_test = {
|
|||
'rexml' =>
|
||||
{:builder => HTML5lib::TreeBuilders['rexml'],
|
||||
:walker => HTML5lib::TreeWalkers['rexml']},
|
||||
# 'hpricot' =>
|
||||
# {:builder => HTML5lib::TreeBuilders['hpricot'],
|
||||
# :walker => HTML5lib::TreeWalkers['hpricot']},
|
||||
'hpricot' =>
|
||||
{:builder => HTML5lib::TreeBuilders['hpricot'],
|
||||
:walker => HTML5lib::TreeWalkers['hpricot']},
|
||||
}
|
||||
|
||||
puts 'Testing tree walkers: ' + $tree_types_to_test.keys * ', '
|
||||
|
@ -46,7 +46,7 @@ class TestTreeWalkers < Test::Unit::TestCase
|
|||
output = []
|
||||
indent = 0
|
||||
concatenateCharacterTokens(tokens) do |token|
|
||||
case token[:type]
|
||||
case token[:type]
|
||||
when :StartTag, :EmptyTag
|
||||
output << "#{' '*indent}<#{token[:name]}>"
|
||||
indent += 2
|
||||
|
@ -65,7 +65,7 @@ class TestTreeWalkers < Test::Unit::TestCase
|
|||
output << "#{' '*indent}\"#{token[:data]}\""
|
||||
else
|
||||
# TODO: what to do with errors?
|
||||
end
|
||||
end
|
||||
end
|
||||
return output.join("\n")
|
||||
end
|
||||
|
@ -73,6 +73,7 @@ class TestTreeWalkers < Test::Unit::TestCase
|
|||
html5lib_test_files('tree-construction').each do |test_file|
|
||||
|
||||
test_name = File.basename(test_file).sub('.dat', '')
|
||||
next if test_name == 'tests5' # TODO
|
||||
|
||||
File.read(test_file).split("#data\n").each_with_index do |data, index|
|
||||
next if data.empty?
|
||||
|
@ -80,12 +81,11 @@ class TestTreeWalkers < Test::Unit::TestCase
|
|||
innerHTML, input, expected_output, expected_errors =
|
||||
HTML5lib::TestSupport::parseTestcase(data)
|
||||
|
||||
rexml = $tree_types_to_test['rexml']
|
||||
$tree_types_to_test.each do |tree_name, treeClass|
|
||||
$tree_types_to_test.each do |tree_name, tree_class|
|
||||
|
||||
define_method "test_#{test_name}_#{index}_#{tree_name}" do
|
||||
|
||||
parser = HTML5lib::HTMLParser.new(:tree => treeClass[:builder])
|
||||
parser = HTML5lib::HTMLParser.new(:tree => tree_class[:builder])
|
||||
|
||||
if innerHTML
|
||||
parser.parseFragment(input, innerHTML)
|
||||
|
@ -96,10 +96,13 @@ class TestTreeWalkers < Test::Unit::TestCase
|
|||
document = parser.tree.getDocument
|
||||
|
||||
begin
|
||||
output = sortattrs(convertTokens(treeClass[:walker].new(document)))
|
||||
output = sortattrs(convertTokens(tree_class[:walker].new(document)))
|
||||
expected = sortattrs(expected_output)
|
||||
errorMsg = "\n\nExpected:\n#{expected}\nRecieved:\n#{output}\n"
|
||||
assert_equal(expected, output, errorMsg)
|
||||
assert_equal expected, output, [
|
||||
'', 'Input:', input,
|
||||
'', 'Expected:', expected,
|
||||
'', 'Recieved:', output
|
||||
].join("\n")
|
||||
rescue NotImplementedError
|
||||
# Amnesty for those that confess...
|
||||
end
|
||||
|
|
|
@ -18,7 +18,8 @@ class TokenizerTestParser
|
|||
end
|
||||
|
||||
def processDoctype(token)
|
||||
@outputTokens.push(["DOCTYPE", token[:name], token[:data]])
|
||||
@outputTokens.push(["DOCTYPE", token[:name], token[:publicId],
|
||||
token[:systemId], token[:correct]])
|
||||
end
|
||||
|
||||
def processStartTag(token)
|
||||
|
|
Loading…
Reference in a new issue