Minor S5 tweaks and Sync with Latest HTML5lib

This commit is contained in:
Jacques Distler 2007-08-30 12:19:10 -05:00
parent dbed460843
commit 81d3cdc8e4
81 changed files with 9887 additions and 1687 deletions

View file

@ -18,7 +18,7 @@ xml.feed('xmlns' => "http://www.w3.org/2005/Atom", "xml:lang" => 'en') do
xml.name(page.author) xml.name(page.author)
end end
if @hide_description if @hide_description
xml.summary('Content suppressed.', 'type' => 'text') xml.summary("Updated by #{page.author} on #{page.updated_at.getgm.strftime("%Y-%m-%d")} at #{page.updated_at.getgm.strftime("%H:%M:%SZ")}.", 'type' => 'text')
else else
xml.content('type' => 'xhtml', 'xml:base' => url_for(:only_path => false, :web => @web_name, :action => @link_action, :id => page.name) ) do xml.content('type' => 'xhtml', 'xml:base' => url_for(:only_path => false, :web => @web_name, :action => @link_action, :id => page.name) ) do
xml.div('xmlns' => 'http://www.w3.org/1999/xhtml' ) do xml.div('xmlns' => 'http://www.w3.org/1999/xhtml' ) do

View file

@ -16,4 +16,4 @@ table.plaintable {
text-align:center; text-align:center;
margin-left:30px; margin-left:30px;
} }
.noborder td, .noborder th {border:0}

View file

@ -1,6 +1,6 @@
/* Following are the presentation styles -- edit away! */ /* Following are the presentation styles -- edit away! */
body {background: #FFF; color: #000; font-size: 2em;} body {background: #FFF; color: #000; font-size: 1.6em;}
:link, :visited {text-decoration: none; color: #00C;} :link, :visited {text-decoration: none; color: #00C;}
#controls :active {color: #8A8 !important;} #controls :active {color: #8A8 !important;}
#controls :focus {outline: 1px dotted #272;} #controls :focus {outline: 1px dotted #272;}

View file

@ -0,0 +1,64 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN" "http://www.w3.org/Math/DTD/mathml2/xhtml-math11-f.dtd" >
<!-- Do not edit this document! The system will likely break if you do. -->
<!-- Notes window. Every handler in this page calls a function or sets a flag on the window that opened it (opener). -->
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Notes</title>
<link rel="stylesheet" href="default/notes.css" type="text/css" />
<!-- Key and click events raised in this window are handed to the handlers defined on the opener window. -->
<script type="text/javascript">
// <![CDATA[
document.onkeyup = opener.keys;
document.onkeypress = opener.trap;
document.onclick = opener.clicker;
// ]]>
</script>
</head>
<!-- onload / onunload set opener.s5NotesWindowLoaded to true / false. -->
<body onload="opener.s5NotesWindowLoaded=true;" onunload="opener.s5NotesWindowLoaded=false;">
<!-- Elapsed-time panel: one clock for the whole presentation and one for the current slide. -->
<div class="timers" id="elapsed">
<h1>
<a href="#" onclick="opener.minimizeTimer('elapsed'); return false;">Elapsed Time</a>
</h1>
<ul>
<li>
<h2>Presentation</h2>
<span class="clock" id="elapsed-presentation">00:00:00</span>
</li>
<li>
<h2>Current Slide</h2>
<span class="clock" id="elapsed-slide">00:00:00</span>
</li>
</ul>
<div class="controls">
<a href="#reset-elapsed" onclick="opener.resetElapsedTime(); return false;" title="Reset Elapsed Time">|&larr;</a>
</div>
</div>
<!-- Remaining-time panel: the minus and plus links call opener.alterRemainingTime with '-5' and '5' (minutes, per their titles). -->
<div class="timers" id="remaining">
<h1>
<a href="#" onclick="opener.minimizeTimer('remaining'); return false;">Remaining Time</a>
</h1>
<p>
<a href="#subtract-remaining" class="control" id="minus" onclick="opener.alterRemainingTime('-5'); return false;" title="Subtract 5 Minutes">-</a>
<span class="clock" id="timeLeft">00:00:00</span>
<a href="#add-remaining" class="control" id="plus" onclick="opener.alterRemainingTime('5'); return false;" title="Add 5 Minutes">+</a>
</p>
<div class="controls">
<!-- Submitting the form calls opener.resetRemainingTime(), the same call as the reset link below. Presumably the startFrom field supplies the starting value; confirm in the opener script. -->
<form action="#" onsubmit="opener.resetRemainingTime(); return false;">
<input type="text" class="text" id="startFrom" value="0" size="4" maxlength="4" />
<a href="#toggle-remaining" onclick="opener.toggleRemainingTime(); return false;" title="Pause/Run Remaining Time">||</a>
<a href="#reset-remaining" onclick="opener.resetRemainingTime(); return false;" title="Reset Remaining Time">|&larr;</a>
</form>
</div>
</div>
<!-- Placeholders (slide, notes, next, nextnotes); presumably filled in by the opener script; confirm there. -->
<h2 id="slide">...</h2>
<div id="notes"></div>
<h2 id="next">...</h2>
<div id="nextnotes"></div>
</body>
</html>

5
vendor/plugins/HTML5lib/History.txt vendored Normal file
View file

@ -0,0 +1,5 @@
== 0.1.0 / 2007-08-07
* 1 major enhancement
* Birthday!

59
vendor/plugins/HTML5lib/Manifest.txt vendored Normal file
View file

@ -0,0 +1,59 @@
History.txt
Manifest.txt
README
Rakefile.rb
lib/html5.rb
lib/html5/constants.rb
lib/html5/filters/base.rb
lib/html5/filters/inject_meta_charset.rb
lib/html5/filters/optionaltags.rb
lib/html5/filters/sanitizer.rb
lib/html5/filters/whitespace.rb
lib/html5/html5parser.rb
lib/html5/html5parser/after_body_phase.rb
lib/html5/html5parser/after_frameset_phase.rb
lib/html5/html5parser/after_head_phase.rb
lib/html5/html5parser/before_head_phase.rb
lib/html5/html5parser/in_body_phase.rb
lib/html5/html5parser/in_caption_phase.rb
lib/html5/html5parser/in_cell_phase.rb
lib/html5/html5parser/in_column_group_phase.rb
lib/html5/html5parser/in_frameset_phase.rb
lib/html5/html5parser/in_head_phase.rb
lib/html5/html5parser/in_row_phase.rb
lib/html5/html5parser/in_select_phase.rb
lib/html5/html5parser/in_table_body_phase.rb
lib/html5/html5parser/in_table_phase.rb
lib/html5/html5parser/initial_phase.rb
lib/html5/html5parser/phase.rb
lib/html5/html5parser/root_element_phase.rb
lib/html5/html5parser/trailing_end_phase.rb
lib/html5/inputstream.rb
lib/html5/liberalxmlparser.rb
lib/html5/sanitizer.rb
lib/html5/serializer.rb
lib/html5/serializer/htmlserializer.rb
lib/html5/serializer/xhtmlserializer.rb
lib/html5/tokenizer.rb
lib/html5/treebuilders.rb
lib/html5/treebuilders/base.rb
lib/html5/treebuilders/hpricot.rb
lib/html5/treebuilders/rexml.rb
lib/html5/treebuilders/simpletree.rb
lib/html5/treewalkers.rb
lib/html5/treewalkers/base.rb
lib/html5/treewalkers/hpricot.rb
lib/html5/treewalkers/rexml.rb
lib/html5/treewalkers/simpletree.rb
lib/html5/version.rb
parse.rb
tests/preamble.rb
tests/test_encoding.rb
tests/test_lxp.rb
tests/test_parser.rb
tests/test_sanitizer.rb
tests/test_serializer.rb
tests/test_stream.rb
tests/test_tokenizer.rb
tests/test_treewalkers.rb
tests/tokenizer_test_parser.rb

View file

@ -1,9 +1,45 @@
= HTML5lib html5
by Ryan King, et al
http://code.google.com/p/html5lib
== Basic Usage == DESCRIPTION:
require 'html5lib' A ruby implementation of the parsing algorithm in HTML5.
doc = HTML5lib.parse('<html>...</html>')
doc.class # REXML::Document == FEATURES/PROBLEMS:
== SYNOPSIS:
TODO
== REQUIREMENTS:
* chardet, only tested with 0.9.0
== INSTALL:
* sudo gem install html5
== LICENSE:
Copyright (c) 2006-2007 The Authors
Contributors:
James Graham - jg307@cam.ac.uk
Anne van Kesteren - annevankesteren@gmail.com
Lachlan Hunt - lachlan.hunt@lachy.id.au
Matt McDonald - kanashii@kanashii.ca
Sam Ruby - rubys@intertwingly.net
Ian Hickson (Google) - ian@hixie.ch
Thomas Broyer - t.broyer@ltgt.net
Jacques Distler - distler@golem.ph.utexas.edu
Ryan King - ryan@theryanking.com
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View file

@ -1,7 +1,33 @@
require 'rake' require 'rake'
require 'rake/testtask' require 'hoe'
require 'lib/html5/version'
Rake::TestTask.new do |task| Hoe.new("html5", HTML5::VERSION) do |p|
task.pattern = 'tests/test_*.rb' p.name = "html5"
task.verbose = true p.description = p.paragraphs_of('README', 2..5).join("\n\n")
p.summary = "HTML5 parser/tokenizer."
p.author = ['Ryan King'] # TODO: add more names
p.email = 'ryan@theryanking.com'
p.url = 'http://code.google.com/p/html5lib'
p.need_zip = true
p.extra_deps << ['chardet', '>= 0.9.0']
p.changes = p.paragraphs_of('History.txt', 0..1).join("\n\n")
end end
require 'rcov/rcovtask'
# Coverage tasks, grouped under the `test` namespace.
namespace :test do
namespace :coverage do
desc "Delete aggregate coverage data."
# rm_f does not fail when coverage.data is absent.
task(:clean) { rm_f "coverage.data" }
end
desc 'Aggregate code coverage for unit, functional and integration tests'
# Declared as :coverage with "test:coverage:clean" as its prerequisite (name => prerequisite form),
# so the old aggregate data is removed before a new run.
Rcov::RcovTask.new(:coverage => "test:coverage:clean") do |t|
# "tests" is added to the library path; the files run are tests/test_*.rb.
t.libs << "tests"
t.test_files = FileList["tests/test_*.rb"]
# The report is written under tests/coverage/.
t.output_dir = "tests/coverage/"
t.verbose = true
end
end

215
vendor/plugins/HTML5lib/bin/html5 vendored Executable file
View file

@ -0,0 +1,215 @@
#!/usr/bin/env ruby
# Make the html5 library loadable regardless of the directory the script is
# started from. This file is installed as bin/html5, so the library sits in
# ../lib relative to it; the bare 'lib' entry only resolves when the current
# directory happens to be the project root. The two original entries are kept
# (after the script-relative one) for backward compatibility.
$:.unshift File.expand_path('../lib', File.dirname(__FILE__)), File.dirname(__FILE__), 'lib'
# Parse the input named by the last command-line argument and print the result.
#
# opts - option object built by the command-line handling in this script; this
#        method reads treebuilder, output, parsemethod, profile and time, and
#        hands opts on to print_output.
# args - remaining command-line arguments; only the last one is used. It may be
#        an http:// or https:// URL, '-' for standard input, or a file path.
#
# Writes a message to stderr and exits with status 1 when args is empty.
def parse(opts, args)
  encoding = nil
  f = args[-1]
  if f
    begin
      if f =~ %r{\Ahttps?://}
        require 'open-uri'
        f = URI.parse(f).open
        encoding = f.charset
      elsif f == '-'
        f = $stdin
      else
        # File.open, not Kernel#open: Kernel#open starts a subprocess when its
        # argument begins with "|", which must never happen for a name taken
        # straight from the command line.
        f = File.open(f)
      end
    rescue StandardError
      # Deliberate best effort: when the source cannot be opened, f stays the
      # original string and that string is what gets handed to the parser
      # (presumably treated as literal markup - confirm against the parser).
    end
  else
    $stderr.write("No filename provided. Use -h for help\n")
    exit(1)
  end

  require 'html5/treebuilders'
  treebuilder = HTML5::TreeBuilders[opts.treebuilder]

  # XML output uses the liberal XML parser; everything else the HTML parser.
  if opts.output == :xml
    require 'html5/liberalxmlparser'
    parser = HTML5::XMLParser.new(:tree=>treebuilder)
  else
    require 'html5/html5parser'
    parser = HTML5::HTMLParser.new(:tree=>treebuilder)
  end

  # Every parse method other than :parse (i.e. fragment parsing) is also given
  # the container element name 'div'.
  if opts.parsemethod == :parse
    parse_args = [f, encoding]
  else
    parse_args = [f, 'div', encoding]
  end

  if opts.profile
    # Profiling run: the profile goes to stderr and the document is not printed.
    require 'profiler'
    Profiler__::start_profile
    parser.send(opts.parsemethod, *parse_args)
    Profiler__::stop_profile
    Profiler__::print_profile($stderr)
  elsif opts.time
    require 'time' # TODO: switch to benchmark
    t0 = Time.new
    document = parser.send(opts.parsemethod, *parse_args)
    t1 = Time.new
    print_output(parser, document, opts)
    t2 = Time.new
    puts "\n\nRun took: %fs (plus %fs to print the output)"%[t1-t0, t2-t1]
  else
    document = parser.send(opts.parsemethod, *parse_args)
    print_output(parser, document, opts)
  end
end
# Print the parsed document in the format selected by opts.output, optionally
# preceded by the encoding in use and followed by the list of parse errors.
#
# parser   - parser that produced the document; its tokenizer stream, tree and
#            errors are read here.
# document - parse result. For :tree output a value that does not respond to
#            each is wrapped in a one-element array first.
# opts     - option object; encoding, output, treebuilder, serializer and error
#            are read. An output value matching none of the branches prints
#            no document at all.
def print_output(parser, document, opts)
  if opts.encoding
    puts "Encoding: #{parser.tokenizer.stream.char_encoding}"
  end

  case opts.output
  when :tree
    fragments = document.respond_to?(:each) ? document : [document]
    fragments.each do |fragment|
      puts parser.tree.testSerializer(fragment)
    end
  when :hilite
    print document.hilite
  when :html
    # Walk the tree with the walker matching the chosen tree builder, then
    # serialize the resulting token stream.
    require 'html5/treewalkers'
    walker = HTML5::TreeWalkers[opts.treebuilder].new(document)
    require 'html5/serializer'
    puts HTML5::HTMLSerializer.serialize(walker, opts.serializer)
  when :xml
    print document
  end

  return unless opts.error

  # Each error is a (position, message) pair; the position fills "Line/Col".
  report = parser.errors.map do |pos, message|
    ("Line %i Col %i" % pos) + " " + message
  end
  $stdout.write("\nParse errors:\n" + report.join("\n") + "\n")
end
require 'ostruct'

# Defaults for a run. The command-line handlers defined further down overwrite
# individual fields; :serializer is the option hash given to the HTML serializer.
options = OpenStruct.new(
  :profile     => false,
  :time        => false,
  :output      => :html,
  :treebuilder => 'simpletree',
  :error       => false,
  :encoding    => false,
  :parsemethod => :parse,
  :serializer  => {
    :encoding            => 'utf-8',
    :omit_optional_tags  => false,
    :inject_meta_charset => false
  }
)
require 'optparse'

# Command-line definition. Every handler records its value in `options`; the
# order of the definitions is the order in which they appear in the help text.
# The parser is held in `option_parser` so that the block parameter `opts` no
# longer shadows an outer local of the same name.
option_parser = OptionParser.new do |opts|
  opts.separator ""
  opts.separator "Parse Options:"

  opts.on("-b", "--treebuilder NAME", "Tree builder to use (default: #{options.treebuilder})") do |treebuilder|
    options.treebuilder = treebuilder
  end

  opts.on("-f", "--fragment", "Parse as a fragment") do
    options.parsemethod = :parse_fragment
  end

  opts.separator ""
  opts.separator "Filter Options:"

  opts.on("--[no-]inject-meta-charset", "inject <meta charset>") do |inject|
    options.serializer[:inject_meta_charset] = inject
  end

  opts.on("--[no-]strip-whitespace", "strip unnecessary whitespace") do |strip|
    options.serializer[:strip_whitespace] = strip
  end

  opts.on("--[no-]sanitize", "escape unsafe tags") do |sanitize|
    options.serializer[:sanitize] = sanitize
  end

  opts.separator ""
  opts.separator "Output Options:"

  opts.on("--tree", "output as debug tree") do
    options.output = :tree
  end

  # XML output also switches the tree builder to rexml.
  opts.on("-x", "--xml", "output as xml") do
    options.output = :xml
    options.treebuilder = "rexml"
  end

  # --no-html sets the output to nil, so no document is printed at all.
  opts.on("--[no-]html", "Output as html") do |html|
    options.output = (html ? :html : nil)
  end

  opts.on("--hilite", "Output as formatted highlighted code.") do
    options.output = :hilite
  end

  opts.on("-e", "--error", "Print a list of parse errors") do |error|
    options.error = error
  end

  opts.separator ""
  opts.separator "Serialization Options:"

  opts.on("--[no-]omit-optional-tags", "Omit optional tags") do |omit|
    options.serializer[:omit_optional_tags] = omit
  end

  opts.on("--[no-]quote-attr-values", "Quote attribute values") do |quote|
    options.serializer[:quote_attr_values] = quote
  end

  opts.on("--[no-]use-best-quote-char", "Use best quote character") do |best|
    options.serializer[:use_best_quote_char] = best
  end

  opts.on("--quote-char C", "Use specified quote character") do |c|
    options.serializer[:quote_char] = c
  end

  opts.on("--[no-]minimize-boolean-attributes", "Minimize boolean attributes") do |min|
    options.serializer[:minimize_boolean_attributes] = min
  end

  opts.on("--[no-]use-trailing-solidus", "Use trailing solidus") do |slash|
    options.serializer[:use_trailing_solidus] = slash
  end

  opts.on("--[no-]escape-lt-in-attrs", "Escape less than signs in attribute values") do |lt|
    options.serializer[:escape_lt_in_attrs] = lt
  end

  opts.on("--[no-]escape-rcdata", "Escape rcdata element values") do |rcdata|
    options.serializer[:escape_rcdata] = rcdata
  end

  opts.separator ""
  opts.separator "Other Options:"

  opts.on("-p", "--[no-]profile", "Profile the run") do |profile|
    options.profile = profile
  end

  opts.on("-t", "--[no-]time", "Time the run") do |time|
    options.time = time
  end

  opts.on("-c", "--[no-]encoding", "Print character encoding used") do |encoding|
    options.encoding = encoding
  end

  opts.on_tail("-h", "--help", "Show this message") do
    puts opts
    exit
  end
end

# parse! removes the recognised switches from ARGV, leaving the input name
# (the last remaining argument) for parse to pick up.
option_parser.parse!(ARGV)
parse(options, ARGV)

View file

@ -1,11 +1,13 @@
require 'html5/html5parser' require 'html5/html5parser'
require 'html5/version'
module HTML5 module HTML5
def self.parse(stream, options={})
HTMLParser.parse(stream, options)
end
def self.parseFragment(stream, options={}) def self.parse(stream, options={})
HTMLParser.parse(stream, options) HTMLParser.parse(stream, options)
end end
# Module-level convenience entry point named for fragment parsing.
#
# stream  - input passed straight through to HTMLParser.parse.
# options - option hash passed straight through as well.
#
# NOTE(review): the body calls HTMLParser.parse, exactly as HTML5.parse does,
# so nothing fragment-specific happens here. Presumably this should call the
# parser's fragment entry point instead - confirm against HTMLParser.
def self.parse_fragment(stream, options={})
HTMLParser.parse(stream, options)
end
end end

View file

@ -161,23 +161,24 @@ module HTML5
] ]
BOOLEAN_ATTRIBUTES = { BOOLEAN_ATTRIBUTES = {
:global => %w[irrelevant], :global => %w[irrelevant],
'style' => %w[scoped], 'style' => %w[scoped],
'img' => %w[ismap], 'img' => %w[ismap],
'audio' => %w[autoplay controls], 'audio' => %w[autoplay controls],
'video' => %w[autoplay controls], 'video' => %w[autoplay controls],
'script' => %w[defer async], 'script' => %w[defer async],
'details' => %w[open], 'details' => %w[open],
'datagrid' => %w[multiple disabled], 'datagrid' => %w[multiple disabled],
'command' => %w[hidden disabled checked default], 'command' => %w[hidden disabled checked default],
'menu' => %w[autosubmit], 'menu' => %w[autosubmit],
'fieldset' => %w[disabled readonly], 'fieldset' => %w[disabled readonly],
'option' => %w[disabled readonly selected], 'option' => %w[disabled readonly selected],
'optgroup' => %w[disabled readonly], 'optgroup' => %w[disabled readonly],
'button' => %w[disabled autofocus], 'button' => %w[disabled autofocus],
'input' => %w[disabled readonly required autofocus checked ismap], 'input' => %w[disabled readonly required autofocus checked ismap],
'select' => %w[disabled readonly autofocus multiple], 'select' => %w[disabled readonly autofocus multiple],
'output' => %w[disabled readonly] 'output' => %w[disabled readonly]
} }
# entitiesWindows1252 has to be _ordered_ and needs to have an index. # entitiesWindows1252 has to be _ordered_ and needs to have an index.
@ -227,372 +228,372 @@ module HTML5
# print ' ENTITIES = {\n ' + ',\n '.join(list) + '\n }' # print ' ENTITIES = {\n ' + ',\n '.join(list) + '\n }'
ENTITIES = { ENTITIES = {
'AElig' => "\xc3\x86", 'AElig' => "\xc3\x86",
'AElig;' => "\xc3\x86", 'AElig;' => "\xc3\x86",
'AMP' => '&', 'AMP' => '&',
'AMP;' => '&', 'AMP;' => '&',
'Aacute' => "\xc3\x81", 'Aacute' => "\xc3\x81",
'Aacute;' => "\xc3\x81", 'Aacute;' => "\xc3\x81",
'Acirc' => "\xc3\x82", 'Acirc' => "\xc3\x82",
'Acirc;' => "\xc3\x82", 'Acirc;' => "\xc3\x82",
'Agrave' => "\xc3\x80", 'Agrave' => "\xc3\x80",
'Agrave;' => "\xc3\x80", 'Agrave;' => "\xc3\x80",
'Alpha;' => "\xce\x91", 'Alpha;' => "\xce\x91",
'Aring' => "\xc3\x85", 'Aring' => "\xc3\x85",
'Aring;' => "\xc3\x85", 'Aring;' => "\xc3\x85",
'Atilde' => "\xc3\x83", 'Atilde' => "\xc3\x83",
'Atilde;' => "\xc3\x83", 'Atilde;' => "\xc3\x83",
'Auml' => "\xc3\x84", 'Auml' => "\xc3\x84",
'Auml;' => "\xc3\x84", 'Auml;' => "\xc3\x84",
'Beta;' => "\xce\x92", 'Beta;' => "\xce\x92",
'COPY' => "\xc2\xa9", 'COPY' => "\xc2\xa9",
'COPY;' => "\xc2\xa9", 'COPY;' => "\xc2\xa9",
'Ccedil' => "\xc3\x87", 'Ccedil' => "\xc3\x87",
'Ccedil;' => "\xc3\x87", 'Ccedil;' => "\xc3\x87",
'Chi;' => "\xce\xa7", 'Chi;' => "\xce\xa7",
'Dagger;' => "\xe2\x80\xa1", 'Dagger;' => "\xe2\x80\xa1",
'Delta;' => "\xce\x94", 'Delta;' => "\xce\x94",
'ETH' => "\xc3\x90", 'ETH' => "\xc3\x90",
'ETH;' => "\xc3\x90", 'ETH;' => "\xc3\x90",
'Eacute' => "\xc3\x89", 'Eacute' => "\xc3\x89",
'Eacute;' => "\xc3\x89", 'Eacute;' => "\xc3\x89",
'Ecirc' => "\xc3\x8a", 'Ecirc' => "\xc3\x8a",
'Ecirc;' => "\xc3\x8a", 'Ecirc;' => "\xc3\x8a",
'Egrave' => "\xc3\x88", 'Egrave' => "\xc3\x88",
'Egrave;' => "\xc3\x88", 'Egrave;' => "\xc3\x88",
'Epsilon;' => "\xce\x95", 'Epsilon;' => "\xce\x95",
'Eta;' => "\xce\x97", 'Eta;' => "\xce\x97",
'Euml' => "\xc3\x8b", 'Euml' => "\xc3\x8b",
'Euml;' => "\xc3\x8b", 'Euml;' => "\xc3\x8b",
'GT' => '>', 'GT' => '>',
'GT;' => '>', 'GT;' => '>',
'Gamma;' => "\xce\x93", 'Gamma;' => "\xce\x93",
'Iacute' => "\xc3\x8d", 'Iacute' => "\xc3\x8d",
'Iacute;' => "\xc3\x8d", 'Iacute;' => "\xc3\x8d",
'Icirc' => "\xc3\x8e", 'Icirc' => "\xc3\x8e",
'Icirc;' => "\xc3\x8e", 'Icirc;' => "\xc3\x8e",
'Igrave' => "\xc3\x8c", 'Igrave' => "\xc3\x8c",
'Igrave;' => "\xc3\x8c", 'Igrave;' => "\xc3\x8c",
'Iota;' => "\xce\x99", 'Iota;' => "\xce\x99",
'Iuml' => "\xc3\x8f", 'Iuml' => "\xc3\x8f",
'Iuml;' => "\xc3\x8f", 'Iuml;' => "\xc3\x8f",
'Kappa;' => "\xce\x9a", 'Kappa;' => "\xce\x9a",
'LT' => '<', 'LT' => '<',
'LT;' => '<', 'LT;' => '<',
'Lambda;' => "\xce\x9b", 'Lambda;' => "\xce\x9b",
'Mu;' => "\xce\x9c", 'Mu;' => "\xce\x9c",
'Ntilde' => "\xc3\x91", 'Ntilde' => "\xc3\x91",
'Ntilde;' => "\xc3\x91", 'Ntilde;' => "\xc3\x91",
'Nu;' => "\xce\x9d", 'Nu;' => "\xce\x9d",
'OElig;' => "\xc5\x92", 'OElig;' => "\xc5\x92",
'Oacute' => "\xc3\x93", 'Oacute' => "\xc3\x93",
'Oacute;' => "\xc3\x93", 'Oacute;' => "\xc3\x93",
'Ocirc' => "\xc3\x94", 'Ocirc' => "\xc3\x94",
'Ocirc;' => "\xc3\x94", 'Ocirc;' => "\xc3\x94",
'Ograve' => "\xc3\x92", 'Ograve' => "\xc3\x92",
'Ograve;' => "\xc3\x92", 'Ograve;' => "\xc3\x92",
'Omega;' => "\xce\xa9", 'Omega;' => "\xce\xa9",
'Omicron;' => "\xce\x9f", 'Omicron;' => "\xce\x9f",
'Oslash' => "\xc3\x98", 'Oslash' => "\xc3\x98",
'Oslash;' => "\xc3\x98", 'Oslash;' => "\xc3\x98",
'Otilde' => "\xc3\x95", 'Otilde' => "\xc3\x95",
'Otilde;' => "\xc3\x95", 'Otilde;' => "\xc3\x95",
'Ouml' => "\xc3\x96", 'Ouml' => "\xc3\x96",
'Ouml;' => "\xc3\x96", 'Ouml;' => "\xc3\x96",
'Phi;' => "\xce\xa6", 'Phi;' => "\xce\xa6",
'Pi;' => "\xce\xa0", 'Pi;' => "\xce\xa0",
'Prime;' => "\xe2\x80\xb3", 'Prime;' => "\xe2\x80\xb3",
'Psi;' => "\xce\xa8", 'Psi;' => "\xce\xa8",
'QUOT' => '"', 'QUOT' => '"',
'QUOT;' => '"', 'QUOT;' => '"',
'REG' => "\xc2\xae", 'REG' => "\xc2\xae",
'REG;' => "\xc2\xae", 'REG;' => "\xc2\xae",
'Rho;' => "\xce\xa1", 'Rho;' => "\xce\xa1",
'Scaron;' => "\xc5\xa0", 'Scaron;' => "\xc5\xa0",
'Sigma;' => "\xce\xa3", 'Sigma;' => "\xce\xa3",
'THORN' => "\xc3\x9e", 'THORN' => "\xc3\x9e",
'THORN;' => "\xc3\x9e", 'THORN;' => "\xc3\x9e",
'TRADE;' => "\xe2\x84\xa2", 'TRADE;' => "\xe2\x84\xa2",
'Tau;' => "\xce\xa4", 'Tau;' => "\xce\xa4",
'Theta;' => "\xce\x98", 'Theta;' => "\xce\x98",
'Uacute' => "\xc3\x9a", 'Uacute' => "\xc3\x9a",
'Uacute;' => "\xc3\x9a", 'Uacute;' => "\xc3\x9a",
'Ucirc' => "\xc3\x9b", 'Ucirc' => "\xc3\x9b",
'Ucirc;' => "\xc3\x9b", 'Ucirc;' => "\xc3\x9b",
'Ugrave' => "\xc3\x99", 'Ugrave' => "\xc3\x99",
'Ugrave;' => "\xc3\x99", 'Ugrave;' => "\xc3\x99",
'Upsilon;' => "\xce\xa5", 'Upsilon;' => "\xce\xa5",
'Uuml' => "\xc3\x9c", 'Uuml' => "\xc3\x9c",
'Uuml;' => "\xc3\x9c", 'Uuml;' => "\xc3\x9c",
'Xi;' => "\xce\x9e", 'Xi;' => "\xce\x9e",
'Yacute' => "\xc3\x9d", 'Yacute' => "\xc3\x9d",
'Yacute;' => "\xc3\x9d", 'Yacute;' => "\xc3\x9d",
'Yuml;' => "\xc5\xb8", 'Yuml;' => "\xc5\xb8",
'Zeta;' => "\xce\x96", 'Zeta;' => "\xce\x96",
'aacute' => "\xc3\xa1", 'aacute' => "\xc3\xa1",
'aacute;' => "\xc3\xa1", 'aacute;' => "\xc3\xa1",
'acirc' => "\xc3\xa2", 'acirc' => "\xc3\xa2",
'acirc;' => "\xc3\xa2", 'acirc;' => "\xc3\xa2",
'acute' => "\xc2\xb4", 'acute' => "\xc2\xb4",
'acute;' => "\xc2\xb4", 'acute;' => "\xc2\xb4",
'aelig' => "\xc3\xa6", 'aelig' => "\xc3\xa6",
'aelig;' => "\xc3\xa6", 'aelig;' => "\xc3\xa6",
'agrave' => "\xc3\xa0", 'agrave' => "\xc3\xa0",
'agrave;' => "\xc3\xa0", 'agrave;' => "\xc3\xa0",
'alefsym;' => "\xe2\x84\xb5", 'alefsym;' => "\xe2\x84\xb5",
'alpha;' => "\xce\xb1", 'alpha;' => "\xce\xb1",
'amp' => '&', 'amp' => '&',
'amp;' => '&', 'amp;' => '&',
'and;' => "\xe2\x88\xa7", 'and;' => "\xe2\x88\xa7",
'ang;' => "\xe2\x88\xa0", 'ang;' => "\xe2\x88\xa0",
'apos;' => "'", 'apos;' => "'",
'aring' => "\xc3\xa5", 'aring' => "\xc3\xa5",
'aring;' => "\xc3\xa5", 'aring;' => "\xc3\xa5",
'asymp;' => "\xe2\x89\x88", 'asymp;' => "\xe2\x89\x88",
'atilde' => "\xc3\xa3", 'atilde' => "\xc3\xa3",
'atilde;' => "\xc3\xa3", 'atilde;' => "\xc3\xa3",
'auml' => "\xc3\xa4", 'auml' => "\xc3\xa4",
'auml;' => "\xc3\xa4", 'auml;' => "\xc3\xa4",
'bdquo;' => "\xe2\x80\x9e", 'bdquo;' => "\xe2\x80\x9e",
'beta;' => "\xce\xb2", 'beta;' => "\xce\xb2",
'brvbar' => "\xc2\xa6", 'brvbar' => "\xc2\xa6",
'brvbar;' => "\xc2\xa6", 'brvbar;' => "\xc2\xa6",
'bull;' => "\xe2\x80\xa2", 'bull;' => "\xe2\x80\xa2",
'cap;' => "\xe2\x88\xa9", 'cap;' => "\xe2\x88\xa9",
'ccedil' => "\xc3\xa7", 'ccedil' => "\xc3\xa7",
'ccedil;' => "\xc3\xa7", 'ccedil;' => "\xc3\xa7",
'cedil' => "\xc2\xb8", 'cedil' => "\xc2\xb8",
'cedil;' => "\xc2\xb8", 'cedil;' => "\xc2\xb8",
'cent' => "\xc2\xa2", 'cent' => "\xc2\xa2",
'cent;' => "\xc2\xa2", 'cent;' => "\xc2\xa2",
'chi;' => "\xcf\x87", 'chi;' => "\xcf\x87",
'circ;' => "\xcb\x86", 'circ;' => "\xcb\x86",
'clubs;' => "\xe2\x99\xa3", 'clubs;' => "\xe2\x99\xa3",
'cong;' => "\xe2\x89\x85", 'cong;' => "\xe2\x89\x85",
'copy' => "\xc2\xa9", 'copy' => "\xc2\xa9",
'copy;' => "\xc2\xa9", 'copy;' => "\xc2\xa9",
'crarr;' => "\xe2\x86\xb5", 'crarr;' => "\xe2\x86\xb5",
'cup;' => "\xe2\x88\xaa", 'cup;' => "\xe2\x88\xaa",
'curren' => "\xc2\xa4", 'curren' => "\xc2\xa4",
'curren;' => "\xc2\xa4", 'curren;' => "\xc2\xa4",
'dArr;' => "\xe2\x87\x93", 'dArr;' => "\xe2\x87\x93",
'dagger;' => "\xe2\x80\xa0", 'dagger;' => "\xe2\x80\xa0",
'darr;' => "\xe2\x86\x93", 'darr;' => "\xe2\x86\x93",
'deg' => "\xc2\xb0", 'deg' => "\xc2\xb0",
'deg;' => "\xc2\xb0", 'deg;' => "\xc2\xb0",
'delta;' => "\xce\xb4", 'delta;' => "\xce\xb4",
'diams;' => "\xe2\x99\xa6", 'diams;' => "\xe2\x99\xa6",
'divide' => "\xc3\xb7", 'divide' => "\xc3\xb7",
'divide;' => "\xc3\xb7", 'divide;' => "\xc3\xb7",
'eacute' => "\xc3\xa9", 'eacute' => "\xc3\xa9",
'eacute;' => "\xc3\xa9", 'eacute;' => "\xc3\xa9",
'ecirc' => "\xc3\xaa", 'ecirc' => "\xc3\xaa",
'ecirc;' => "\xc3\xaa", 'ecirc;' => "\xc3\xaa",
'egrave' => "\xc3\xa8", 'egrave' => "\xc3\xa8",
'egrave;' => "\xc3\xa8", 'egrave;' => "\xc3\xa8",
'empty;' => "\xe2\x88\x85", 'empty;' => "\xe2\x88\x85",
'emsp;' => "\xe2\x80\x83", 'emsp;' => "\xe2\x80\x83",
'ensp;' => "\xe2\x80\x82", 'ensp;' => "\xe2\x80\x82",
'epsilon;' => "\xce\xb5", 'epsilon;' => "\xce\xb5",
'equiv;' => "\xe2\x89\xa1", 'equiv;' => "\xe2\x89\xa1",
'eta;' => "\xce\xb7", 'eta;' => "\xce\xb7",
'eth' => "\xc3\xb0", 'eth' => "\xc3\xb0",
'eth;' => "\xc3\xb0", 'eth;' => "\xc3\xb0",
'euml' => "\xc3\xab", 'euml' => "\xc3\xab",
'euml;' => "\xc3\xab", 'euml;' => "\xc3\xab",
'euro;' => "\xe2\x82\xac", 'euro;' => "\xe2\x82\xac",
'exist;' => "\xe2\x88\x83", 'exist;' => "\xe2\x88\x83",
'fnof;' => "\xc6\x92", 'fnof;' => "\xc6\x92",
'forall;' => "\xe2\x88\x80", 'forall;' => "\xe2\x88\x80",
'frac12' => "\xc2\xbd", 'frac12' => "\xc2\xbd",
'frac12;' => "\xc2\xbd", 'frac12;' => "\xc2\xbd",
'frac14' => "\xc2\xbc", 'frac14' => "\xc2\xbc",
'frac14;' => "\xc2\xbc", 'frac14;' => "\xc2\xbc",
'frac34' => "\xc2\xbe", 'frac34' => "\xc2\xbe",
'frac34;' => "\xc2\xbe", 'frac34;' => "\xc2\xbe",
'frasl;' => "\xe2\x81\x84", 'frasl;' => "\xe2\x81\x84",
'gamma;' => "\xce\xb3", 'gamma;' => "\xce\xb3",
'ge;' => "\xe2\x89\xa5", 'ge;' => "\xe2\x89\xa5",
'gt' => '>', 'gt' => '>',
'gt;' => '>', 'gt;' => '>',
'hArr;' => "\xe2\x87\x94", 'hArr;' => "\xe2\x87\x94",
'harr;' => "\xe2\x86\x94", 'harr;' => "\xe2\x86\x94",
'hearts;' => "\xe2\x99\xa5", 'hearts;' => "\xe2\x99\xa5",
'hellip;' => "\xe2\x80\xa6", 'hellip;' => "\xe2\x80\xa6",
'iacute' => "\xc3\xad", 'iacute' => "\xc3\xad",
'iacute;' => "\xc3\xad", 'iacute;' => "\xc3\xad",
'icirc' => "\xc3\xae", 'icirc' => "\xc3\xae",
'icirc;' => "\xc3\xae", 'icirc;' => "\xc3\xae",
'iexcl' => "\xc2\xa1", 'iexcl' => "\xc2\xa1",
'iexcl;' => "\xc2\xa1", 'iexcl;' => "\xc2\xa1",
'igrave' => "\xc3\xac", 'igrave' => "\xc3\xac",
'igrave;' => "\xc3\xac", 'igrave;' => "\xc3\xac",
'image;' => "\xe2\x84\x91", 'image;' => "\xe2\x84\x91",
'infin;' => "\xe2\x88\x9e", 'infin;' => "\xe2\x88\x9e",
'int;' => "\xe2\x88\xab", 'int;' => "\xe2\x88\xab",
'iota;' => "\xce\xb9", 'iota;' => "\xce\xb9",
'iquest' => "\xc2\xbf", 'iquest' => "\xc2\xbf",
'iquest;' => "\xc2\xbf", 'iquest;' => "\xc2\xbf",
'isin;' => "\xe2\x88\x88", 'isin;' => "\xe2\x88\x88",
'iuml' => "\xc3\xaf", 'iuml' => "\xc3\xaf",
'iuml;' => "\xc3\xaf", 'iuml;' => "\xc3\xaf",
'kappa;' => "\xce\xba", 'kappa;' => "\xce\xba",
'lArr;' => "\xe2\x87\x90", 'lArr;' => "\xe2\x87\x90",
'lambda;' => "\xce\xbb", 'lambda;' => "\xce\xbb",
'lang;' => "\xe3\x80\x88", 'lang;' => "\xe3\x80\x88",
'laquo' => "\xc2\xab", 'laquo' => "\xc2\xab",
'laquo;' => "\xc2\xab", 'laquo;' => "\xc2\xab",
'larr;' => "\xe2\x86\x90", 'larr;' => "\xe2\x86\x90",
'lceil;' => "\xe2\x8c\x88", 'lceil;' => "\xe2\x8c\x88",
'ldquo;' => "\xe2\x80\x9c", 'ldquo;' => "\xe2\x80\x9c",
'le;' => "\xe2\x89\xa4", 'le;' => "\xe2\x89\xa4",
'lfloor;' => "\xe2\x8c\x8a", 'lfloor;' => "\xe2\x8c\x8a",
'lowast;' => "\xe2\x88\x97", 'lowast;' => "\xe2\x88\x97",
'loz;' => "\xe2\x97\x8a", 'loz;' => "\xe2\x97\x8a",
'lrm;' => "\xe2\x80\x8e", 'lrm;' => "\xe2\x80\x8e",
'lsaquo;' => "\xe2\x80\xb9", 'lsaquo;' => "\xe2\x80\xb9",
'lsquo;' => "\xe2\x80\x98", 'lsquo;' => "\xe2\x80\x98",
'lt' => '<', 'lt' => '<',
'lt;' => '<', 'lt;' => '<',
'macr' => "\xc2\xaf", 'macr' => "\xc2\xaf",
'macr;' => "\xc2\xaf", 'macr;' => "\xc2\xaf",
'mdash;' => "\xe2\x80\x94", 'mdash;' => "\xe2\x80\x94",
'micro' => "\xc2\xb5", 'micro' => "\xc2\xb5",
'micro;' => "\xc2\xb5", 'micro;' => "\xc2\xb5",
'middot' => "\xc2\xb7", 'middot' => "\xc2\xb7",
'middot;' => "\xc2\xb7", 'middot;' => "\xc2\xb7",
'minus;' => "\xe2\x88\x92", 'minus;' => "\xe2\x88\x92",
'mu;' => "\xce\xbc", 'mu;' => "\xce\xbc",
'nabla;' => "\xe2\x88\x87", 'nabla;' => "\xe2\x88\x87",
'nbsp' => "\xc2\xa0", 'nbsp' => "\xc2\xa0",
'nbsp;' => "\xc2\xa0", 'nbsp;' => "\xc2\xa0",
'ndash;' => "\xe2\x80\x93", 'ndash;' => "\xe2\x80\x93",
'ne;' => "\xe2\x89\xa0", 'ne;' => "\xe2\x89\xa0",
'ni;' => "\xe2\x88\x8b", 'ni;' => "\xe2\x88\x8b",
'not' => "\xc2\xac", 'not' => "\xc2\xac",
'not;' => "\xc2\xac", 'not;' => "\xc2\xac",
'notin;' => "\xe2\x88\x89", 'notin;' => "\xe2\x88\x89",
'nsub;' => "\xe2\x8a\x84", 'nsub;' => "\xe2\x8a\x84",
'ntilde' => "\xc3\xb1", 'ntilde' => "\xc3\xb1",
'ntilde;' => "\xc3\xb1", 'ntilde;' => "\xc3\xb1",
'nu;' => "\xce\xbd", 'nu;' => "\xce\xbd",
'oacute' => "\xc3\xb3", 'oacute' => "\xc3\xb3",
'oacute;' => "\xc3\xb3", 'oacute;' => "\xc3\xb3",
'ocirc' => "\xc3\xb4", 'ocirc' => "\xc3\xb4",
'ocirc;' => "\xc3\xb4", 'ocirc;' => "\xc3\xb4",
'oelig;' => "\xc5\x93", 'oelig;' => "\xc5\x93",
'ograve' => "\xc3\xb2", 'ograve' => "\xc3\xb2",
'ograve;' => "\xc3\xb2", 'ograve;' => "\xc3\xb2",
'oline;' => "\xe2\x80\xbe", 'oline;' => "\xe2\x80\xbe",
'omega;' => "\xcf\x89", 'omega;' => "\xcf\x89",
'omicron;' => "\xce\xbf", 'omicron;' => "\xce\xbf",
'oplus;' => "\xe2\x8a\x95", 'oplus;' => "\xe2\x8a\x95",
'or;' => "\xe2\x88\xa8", 'or;' => "\xe2\x88\xa8",
'ordf' => "\xc2\xaa", 'ordf' => "\xc2\xaa",
'ordf;' => "\xc2\xaa", 'ordf;' => "\xc2\xaa",
'ordm' => "\xc2\xba", 'ordm' => "\xc2\xba",
'ordm;' => "\xc2\xba", 'ordm;' => "\xc2\xba",
'oslash' => "\xc3\xb8", 'oslash' => "\xc3\xb8",
'oslash;' => "\xc3\xb8", 'oslash;' => "\xc3\xb8",
'otilde' => "\xc3\xb5", 'otilde' => "\xc3\xb5",
'otilde;' => "\xc3\xb5", 'otilde;' => "\xc3\xb5",
'otimes;' => "\xe2\x8a\x97", 'otimes;' => "\xe2\x8a\x97",
'ouml' => "\xc3\xb6", 'ouml' => "\xc3\xb6",
'ouml;' => "\xc3\xb6", 'ouml;' => "\xc3\xb6",
'para' => "\xc2\xb6", 'para' => "\xc2\xb6",
'para;' => "\xc2\xb6", 'para;' => "\xc2\xb6",
'part;' => "\xe2\x88\x82", 'part;' => "\xe2\x88\x82",
'permil;' => "\xe2\x80\xb0", 'permil;' => "\xe2\x80\xb0",
'perp;' => "\xe2\x8a\xa5", 'perp;' => "\xe2\x8a\xa5",
'phi;' => "\xcf\x86", 'phi;' => "\xcf\x86",
'pi;' => "\xcf\x80", 'pi;' => "\xcf\x80",
'piv;' => "\xcf\x96", 'piv;' => "\xcf\x96",
'plusmn' => "\xc2\xb1", 'plusmn' => "\xc2\xb1",
'plusmn;' => "\xc2\xb1", 'plusmn;' => "\xc2\xb1",
'pound' => "\xc2\xa3", 'pound' => "\xc2\xa3",
'pound;' => "\xc2\xa3", 'pound;' => "\xc2\xa3",
'prime;' => "\xe2\x80\xb2", 'prime;' => "\xe2\x80\xb2",
'prod;' => "\xe2\x88\x8f", 'prod;' => "\xe2\x88\x8f",
'prop;' => "\xe2\x88\x9d", 'prop;' => "\xe2\x88\x9d",
'psi;' => "\xcf\x88", 'psi;' => "\xcf\x88",
'quot' => '"', 'quot' => '"',
'quot;' => '"', 'quot;' => '"',
'rArr;' => "\xe2\x87\x92", 'rArr;' => "\xe2\x87\x92",
'radic;' => "\xe2\x88\x9a", 'radic;' => "\xe2\x88\x9a",
'rang;' => "\xe3\x80\x89", 'rang;' => "\xe3\x80\x89",
'raquo' => "\xc2\xbb", 'raquo' => "\xc2\xbb",
'raquo;' => "\xc2\xbb", 'raquo;' => "\xc2\xbb",
'rarr;' => "\xe2\x86\x92", 'rarr;' => "\xe2\x86\x92",
'rceil;' => "\xe2\x8c\x89", 'rceil;' => "\xe2\x8c\x89",
'rdquo;' => "\xe2\x80\x9d", 'rdquo;' => "\xe2\x80\x9d",
'real;' => "\xe2\x84\x9c", 'real;' => "\xe2\x84\x9c",
'reg' => "\xc2\xae", 'reg' => "\xc2\xae",
'reg;' => "\xc2\xae", 'reg;' => "\xc2\xae",
'rfloor;' => "\xe2\x8c\x8b", 'rfloor;' => "\xe2\x8c\x8b",
'rho;' => "\xcf\x81", 'rho;' => "\xcf\x81",
'rlm;' => "\xe2\x80\x8f", 'rlm;' => "\xe2\x80\x8f",
'rsaquo;' => "\xe2\x80\xba", 'rsaquo;' => "\xe2\x80\xba",
'rsquo;' => "\xe2\x80\x99", 'rsquo;' => "\xe2\x80\x99",
'sbquo;' => "\xe2\x80\x9a", 'sbquo;' => "\xe2\x80\x9a",
'scaron;' => "\xc5\xa1", 'scaron;' => "\xc5\xa1",
'sdot;' => "\xe2\x8b\x85", 'sdot;' => "\xe2\x8b\x85",
'sect' => "\xc2\xa7", 'sect' => "\xc2\xa7",
'sect;' => "\xc2\xa7", 'sect;' => "\xc2\xa7",
'shy' => "\xc2\xad", 'shy' => "\xc2\xad",
'shy;' => "\xc2\xad", 'shy;' => "\xc2\xad",
'sigma;' => "\xcf\x83", 'sigma;' => "\xcf\x83",
'sigmaf;' => "\xcf\x82", 'sigmaf;' => "\xcf\x82",
'sim;' => "\xe2\x88\xbc", 'sim;' => "\xe2\x88\xbc",
'spades;' => "\xe2\x99\xa0", 'spades;' => "\xe2\x99\xa0",
'sub;' => "\xe2\x8a\x82", 'sub;' => "\xe2\x8a\x82",
'sube;' => "\xe2\x8a\x86", 'sube;' => "\xe2\x8a\x86",
'sum;' => "\xe2\x88\x91", 'sum;' => "\xe2\x88\x91",
'sup1' => "\xc2\xb9", 'sup1' => "\xc2\xb9",
'sup1;' => "\xc2\xb9", 'sup1;' => "\xc2\xb9",
'sup2' => "\xc2\xb2", 'sup2' => "\xc2\xb2",
'sup2;' => "\xc2\xb2", 'sup2;' => "\xc2\xb2",
'sup3' => "\xc2\xb3", 'sup3' => "\xc2\xb3",
'sup3;' => "\xc2\xb3", 'sup3;' => "\xc2\xb3",
'sup;' => "\xe2\x8a\x83", 'sup;' => "\xe2\x8a\x83",
'supe;' => "\xe2\x8a\x87", 'supe;' => "\xe2\x8a\x87",
'szlig' => "\xc3\x9f", 'szlig' => "\xc3\x9f",
'szlig;' => "\xc3\x9f", 'szlig;' => "\xc3\x9f",
'tau;' => "\xcf\x84", 'tau;' => "\xcf\x84",
'there4;' => "\xe2\x88\xb4", 'there4;' => "\xe2\x88\xb4",
'theta;' => "\xce\xb8", 'theta;' => "\xce\xb8",
'thetasym;' => "\xcf\x91", 'thetasym;' => "\xcf\x91",
'thinsp;' => "\xe2\x80\x89", 'thinsp;' => "\xe2\x80\x89",
'thorn' => "\xc3\xbe", 'thorn' => "\xc3\xbe",
'thorn;' => "\xc3\xbe", 'thorn;' => "\xc3\xbe",
'tilde;' => "\xcb\x9c", 'tilde;' => "\xcb\x9c",
'times' => "\xc3\x97", 'times' => "\xc3\x97",
'times;' => "\xc3\x97", 'times;' => "\xc3\x97",
'trade;' => "\xe2\x84\xa2", 'trade;' => "\xe2\x84\xa2",
'uArr;' => "\xe2\x87\x91", 'uArr;' => "\xe2\x87\x91",
'uacute' => "\xc3\xba", 'uacute' => "\xc3\xba",
'uacute;' => "\xc3\xba", 'uacute;' => "\xc3\xba",
'uarr;' => "\xe2\x86\x91", 'uarr;' => "\xe2\x86\x91",
'ucirc' => "\xc3\xbb", 'ucirc' => "\xc3\xbb",
'ucirc;' => "\xc3\xbb", 'ucirc;' => "\xc3\xbb",
'ugrave' => "\xc3\xb9", 'ugrave' => "\xc3\xb9",
'ugrave;' => "\xc3\xb9", 'ugrave;' => "\xc3\xb9",
'uml' => "\xc2\xa8", 'uml' => "\xc2\xa8",
'uml;' => "\xc2\xa8", 'uml;' => "\xc2\xa8",
'upsih;' => "\xcf\x92", 'upsih;' => "\xcf\x92",
'upsilon;' => "\xcf\x85", 'upsilon;' => "\xcf\x85",
'uuml' => "\xc3\xbc", 'uuml' => "\xc3\xbc",
'uuml;' => "\xc3\xbc", 'uuml;' => "\xc3\xbc",
'weierp;' => "\xe2\x84\x98", 'weierp;' => "\xe2\x84\x98",
'xi;' => "\xce\xbe", 'xi;' => "\xce\xbe",
'yacute' => "\xc3\xbd", 'yacute' => "\xc3\xbd",
'yacute;' => "\xc3\xbd", 'yacute;' => "\xc3\xbd",
'yen' => "\xc2\xa5", 'yen' => "\xc2\xa5",
'yen;' => "\xc2\xa5", 'yen;' => "\xc2\xa5",
'yuml' => "\xc3\xbf", 'yuml' => "\xc3\xbf",
'yuml;' => "\xc3\xbf", 'yuml;' => "\xc3\xbf",
'zeta;' => "\xce\xb6", 'zeta;' => "\xce\xb6",
'zwj;' => "\xe2\x80\x8d", 'zwj;' => "\xe2\x80\x8d",
'zwnj;' => "\xe2\x80\x8c" 'zwnj;' => "\xe2\x80\x8c"
} }
ENCODINGS = %w[ ENCODINGS = %w[

View file

@ -21,9 +21,9 @@ module HTML5
when :EmptyTag when :EmptyTag
if token[:name].downcase == "meta" if token[:name].downcase == "meta"
# replace charset with actual encoding # replace charset with actual encoding
token[:data].each_with_index do |(name,value),index| token[:data].each_with_index do |(name, value), index|
if name == 'charset' if name == 'charset'
token[:data][index][1]=@encoding token[:data][index][1] = @encoding
meta_found = true meta_found = true
end end
end end
@ -31,7 +31,7 @@ module HTML5
# replace charset with actual encoding # replace charset with actual encoding
has_http_equiv_content_type = false has_http_equiv_content_type = false
content_index = -1 content_index = -1
token[:data].each_with_index do |(name,value),i| token[:data].each_with_index do |(name, value), i|
if name.downcase == 'charset' if name.downcase == 'charset'
token[:data][i] = ['charset', @encoding] token[:data][i] = ['charset', @encoding]
meta_found = true meta_found = true
@ -43,30 +43,27 @@ module HTML5
end end
end end
if not meta_found if !meta_found
if has_http_equiv_content_type and content_index >= 0 if has_http_equiv_content_type && content_index >= 0
token[:data][content_index][1] = token[:data][content_index][1] = 'text/html; charset=%s' % @encoding
'text/html; charset=%s' % @encoding
meta_found = true meta_found = true
end end
end end
elsif token[:name].downcase == "head" and not meta_found elsif token[:name].downcase == "head" && !meta_found
# insert meta into empty head # insert meta into empty head
yield(:type => :StartTag, :name => "head", :data => token[:data]) yield :type => :StartTag, :name => "head", :data => token[:data]
yield(:type => :EmptyTag, :name => "meta", yield :type => :EmptyTag, :name => "meta", :data => [["charset", @encoding]]
:data => [["charset", @encoding]]) yield :type => :EndTag, :name => "head"
yield(:type => :EndTag, :name => "head")
meta_found = true meta_found = true
next next
end end
when :EndTag when :EndTag
if token[:name].downcase == "head" and pending.any? if token[:name].downcase == "head" && pending.any?
# insert meta into head (if necessary) and flush pending queue # insert meta into head (if necessary) and flush pending queue
yield pending.shift yield pending.shift
yield(:type => :EmptyTag, :name => "meta", yield :type => :EmptyTag, :name => "meta", :data => [["charset", @encoding]] if !meta_found
:data => [["charset", @encoding]]) if not meta_found
yield pending.shift while pending.any? yield pending.shift while pending.any?
meta_found = true meta_found = true
state = :post_head state = :post_head

View file

@ -75,8 +75,7 @@ module HTML5
if type == :StartTag if type == :StartTag
# omit the thead and tfoot elements' end tag when they are # omit the thead and tfoot elements' end tag when they are
# immediately followed by a tbody element. See is_optional_end. # immediately followed by a tbody element. See is_optional_end.
if previous and previous[:type] == :EndTag and \ if previous and previous[:type] == :EndTag && %w(tbody thead tfoot).include?(previous[:name])
%w(tbody thead tfoot).include?(previous[:name])
return false return false
end end
@ -85,7 +84,7 @@ module HTML5
return false return false
end end
end end
return false return false
end end
def is_optional_end(tagname, nexttok) def is_optional_end(tagname, nexttok)

View file

@ -21,7 +21,7 @@ module HTML5
preserve -= 1 if preserve > 0 preserve -= 1 if preserve > 0
when :SpaceCharacters when :SpaceCharacters
next if preserve == 0 token[:data] = " " if preserve == 0 && token[:data]
when :Characters when :Characters
token[:data] = token[:data].sub(SPACES,' ') if preserve == 0 token[:data] = token[:data].sub(SPACES,' ') if preserve == 0

View file

@ -16,7 +16,7 @@ module HTML5
# #
class HTMLParser class HTMLParser
attr_accessor :phase, :firstStartTag, :innerHTML, :lastPhase, :insertFromTable attr_accessor :phase, :first_start_tag, :inner_html, :last_phase, :insert_from_table
attr_reader :phases, :tokenizer, :tree, :errors attr_reader :phases, :tokenizer, :tree, :errors
@ -25,10 +25,10 @@ module HTML5
new(options).parse(stream,encoding) new(options).parse(stream,encoding)
end end
def self.parseFragment(stream, options = {}) def self.parse_fragment(stream, options = {})
container = options.delete(:container) || 'div' container = options.delete(:container) || 'div'
encoding = options.delete(:encoding) encoding = options.delete(:encoding)
new(options).parseFragment(stream,container,encoding) new(options).parse_fragment(stream, container, encoding)
end end
@@phases = %w( initial rootElement beforeHead inHead afterHead inBody inTable inCaption @@phases = %w( initial rootElement beforeHead inHead afterHead inBody inTable inCaption
@ -44,56 +44,58 @@ module HTML5
@tokenizer = HTMLTokenizer @tokenizer = HTMLTokenizer
@tree = TreeBuilders::REXML::TreeBuilder @tree = TreeBuilders::REXML::TreeBuilder
options.each { |name, value| instance_variable_set("@#{name}", value) } options.each {|name, value| instance_variable_set("@#{name}", value) }
@lowercase_attr_name = nil unless instance_variables.include?("@lowercase_attr_name")
@lowercase_element_name = nil unless instance_variables.include?("@lowercase_element_name")
@tree = @tree.new @tree = @tree.new
@phases = @@phases.inject({}) do |phases, phase_name| @phases = @@phases.inject({}) do |phases, phase_name|
phase_class_name = phase_name.sub(/(.)/) { $1.upcase } + 'Phase' phase_class_name = phase_name.sub(/(.)/) { $1.upcase } + 'Phase'
phases[phase_name.to_sym] = HTML5.const_get(phase_class_name).new(self, @tree) phases[phase_name.to_sym] = HTML5.const_get(phase_class_name).new(self, @tree)
phases phases
end end
end end
def _parse(stream, innerHTML, encoding, container = 'div') def _parse(stream, inner_html, encoding, container = 'div')
@tree.reset @tree.reset
@firstStartTag = false @first_start_tag = false
@errors = [] @errors = []
@tokenizer = @tokenizer.class unless Class === @tokenizer @tokenizer = @tokenizer.class unless Class === @tokenizer
@tokenizer = @tokenizer.new(stream, :encoding => encoding, @tokenizer = @tokenizer.new(stream, :encoding => encoding,
:parseMeta => !innerHTML) :parseMeta => !inner_html, :lowercase_attr_name => @lowercase_attr_name, :lowercase_element_name => @lowercase_element_name)
if innerHTML if inner_html
case @innerHTML = container.downcase case @inner_html = container.downcase
when 'title', 'textarea' when 'title', 'textarea'
@tokenizer.contentModelFlag = :RCDATA @tokenizer.content_model_flag = :RCDATA
when 'style', 'script', 'xmp', 'iframe', 'noembed', 'noframes', 'noscript' when 'style', 'script', 'xmp', 'iframe', 'noembed', 'noframes', 'noscript'
@tokenizer.contentModelFlag = :CDATA @tokenizer.content_model_flag = :CDATA
when 'plaintext' when 'plaintext'
@tokenizer.contentModelFlag = :PLAINTEXT @tokenizer.content_model_flag = :PLAINTEXT
else else
# contentModelFlag already is PCDATA # content_model_flag already is PCDATA
#@tokenizer.contentModelFlag = :PCDATA #@tokenizer.content_model_flag = :PCDATA
end end
@phase = @phases[:rootElement] @phase = @phases[:rootElement]
@phase.insertHtmlElement @phase.insert_html_element
resetInsertionMode reset_insertion_mode
else else
@innerHTML = false @inner_html = false
@phase = @phases[:initial] @phase = @phases[:initial]
end end
# We only seem to have InBodyPhase testcases where the following is # We only seem to have InBodyPhase testcases where the following is
# relevant ... need others too # relevant ... need others too
@lastPhase = nil @last_phase = nil
# XXX This is temporary for the moment so there isn't any other # XXX This is temporary for the moment so there isn't any other
# changes needed for the parser to work with the iterable tokenizer # changes needed for the parser to work with the iterable tokenizer
@tokenizer.each do |token| @tokenizer.each do |token|
token = normalizeToken(token) token = normalize_token(token)
method = 'process%s' % token[:type] method = 'process%s' % token[:type]
@ -108,12 +110,12 @@ module HTML5
@phase.send method, token[:name], token[:publicId], @phase.send method, token[:name], token[:publicId],
token[:systemId], token[:correct] token[:systemId], token[:correct]
else else
parseError(token[:data]) parse_error(token[:data])
end end
end end
# When the loop finishes it's EOF # When the loop finishes it's EOF
@phase.processEOF @phase.process_eof
end end
# Parse a HTML document into a well-formed tree # Parse a HTML document into a well-formed tree
@ -126,12 +128,12 @@ module HTML5
# element) # element)
def parse(stream, encoding=nil) def parse(stream, encoding=nil)
_parse(stream, false, encoding) _parse(stream, false, encoding)
return @tree.getDocument @tree.get_document
end end
# Parse a HTML fragment into a well-formed tree fragment # Parse a HTML fragment into a well-formed tree fragment
# container - name of the element we're setting the innerHTML property # container - name of the element we're setting the inner_html property
# if set to nil, default to 'div' # if set to nil, default to 'div'
# #
# stream - a filelike object or string containing the HTML to be parsed # stream - a filelike object or string containing the HTML to be parsed
@ -140,19 +142,19 @@ module HTML5
# the encoding. If specified, that encoding will be used, # the encoding. If specified, that encoding will be used,
# regardless of any BOM or later declaration (such as in a meta # regardless of any BOM or later declaration (such as in a meta
# element) # element)
def parseFragment(stream, container='div', encoding=nil) def parse_fragment(stream, container='div', encoding=nil)
_parse(stream, true, encoding, container) _parse(stream, true, encoding, container)
return @tree.getFragment @tree.get_fragment
end end
def parseError(data = 'XXX ERROR MESSAGE NEEDED') def parse_error(data = 'XXX ERROR MESSAGE NEEDED')
# XXX The idea is to make data mandatory. # XXX The idea is to make data mandatory.
@errors.push([@tokenizer.stream.position, data]) @errors.push([@tokenizer.stream.position, data])
raise ParseError if @strict raise ParseError if @strict
end end
# HTML5 specific normalizations to the token stream # HTML5 specific normalizations to the token stream
def normalizeToken(token) def normalize_token(token)
if token[:type] == :EmptyTag if token[:type] == :EmptyTag
# When a solidus (/) is encountered within a tag name what happens # When a solidus (/) is encountered within a tag name what happens
@ -161,75 +163,75 @@ module HTML5
# thing and if it doesn't it's wrong for everyone. # thing and if it doesn't it's wrong for everyone.
unless VOID_ELEMENTS.include?(token[:name]) unless VOID_ELEMENTS.include?(token[:name])
parseError(_('Solidus (/) incorrectly placed in tag.')) parse_error(_('Solidus (/) incorrectly placed in tag.'))
end end
token[:type] = :StartTag token[:type] = :StartTag
end end
if token[:type] == :StartTag if token[:type] == :StartTag
token[:name] = token[:name].tr(ASCII_UPPERCASE,ASCII_LOWERCASE) token[:name] = token[:name].downcase
# We need to remove the duplicate attributes and convert attributes # We need to remove the duplicate attributes and convert attributes
# to a dict so that [["x", "y"], ["x", "z"]] becomes {"x": "y"} # to a dict so that [["x", "y"], ["x", "z"]] becomes {"x": "y"}
unless token[:data].empty? unless token[:data].empty?
data = token[:data].reverse.map { |attr, value| [attr.tr(ASCII_UPPERCASE, ASCII_LOWERCASE), value] } data = token[:data].reverse.map {|attr, value| [attr.downcase, value] }
token[:data] = Hash[*data.flatten] token[:data] = Hash[*data.flatten]
end end
elsif token[:type] == :EndTag elsif token[:type] == :EndTag
parseError(_('End tag contains unexpected attributes.')) unless token[:data].empty? parse_error(_('End tag contains unexpected attributes.')) unless token[:data].empty?
token[:name] = token[:name].downcase token[:name] = token[:name].downcase
end end
return token token
end end
@@new_modes = { @@new_modes = {
'select' => :inSelect, 'select' => :inSelect,
'td' => :inCell, 'td' => :inCell,
'th' => :inCell, 'th' => :inCell,
'tr' => :inRow, 'tr' => :inRow,
'tbody' => :inTableBody, 'tbody' => :inTableBody,
'thead' => :inTableBody, 'thead' => :inTableBody,
'tfoot' => :inTableBody, 'tfoot' => :inTableBody,
'caption' => :inCaption, 'caption' => :inCaption,
'colgroup' => :inColumnGroup, 'colgroup' => :inColumnGroup,
'table' => :inTable, 'table' => :inTable,
'head' => :inBody, 'head' => :inBody,
'body' => :inBody, 'body' => :inBody,
'frameset' => :inFrameset 'frameset' => :inFrameset
} }
def resetInsertionMode def reset_insertion_mode
# The name of this method is mostly historical. (It's also used in the # The name of this method is mostly historical. (It's also used in the
# specification.) # specification.)
last = false last = false
@tree.openElements.reverse.each do |node| @tree.open_elements.reverse.each do |node|
nodeName = node.name node_name = node.name
if node == @tree.openElements[0] if node == @tree.open_elements.first
last = true last = true
unless ['td', 'th'].include?(nodeName) unless ['td', 'th'].include?(node_name)
# XXX # XXX
# assert @innerHTML # assert @inner_html
nodeName = @innerHTML node_name = @inner_html
end end
end end
# Check for conditions that should only happen in the innerHTML # Check for conditions that should only happen in the inner_html
# case # case
if ['select', 'colgroup', 'head', 'frameset'].include?(nodeName) if ['select', 'colgroup', 'head', 'frameset'].include?(node_name)
# XXX # XXX
# assert @innerHTML # assert @inner_html
end end
if @@new_modes.has_key?(nodeName) if @@new_modes.has_key?(node_name)
@phase = @phases[@@new_modes[nodeName]] @phase = @phases[@@new_modes[node_name]]
elsif nodeName == 'html' elsif node_name == 'html'
@phase = @phases[@tree.headPointer.nil?? :beforeHead : :afterHead] @phase = @phases[@tree.head_pointer.nil?? :beforeHead : :afterHead]
elsif last elsif last
@phase = @phases[:inBody] @phase = @phases[:inBody]
else else

View file

@ -8,36 +8,36 @@ module HTML5
def processComment(data) def processComment(data)
# This is needed because data is to be appended to the <html> element # This is needed because data is to be appended to the <html> element
# here and not to whatever is currently open. # here and not to whatever is currently open.
@tree.insertComment(data, @tree.openElements[0]) @tree.insert_comment(data, @tree.open_elements.first)
end end
def processCharacters(data) def processCharacters(data)
@parser.parseError(_('Unexpected non-space characters in the after body phase.')) parse_error(_('Unexpected non-space characters in the after body phase.'))
@parser.phase = @parser.phases[:inBody] @parser.phase = @parser.phases[:inBody]
@parser.phase.processCharacters(data) @parser.phase.processCharacters(data)
end end
def processStartTag(name, attributes) def processStartTag(name, attributes)
@parser.parseError(_("Unexpected start tag token (#{name}) in the after body phase.")) parse_error(_("Unexpected start tag token (#{name}) in the after body phase."))
@parser.phase = @parser.phases[:inBody] @parser.phase = @parser.phases[:inBody]
@parser.phase.processStartTag(name, attributes) @parser.phase.processStartTag(name, attributes)
end end
def endTagHtml(name) def endTagHtml(name)
if @parser.innerHTML if @parser.inner_html
@parser.parseError parse_error
else else
# XXX: This may need to be done, not sure # XXX: This may need to be done, not sure
# Don't set lastPhase to the current phase but to the inBody phase # Don't set last_phase to the current phase but to the inBody phase
# instead. No need for extra parse errors if there's something after </html>. # instead. No need for extra parse errors if there's something after </html>.
# Try "<!doctype html>X</html>X" for instance. # Try "<!doctype html>X</html>X" for instance.
@parser.lastPhase = @parser.phase @parser.last_phase = @parser.phase
@parser.phase = @parser.phases[:trailingEnd] @parser.phase = @parser.phases[:trailingEnd]
end end
end end
def endTagOther(name) def endTagOther(name)
@parser.parseError(_("Unexpected end tag token (#{name}) in the after body phase.")) parse_error(_("Unexpected end tag token (#{name}) in the after body phase."))
@parser.phase = @parser.phases[:inBody] @parser.phase = @parser.phases[:inBody]
@parser.phase.processEndTag(name) @parser.phase.processEndTag(name)
end end

View file

@ -10,7 +10,7 @@ module HTML5
handle_end 'html' handle_end 'html'
def processCharacters(data) def processCharacters(data)
@parser.parseError(_('Unexpected non-space characters in the after frameset phase. Ignored.')) parse_error(_('Unexpected non-space characters in the after frameset phase. Ignored.'))
end end
def startTagNoframes(name, attributes) def startTagNoframes(name, attributes)
@ -18,16 +18,16 @@ module HTML5
end end
def startTagOther(name, attributes) def startTagOther(name, attributes)
@parser.parseError(_("Unexpected start tag (#{name}) in the after frameset phase. Ignored.")) parse_error(_("Unexpected start tag (#{name}) in the after frameset phase. Ignored."))
end end
def endTagHtml(name) def endTagHtml(name)
@parser.lastPhase = @parser.phase @parser.last_phase = @parser.phase
@parser.phase = @parser.phases[:trailingEnd] @parser.phase = @parser.phases[:trailingEnd]
end end
def endTagOther(name) def endTagOther(name)
@parser.parseError(_("Unexpected end tag (#{name}) in the after frameset phase. Ignored.")) parse_error(_("Unexpected end tag (#{name}) in the after frameset phase. Ignored."))
end end
end end

View file

@ -2,47 +2,47 @@ require 'html5/html5parser/phase'
module HTML5 module HTML5
class AfterHeadPhase < Phase class AfterHeadPhase < Phase
handle_start 'html', 'body', 'frameset', %w( base link meta script style title ) => 'FromHead' handle_start 'html', 'body', 'frameset', %w( base link meta script style title ) => 'FromHead'
def processEOF def process_eof
anythingElse anything_else
@parser.phase.processEOF @parser.phase.process_eof
end end
def processCharacters(data) def processCharacters(data)
anythingElse anything_else
@parser.phase.processCharacters(data) @parser.phase.processCharacters(data)
end end
def startTagBody(name, attributes) def startTagBody(name, attributes)
@tree.insertElement(name, attributes) @tree.insert_element(name, attributes)
@parser.phase = @parser.phases[:inBody] @parser.phase = @parser.phases[:inBody]
end end
def startTagFrameset(name, attributes) def startTagFrameset(name, attributes)
@tree.insertElement(name, attributes) @tree.insert_element(name, attributes)
@parser.phase = @parser.phases[:inFrameset] @parser.phase = @parser.phases[:inFrameset]
end end
def startTagFromHead(name, attributes) def startTagFromHead(name, attributes)
@parser.parseError(_("Unexpected start tag (#{name}) that can be in head. Moved.")) parse_error(_("Unexpected start tag (#{name}) that can be in head. Moved."))
@parser.phase = @parser.phases[:inHead] @parser.phase = @parser.phases[:inHead]
@parser.phase.processStartTag(name, attributes) @parser.phase.processStartTag(name, attributes)
end end
def startTagOther(name, attributes) def startTagOther(name, attributes)
anythingElse anything_else
@parser.phase.processStartTag(name, attributes) @parser.phase.processStartTag(name, attributes)
end end
def processEndTag(name) def processEndTag(name)
anythingElse anything_else
@parser.phase.processEndTag(name) @parser.phase.processEndTag(name)
end end
def anythingElse def anything_else
@tree.insertElement('body', {}) @tree.insert_element('body', {})
@parser.phase = @parser.phases[:inBody] @parser.phase = @parser.phases[:inBody]
end end

View file

@ -7,9 +7,9 @@ module HTML5
handle_end %w( html head body br p ) => 'ImplyHead' handle_end %w( html head body br p ) => 'ImplyHead'
def processEOF def process_eof
startTagHead('head', {}) startTagHead('head', {})
@parser.phase.processEOF @parser.phase.process_eof
end end
def processCharacters(data) def processCharacters(data)
@ -18,8 +18,8 @@ module HTML5
end end
def startTagHead(name, attributes) def startTagHead(name, attributes)
@tree.insertElement(name, attributes) @tree.insert_element(name, attributes)
@tree.headPointer = @tree.openElements[-1] @tree.head_pointer = @tree.open_elements[-1]
@parser.phase = @parser.phases[:inHead] @parser.phase = @parser.phases[:inHead]
end end
@ -34,7 +34,7 @@ module HTML5
end end
def endTagOther(name) def endTagOther(name)
@parser.parseError(_("Unexpected end tag (#{name}) after the (implied) root element.")) parse_error(_("Unexpected end tag (#{name}) after the (implied) root element."))
end end
end end

View file

@ -51,25 +51,40 @@ module HTML5
# for special handling of whitespace in <pre> # for special handling of whitespace in <pre>
@processSpaceCharactersDropNewline = false @processSpaceCharactersDropNewline = false
if $-w
$-w = false
alias processSpaceCharactersNonPre processSpaceCharacters
$-w = true
else
alias processSpaceCharactersNonPre processSpaceCharacters
end
end end
def processSpaceCharactersDropNewline(data) def processSpaceCharactersDropNewline(data)
#Sometimes (start of <pre> blocks) we want to drop leading newlines # #Sometimes (start of <pre> blocks) we want to drop leading newlines
@processSpaceCharactersDropNewline = false
if (data.length > 0 and data[0] == ?\n and if $-w
%w[pre textarea].include?(@tree.openElements[-1].name) and $-w = false
not @tree.openElements[-1].hasContent) alias processSpaceCharacters processSpaceCharactersNonPre
$-w = true
else
alias processSpaceCharacters processSpaceCharactersNonPre
end
if (data.length > 0 and data[0] == ?\n &&
%w[pre textarea].include?(@tree.open_elements.last.name) && !@tree.open_elements.last.hasContent)
data = data[1..-1] data = data[1..-1]
end end
@tree.insertText(data) if data.length > 0
if data.length > 0
@tree.reconstructActiveFormattingElements
@tree.insertText(data)
end
end end
def processSpaceCharacters(data) def processSpaceCharacters(data)
if @processSpaceCharactersDropNewline @tree.reconstructActiveFormattingElements()
processSpaceCharactersDropNewline(data) @tree.insertText(data)
else
super(data)
end
end end
def processCharacters(data) def processCharacters(data)
@ -85,20 +100,19 @@ module HTML5
end end
def startTagTitle(name, attributes) def startTagTitle(name, attributes)
@parser.parseError(_("Unexpected start tag (#{name}) that belongs in the head. Moved.")) parse_error(_("Unexpected start tag (#{name}) that belongs in the head. Moved."))
@parser.phases[:inHead].processStartTag(name, attributes) @parser.phases[:inHead].processStartTag(name, attributes)
end end
def startTagBody(name, attributes) def startTagBody(name, attributes)
@parser.parseError(_('Unexpected start tag (body).')) parse_error(_('Unexpected start tag (body).'))
if (@tree.openElements.length == 1 or if (@tree.open_elements.length == 1 || @tree.open_elements[1].name != 'body')
@tree.openElements[1].name != 'body') assert @parser.inner_html
assert @parser.innerHTML
else else
attributes.each do |attr, value| attributes.each do |attr, value|
unless @tree.openElements[1].attributes.has_key?(attr) unless @tree.open_elements[1].attributes.has_key?(attr)
@tree.openElements[1].attributes[attr] = value @tree.open_elements[1].attributes[attr] = value
end end
end end
end end
@ -106,17 +120,17 @@ module HTML5
def startTagCloseP(name, attributes) def startTagCloseP(name, attributes)
endTagP('p') if in_scope?('p') endTagP('p') if in_scope?('p')
@tree.insertElement(name, attributes) @tree.insert_element(name, attributes)
@processSpaceCharactersDropNewline = true if name == 'pre' @processSpaceCharactersDropNewline = true if name == 'pre'
end end
def startTagForm(name, attributes) def startTagForm(name, attributes)
if @tree.formPointer if @tree.formPointer
@parser.parseError(_('Unexpected start tag (form). Ignored.')) parse_error(_('Unexpected start tag (form). Ignored.'))
else else
endTagP('p') if in_scope?('p') endTagP('p') if in_scope?('p')
@tree.insertElement(name, attributes) @tree.insert_element(name, attributes)
@tree.formPointer = @tree.openElements[-1] @tree.formPointer = @tree.open_elements[-1]
end end
end end
@ -125,31 +139,28 @@ module HTML5
stopNames = {'li' => ['li'], 'dd' => ['dd', 'dt'], 'dt' => ['dd', 'dt']} stopNames = {'li' => ['li'], 'dd' => ['dd', 'dt'], 'dt' => ['dd', 'dt']}
stopName = stopNames[name] stopName = stopNames[name]
@tree.openElements.reverse.each_with_index do |node, i| @tree.open_elements.reverse.each_with_index do |node, i|
if stopName.include?(node.name) if stopName.include?(node.name)
poppedNodes = (0..i).collect { @tree.openElements.pop } poppedNodes = (0..i).collect { @tree.open_elements.pop }
if i >= 1 if i >= 1
@parser.parseError(_("Missing end tag%s (%s)" % [ parse_error(_("Missing end tag%s (%s)" % [(i>1 ? 's' : ''), poppedNodes.reverse.map{|item| item.name}.join(', ')]))
(i>1 ? 's' : ''),
poppedNodes.reverse.map {|item| item.name}.join(', ')]))
end end
break break
end end
# Phrasing elements are all non special, non scoping, non # Phrasing elements are all non special, non scoping, non
# formatting elements # formatting elements
break if ((SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name) and break if ((SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name) && !%w[address div].include?(node.name))
not ['address', 'div'].include?(node.name))
end end
# Always insert an <li> element. # Always insert an <li> element.
@tree.insertElement(name, attributes) @tree.insert_element(name, attributes)
end end
def startTagPlaintext(name, attributes) def startTagPlaintext(name, attributes)
endTagP('p') if in_scope?('p') endTagP('p') if in_scope?('p')
@tree.insertElement(name, attributes) @tree.insert_element(name, attributes)
@parser.tokenizer.contentModelFlag = :PLAINTEXT @parser.tokenizer.content_model_flag = :PLAINTEXT
end end
def startTagHeading(name, attributes) def startTagHeading(name, attributes)
@ -158,7 +169,7 @@ module HTML5
# Uncomment the following for IE7 behavior: # Uncomment the following for IE7 behavior:
# HEADING_ELEMENTS.each do |element| # HEADING_ELEMENTS.each do |element|
# if in_scope?(element) # if in_scope?(element)
# @parser.parseError(_("Unexpected start tag (#{name}).")) # parse_error(_("Unexpected start tag (#{name})."))
# #
# remove_open_elements_until do |element| # remove_open_elements_until do |element|
# HEADING_ELEMENTS.include?(element.name) # HEADING_ELEMENTS.include?(element.name)
@ -167,14 +178,14 @@ module HTML5
# break # break
# end # end
# end # end
@tree.insertElement(name, attributes) @tree.insert_element(name, attributes)
end end
def startTagA(name, attributes) def startTagA(name, attributes)
if afeAElement = @tree.elementInActiveFormattingElements('a') if afeAElement = @tree.elementInActiveFormattingElements('a')
@parser.parseError(_('Unexpected start tag (a) implies end tag (a).')) parse_error(_('Unexpected start tag (a) implies end tag (a).'))
endTagFormatting('a') endTagFormatting('a')
@tree.openElements.delete(afeAElement) if @tree.openElements.include?(afeAElement) @tree.open_elements.delete(afeAElement) if @tree.open_elements.include?(afeAElement)
@tree.activeFormattingElements.delete(afeAElement) if @tree.activeFormattingElements.include?(afeAElement) @tree.activeFormattingElements.delete(afeAElement) if @tree.activeFormattingElements.include?(afeAElement)
end end
@tree.reconstructActiveFormattingElements @tree.reconstructActiveFormattingElements
@ -188,77 +199,82 @@ module HTML5
def startTagNobr(name, attributes) def startTagNobr(name, attributes)
@tree.reconstructActiveFormattingElements @tree.reconstructActiveFormattingElements
processEndTag('nobr') if in_scope?('nobr') if in_scope?('nobr')
parse_error(_('Unexpected start tag (nobr) implies end tag (nobr).'))
processEndTag('nobr')
# XXX Need tests that trigger the following
@tree.reconstructActiveFormattingElements
end
addFormattingElement(name, attributes) addFormattingElement(name, attributes)
end end
def startTagButton(name, attributes) def startTagButton(name, attributes)
if in_scope?('button') if in_scope?('button')
@parser.parseError(_('Unexpected start tag (button) implied end tag (button).')) parse_error(_('Unexpected start tag (button) implied end tag (button).'))
processEndTag('button') processEndTag('button')
@parser.phase.processStartTag(name, attributes) @parser.phase.processStartTag(name, attributes)
else else
@tree.reconstructActiveFormattingElements @tree.reconstructActiveFormattingElements
@tree.insertElement(name, attributes) @tree.insert_element(name, attributes)
@tree.activeFormattingElements.push(Marker) @tree.activeFormattingElements.push(Marker)
end end
end end
def startTagMarqueeObject(name, attributes) def startTagMarqueeObject(name, attributes)
@tree.reconstructActiveFormattingElements @tree.reconstructActiveFormattingElements
@tree.insertElement(name, attributes) @tree.insert_element(name, attributes)
@tree.activeFormattingElements.push(Marker) @tree.activeFormattingElements.push(Marker)
end end
def startTagXmp(name, attributes) def startTagXmp(name, attributes)
@tree.reconstructActiveFormattingElements @tree.reconstructActiveFormattingElements
@tree.insertElement(name, attributes) @tree.insert_element(name, attributes)
@parser.tokenizer.contentModelFlag = :CDATA @parser.tokenizer.content_model_flag = :CDATA
end end
def startTagTable(name, attributes) def startTagTable(name, attributes)
processEndTag('p') if in_scope?('p') processEndTag('p') if in_scope?('p')
@tree.insertElement(name, attributes) @tree.insert_element(name, attributes)
@parser.phase = @parser.phases[:inTable] @parser.phase = @parser.phases[:inTable]
end end
def startTagVoidFormatting(name, attributes) def startTagVoidFormatting(name, attributes)
@tree.reconstructActiveFormattingElements @tree.reconstructActiveFormattingElements
@tree.insertElement(name, attributes) @tree.insert_element(name, attributes)
@tree.openElements.pop @tree.open_elements.pop
end end
def startTagHr(name, attributes) def startTagHr(name, attributes)
endTagP('p') if in_scope?('p') endTagP('p') if in_scope?('p')
@tree.insertElement(name, attributes) @tree.insert_element(name, attributes)
@tree.openElements.pop @tree.open_elements.pop
end end
def startTagImage(name, attributes) def startTagImage(name, attributes)
# No really... # No really...
@parser.parseError(_('Unexpected start tag (image). Treated as img.')) parse_error(_('Unexpected start tag (image). Treated as img.'))
processStartTag('img', attributes) processStartTag('img', attributes)
end end
def startTagInput(name, attributes) def startTagInput(name, attributes)
@tree.reconstructActiveFormattingElements @tree.reconstructActiveFormattingElements
@tree.insertElement(name, attributes) @tree.insert_element(name, attributes)
if @tree.formPointer if @tree.formPointer
# XXX Not exactly sure what to do here # XXX Not exactly sure what to do here
# @tree.openElements[-1].form = @tree.formPointer # @tree.open_elements[-1].form = @tree.formPointer
end end
@tree.openElements.pop @tree.open_elements.pop
end end
def startTagIsindex(name, attributes) def startTagIsindex(name, attributes)
@parser.parseError(_("Unexpected start tag isindex. Don't use it!")) parse_error(_("Unexpected start tag isindex. Don't use it!"))
return if @tree.formPointer return if @tree.formPointer
processStartTag('form', {}) processStartTag('form', {})
processStartTag('hr', {}) processStartTag('hr', {})
processStartTag('p', {}) processStartTag('p', {})
processStartTag('label', {}) processStartTag('label', {})
# XXX Localization ... # XXX Localization ...
processCharacters('This is a searchable index. Insert your search keywords here:') processCharacters('This is a searchable index. Insert your search keywords here: ')
attributes['name'] = 'isindex' attributes['name'] = 'isindex'
attrs = attributes.to_a attrs = attributes.to_a
processStartTag('input', attributes) processStartTag('input', attributes)
@ -270,20 +286,21 @@ module HTML5
# Handles a <textarea> start tag: inserts the element, sets the tokenizer's
# content model flag to :RCDATA and raises the
# @processSpaceCharactersDropNewline flag.
def startTagTextarea(name, attributes)
  # XXX Form element pointer checking here as well...
  @tree.insert_element(name, attributes)
  tokenizer = @parser.tokenizer
  tokenizer.content_model_flag = :RCDATA
  @processSpaceCharactersDropNewline = true
end
# iframe, noembed noframes, noscript(if scripting enabled) # iframe, noembed noframes, noscript(if scripting enabled)
# Inserts the element, then sets the tokenizer's content model flag to :CDATA.
def startTagCdata(name, attributes)
  @tree.insert_element(name, attributes)
  tokenizer = @parser.tokenizer
  tokenizer.content_model_flag = :CDATA
end
# Handles a <select> start tag in this phase: reconstructs the active
# formatting elements, inserts the element and hands the parser over to the
# :inSelect phase.
def startTagSelect(name, attributes)
  tree = @tree
  tree.reconstructActiveFormattingElements
  tree.insert_element(name, attributes)
  select_phase = @parser.phases[:inSelect]
  @parser.phase = select_phase
end
@ -293,7 +310,7 @@ module HTML5
# "caption", "col", "colgroup", "frame", "frameset", "head", # "caption", "col", "colgroup", "frame", "frameset", "head",
# "option", "optgroup", "tbody", "td", "tfoot", "th", "thead", # "option", "optgroup", "tbody", "td", "tfoot", "th", "thead",
# "tr", "noscript" # "tr", "noscript"
@parser.parseError(_("Unexpected start tag (#{name}). Ignored.")) parse_error(_("Unexpected start tag (#{name}). Ignored."))
end end
def startTagNew(name, attributes) def startTagNew(name, attributes)
@ -306,14 +323,14 @@ module HTML5
# Fallback start-tag handler: reconstructs the active formatting elements and
# inserts an element with the given name and attributes.
def startTagOther(name, attributes)
  tree = @tree
  tree.reconstructActiveFormattingElements
  tree.insert_element(name, attributes)
end
def endTagP(name) def endTagP(name)
@tree.generateImpliedEndTags('p') if in_scope?('p') @tree.generateImpliedEndTags('p') if in_scope?('p')
@parser.parseError(_('Unexpected end tag (p).')) unless @tree.openElements[-1].name == 'p' parse_error(_('Unexpected end tag (p).')) unless @tree.open_elements.last.name == 'p'
if in_scope?('p') if in_scope?('p')
@tree.openElements.pop while in_scope?('p') @tree.open_elements.pop while in_scope?('p')
else else
startTagCloseP('p', {}) startTagCloseP('p', {})
endTagP('p') endTagP('p')
@ -324,20 +341,20 @@ module HTML5
# XXX Need to take open <p> tags into account here. We shouldn't imply # XXX Need to take open <p> tags into account here. We shouldn't imply
# </p> but we should not throw a parse error either. Specification is # </p> but we should not throw a parse error either. Specification is
# likely to be updated. # likely to be updated.
unless @tree.openElements[1].name == 'body' unless @tree.open_elements[1].name == 'body'
# innerHTML case # inner_html case
@parser.parseError parse_error
return return
end end
unless @tree.openElements[-1].name == 'body' unless @tree.open_elements.last.name == 'body'
@parser.parseError(_("Unexpected end tag (body). Missing end tag (#{@tree.openElements[-1].name}).")) parse_error(_("Unexpected end tag (body). Missing end tag (#{@tree.open_elements[-1].name})."))
end end
@parser.phase = @parser.phases[:afterBody] @parser.phase = @parser.phases[:afterBody]
end end
# Handles </html>: first runs endTagBody with the same name, then — unless
# the parser's inner_html is set — re-dispatches the end tag to whatever
# phase the parser is in afterwards.
def endTagHtml(name)
  endTagBody(name)
  return if @parser.inner_html

  @parser.phase.processEndTag(name)
end
def endTagBlock(name) def endTagBlock(name)
@ -346,8 +363,8 @@ module HTML5
@tree.generateImpliedEndTags if in_scope?(name) @tree.generateImpliedEndTags if in_scope?(name)
unless @tree.openElements[-1].name == name unless @tree.open_elements.last.name == name
@parser.parseError(_("End tag (#{name}) seen too early. Expected other end tag.")) parse_error(_("End tag (#{name}) seen too early. Expected other end tag."))
end end
if in_scope?(name) if in_scope?(name)
@ -359,22 +376,20 @@ module HTML5
if in_scope?(name) if in_scope?(name)
@tree.generateImpliedEndTags @tree.generateImpliedEndTags
end end
if @tree.openElements[-1].name != name if @tree.open_elements.last.name != name
@parser.parseError(_("End tag (form) seen too early. Ignored.")) parse_error(_("End tag (form) seen too early. Ignored."))
else else
@tree.openElements.pop @tree.open_elements.pop
end end
@tree.formPointer = nil @tree.formPointer = nil
end end
def endTagListItem(name) def endTagListItem(name)
# AT Could merge this with the Block case # AT Could merge this with the Block case
if in_scope?(name) @tree.generateImpliedEndTags(name) if in_scope?(name)
@tree.generateImpliedEndTags(name)
unless @tree.openElements[-1].name == name unless @tree.open_elements.last.name == name
@parser.parseError(_("End tag (#{name}) seen too early. Expected other end tag.")) parse_error(_("End tag (#{name}) seen too early. " + 'Expected other end tag.'))
end
end end
remove_open_elements_until(name) if in_scope?(name) remove_open_elements_until(name) if in_scope?(name)
@ -388,13 +403,13 @@ module HTML5
end end
end end
unless @tree.openElements[-1].name == name unless @tree.open_elements.last.name == name
@parser.parseError(_("Unexpected end tag (#{name}). Expected other end tag.")) parse_error(_("Unexpected end tag (#{name}). Expected other end tag."))
end end
HEADING_ELEMENTS.each do |element| HEADING_ELEMENTS.each do |element|
if in_scope?(element) if in_scope?(element)
remove_open_elements_until { |element| HEADING_ELEMENTS.include?(element.name) } remove_open_elements_until {|element| HEADING_ELEMENTS.include?(element.name)}
break break
end end
end end
@ -403,30 +418,30 @@ module HTML5
# The much-feared adoption agency algorithm # The much-feared adoption agency algorithm
def endTagFormatting(name) def endTagFormatting(name)
# http://www.whatwg.org/specs/web-apps/current-work/#adoptionAgency # http://www.whatwg.org/specs/web-apps/current-work/#adoptionAgency
# XXX Better parseError messages appreciated. # XXX Better parse_error messages appreciated.
while true while true
# Step 1 paragraph 1 # Step 1 paragraph 1
afeElement = @tree.elementInActiveFormattingElements(name) afeElement = @tree.elementInActiveFormattingElements(name)
if not afeElement or (@tree.openElements.include?(afeElement) and not in_scope?(afeElement.name)) if !afeElement or (@tree.open_elements.include?(afeElement) && !in_scope?(afeElement.name))
@parser.parseError(_("End tag (#{name}) violates step 1, paragraph 1 of the adoption agency algorithm.")) parse_error(_("End tag (#{name}) violates step 1, paragraph 1 of the adoption agency algorithm."))
return return
# Step 1 paragraph 2 # Step 1 paragraph 2
elsif not @tree.openElements.include?(afeElement) elsif not @tree.open_elements.include?(afeElement)
@parser.parseError(_("End tag (#{name}) violates step 1, paragraph 2 of the adoption agency algorithm.")) parse_error(_("End tag (#{name}) violates step 1, paragraph 2 of the adoption agency algorithm."))
@tree.activeFormattingElements.delete(afeElement) @tree.activeFormattingElements.delete(afeElement)
return return
end end
# Step 1 paragraph 3 # Step 1 paragraph 3
if afeElement != @tree.openElements[-1] if afeElement != @tree.open_elements.last
@parser.parseError(_("End tag (#{name}) violates step 1, paragraph 3 of the adoption agency algorithm.")) parse_error(_("End tag (#{name}) violates step 1, paragraph 3 of the adoption agency algorithm."))
end end
# Step 2 # Step 2
# Start of the adoption agency algorithm proper # Start of the adoption agency algorithm proper
afeIndex = @tree.openElements.index(afeElement) afeIndex = @tree.open_elements.index(afeElement)
furthestBlock = nil furthestBlock = nil
@tree.openElements[afeIndex..-1].each do |element| @tree.open_elements[afeIndex..-1].each do |element|
if (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(element.name) if (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(element.name)
furthestBlock = element furthestBlock = element
break break
@ -435,11 +450,11 @@ module HTML5
# Step 3 # Step 3
if furthestBlock.nil? if furthestBlock.nil?
element = remove_open_elements_until { |element| element == afeElement } element = remove_open_elements_until {|element| element == afeElement }
@tree.activeFormattingElements.delete(element) @tree.activeFormattingElements.delete(element)
return return
end end
commonAncestor = @tree.openElements[afeIndex - 1] commonAncestor = @tree.open_elements[afeIndex - 1]
# Step 5 # Step 5
furthestBlock.parent.removeChild(furthestBlock) if furthestBlock.parent furthestBlock.parent.removeChild(furthestBlock) if furthestBlock.parent
@ -456,11 +471,11 @@ module HTML5
while true while true
# AT replace this with a function and recursion? # AT replace this with a function and recursion?
# Node is element before node in open elements # Node is element before node in open elements
node = @tree.openElements[@tree.openElements.index(node) - 1] node = @tree.open_elements[@tree.open_elements.index(node) - 1]
until @tree.activeFormattingElements.include?(node) until @tree.activeFormattingElements.include?(node)
tmpNode = node tmpNode = node
node = @tree.openElements[@tree.openElements.index(node) - 1] node = @tree.open_elements[@tree.open_elements.index(node) - 1]
@tree.openElements.delete(tmpNode) @tree.open_elements.delete(tmpNode)
end end
# Step 7.3 # Step 7.3
break if node == afeElement break if node == afeElement
@ -477,7 +492,7 @@ module HTML5
clone = node.cloneNode clone = node.cloneNode
# Replace node with clone # Replace node with clone
@tree.activeFormattingElements[@tree.activeFormattingElements.index(node)] = clone @tree.activeFormattingElements[@tree.activeFormattingElements.index(node)] = clone
@tree.openElements[@tree.openElements.index(node)] = clone @tree.open_elements[@tree.open_elements.index(node)] = clone
node = clone node = clone
end end
# Step 7.6 # Step 7.6
@ -507,47 +522,47 @@ module HTML5
@tree.activeFormattingElements.insert([bookmark,@tree.activeFormattingElements.length].min, clone) @tree.activeFormattingElements.insert([bookmark,@tree.activeFormattingElements.length].min, clone)
# Step 13 # Step 13
@tree.openElements.delete(afeElement) @tree.open_elements.delete(afeElement)
@tree.openElements.insert(@tree.openElements.index(furthestBlock) + 1, clone) @tree.open_elements.insert(@tree.open_elements.index(furthestBlock) + 1, clone)
end end
end end
# End-tag handler shared by button / marquee / object (per the method name).
# Generates implied end tags when +name+ is in scope, reports a parse error
# if the current node is not +name+, and, when +name+ is in scope, removes
# open elements until +name+ and clears the active formatting elements.
def endTagButtonMarqueeObject(name)
  @tree.generateImpliedEndTags if in_scope?(name)

  if @tree.open_elements[-1].name != name
    parse_error(_("Unexpected end tag (#{name}). Expected other end tag first."))
  end

  # in_scope? is asked again on purpose: the stack may have changed above.
  return unless in_scope?(name)

  remove_open_elements_until(name)
  @tree.clearActiveFormattingElements
end
# This handles elements with end tags in other insertion modes:
# the tag is only reported as a parse error.
def endTagMisplaced(name)
  message = _("Unexpected end tag (#{name}). Ignored.")
  parse_error(message)
end
# Handles </br>: reports a parse error, then behaves like a start tag —
# reconstructs the active formatting elements, inserts an element called
# +name+ with no attributes and pops it off the open-elements stack.
def endTagBr(name)
  parse_error(_("Unexpected end tag (br). Treated as br element."))
  tree = @tree
  tree.reconstructActiveFormattingElements
  tree.insert_element(name, {})
  tree.open_elements.pop
end
# This handles elements with no end tag: the stray end tag is only reported
# as a parse error.
def endTagNone(name)
  message = _("This tag (#{name}) has no end tag")
  parse_error(message)
end
# Pops the current node when its name matches the end tag; otherwise reports
# a parse error and leaves the stack untouched.
def endTagCdataTextAreaXmp(name)
  return @tree.open_elements.pop if @tree.open_elements[-1].name == name

  parse_error(_("Unexpected end tag (#{name}). Ignored."))
end
@ -561,20 +576,20 @@ module HTML5
def endTagOther(name) def endTagOther(name)
# XXX This logic should be moved into the treebuilder # XXX This logic should be moved into the treebuilder
@tree.openElements.reverse.each do |node| @tree.open_elements.reverse.each do |node|
if node.name == name if node.name == name
@tree.generateImpliedEndTags @tree.generateImpliedEndTags
unless @tree.openElements[-1].name == name unless @tree.open_elements.last.name == name
@parser.parseError(_("Unexpected end tag (#{name}).")) parse_error(_("Unexpected end tag (#{name})."))
end end
remove_open_elements_until { |element| element == node } remove_open_elements_until {|element| element == node }
break break
else else
if (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name) if (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name)
@parser.parseError(_("Unexpected end tag (#{name}). Ignored.")) parse_error(_("Unexpected end tag (#{name}). Ignored."))
break break
end end
end end
@ -584,8 +599,8 @@ module HTML5
protected protected
# Inserts an element and then pushes the node that is now last on the
# open-elements stack onto the list of active formatting elements.
def addFormattingElement(name, attributes)
  @tree.insert_element(name, attributes)
  inserted = @tree.open_elements[-1]
  @tree.activeFormattingElements.push(inserted)
end
end end

View file

@ -10,7 +10,7 @@ module HTML5
handle_end 'caption', 'table', %w( body col colgroup html tbody td tfoot th thead tr ) => 'Ignore' handle_end 'caption', 'table', %w( body col colgroup html tbody td tfoot th thead tr ) => 'Ignore'
# True when in_scope?('caption', true) reports that no caption is in scope,
# i.e. when a caption end tag would have to be ignored.
def ignoreEndTagCaption
  found = in_scope?('caption', true)
  !found
end
def processCharacters(data) def processCharacters(data)
@ -18,7 +18,7 @@ module HTML5
end end
def startTagTableElement(name, attributes) def startTagTableElement(name, attributes)
@parser.parseError parse_error
#XXX Have to duplicate logic here to find out if the tag is ignored #XXX Have to duplicate logic here to find out if the tag is ignored
ignoreEndTag = ignoreEndTagCaption ignoreEndTag = ignoreEndTagCaption
@parser.phase.processEndTag('caption') @parser.phase.processEndTag('caption')
@ -31,15 +31,15 @@ module HTML5
def endTagCaption(name) def endTagCaption(name)
if ignoreEndTagCaption if ignoreEndTagCaption
# innerHTML case # inner_html case
assert @parser.innerHTML assert @parser.inner_html
@parser.parseError parse_error
else else
# AT this code is quite similar to endTagTable in "InTable" # AT this code is quite similar to endTagTable in "InTable"
@tree.generateImpliedEndTags @tree.generateImpliedEndTags
unless @tree.openElements[-1].name == 'caption' unless @tree.open_elements[-1].name == 'caption'
@parser.parseError(_("Unexpected end tag (caption). Missing end tags.")) parse_error(_("Unexpected end tag (caption). Missing end tags."))
end end
remove_open_elements_until('caption') remove_open_elements_until('caption')
@ -50,14 +50,14 @@ module HTML5
end end
# Handles </table> in this phase: reports a parse error, acts as if a
# </caption> had been seen, and re-dispatches the original end tag unless
# the caption end tag was one that gets ignored.
def endTagTable(name)
  parse_error
  # Must be sampled before the implied </caption> is processed.
  caption_ignored = ignoreEndTagCaption
  @parser.phase.processEndTag('caption')
  return if caption_ignored

  @parser.phase.processEndTag(name)
end
# Reports the end tag as a parse error and does nothing else.
def endTagIgnore(name)
  message = _("Unexpected end tag (#{name}). Ignored.")
  parse_error(message)
end
def endTagOther(name) def endTagOther(name)

View file

@ -20,8 +20,8 @@ module HTML5
closeCell closeCell
@parser.phase.processStartTag(name, attributes) @parser.phase.processStartTag(name, attributes)
else else
# innerHTML case # inner_html case
@parser.parseError parse_error
end end
end end
@ -32,22 +32,22 @@ module HTML5
# Handles the end tag of a table cell.
#
# When +name+ is in scope (in_scope? called with a second argument of true):
# implied end tags are generated, the cell is closed — with a parse error and
# remove_open_elements_until if the current node is not +name+, with a plain
# pop otherwise — the active formatting elements are cleared and the parser
# moves to the :inRow phase. When +name+ is not in scope the tag is reported
# as a parse error and ignored.
def endTagTableCell(name)
  if in_scope?(name, true)
    @tree.generateImpliedEndTags(name)
    if @tree.open_elements.last.name != name
      # Routed through _() like every other parse-error message in this file
      # (presumably the localization hook — TODO confirm).
      parse_error(_("Got table cell end tag (#{name}) while required end tags are missing."))
      remove_open_elements_until(name)
    else
      @tree.open_elements.pop
    end
    @tree.clearActiveFormattingElements
    @parser.phase = @parser.phases[:inRow]
  else
    parse_error(_("Unexpected end tag (#{name}). Ignored."))
  end
end
# Reports the end tag as a parse error and does nothing else.
def endTagIgnore(name)
  message = _("Unexpected end tag (#{name}). Ignored.")
  parse_error(message)
end
def endTagImply(name) def endTagImply(name)
@ -55,8 +55,8 @@ module HTML5
closeCell closeCell
@parser.phase.processEndTag(name) @parser.phase.processEndTag(name)
else else
# sometimes innerHTML case # sometimes inner_html case
@parser.parseError parse_error
end end
end end

View file

@ -10,7 +10,7 @@ module HTML5
handle_end 'colgroup', 'col' handle_end 'colgroup', 'col'
# True when the current node (last entry of the open-elements stack) is
# named 'html'.
def ignoreEndTagColgroup
  current = @tree.open_elements.last
  current.name == 'html'
end
def processCharacters(data) def processCharacters(data)
@ -20,8 +20,8 @@ module HTML5
end end
# Handles a <col> start tag: inserts the element and pops it straight back
# off the open-elements stack.
def startTagCol(name, attributes)
  tree = @tree
  tree.insert_element(name, attributes)
  tree.open_elements.pop
end
def startTagOther(name, attributes) def startTagOther(name, attributes)
@ -32,17 +32,17 @@ module HTML5
# Handles </colgroup>. If ignoreEndTagColgroup says the tag must be ignored,
# asserts that the parser's inner_html is set and reports a parse error;
# otherwise pops the current node and switches to the :inTable phase.
def endTagColgroup(name)
  if ignoreEndTagColgroup
    # inner_html case
    assert @parser.inner_html
    return parse_error
  end

  @tree.open_elements.pop
  @parser.phase = @parser.phases[:inTable]
end
# Handles </col>: only a parse error is reported.
def endTagCol(name)
  message = _('Unexpected end tag (col). col has no end tag.')
  parse_error(message)
end
def endTagOther(name) def endTagOther(name)

View file

@ -10,16 +10,16 @@ module HTML5
handle_end 'frameset', 'noframes' handle_end 'frameset', 'noframes'
# Character data in this phase is dropped; only a parse error is reported.
# +data+ is not used.
def processCharacters(data)
  message = _('Unexpected characters in the frameset phase. Characters ignored.')
  parse_error(message)
end
# Handles a <frameset> start tag by inserting the element; the inserted
# element is left on the open-elements stack.
def startTagFrameset(name, attributes)
  tree = @tree
  tree.insert_element(name, attributes)
end
# Handles a <frame> start tag: inserts the element and pops it straight back
# off the open-elements stack.
def startTagFrame(name, attributes)
  tree = @tree
  tree.insert_element(name, attributes)
  tree.open_elements.pop
end
def startTagNoframes(name, attributes) def startTagNoframes(name, attributes)
@ -27,19 +27,19 @@ module HTML5
end end
# Any other start tag in this phase is ignored after reporting a parse error.
# +attributes+ is not used.
def startTagOther(name, attributes)
  message = _("Unexpected start tag token (#{name}) in the frameset phase. Ignored")
  parse_error(message)
end
def endTagFrameset(name) def endTagFrameset(name)
if @tree.openElements[-1].name == 'html' if @tree.open_elements.last.name == 'html'
# innerHTML case # inner_html case
@parser.parseError(_("Unexpected end tag token (frameset) in the frameset phase (innerHTML).")) parse_error(_("Unexpected end tag token (frameset) in the frameset phase (inner_html)."))
else else
@tree.openElements.pop @tree.open_elements.pop
end end
if (not @parser.innerHTML and if (not @parser.inner_html and
@tree.openElements[-1].name != 'frameset') @tree.open_elements.last.name != 'frameset')
# If we're not in innerHTML mode and the the current node is not a # If we're not in inner_html mode and the the current node is not a
# "frameset" element (anymore) then switch. # "frameset" element (anymore) then switch.
@parser.phase = @parser.phases[:afterFrameset] @parser.phase = @parser.phases[:afterFrameset]
end end
@ -50,7 +50,7 @@ module HTML5
end end
# Any other end tag in this phase is ignored after reporting a parse error.
def endTagOther(name)
  message = _("Unexpected end tag token (#{name}) in the frameset phase. Ignored.")
  parse_error(message)
end
end end

View file

@ -3,108 +3,120 @@ require 'html5/html5parser/phase'
module HTML5 module HTML5
class InHeadPhase < Phase class InHeadPhase < Phase
handle_start 'html', 'head', 'title', 'style', 'script', %w( base link meta ) handle_start 'html', 'head', 'title', 'style', 'script', 'noscript'
handle_start %w( base link meta )
handle_end 'head' handle_end 'head'
handle_end %w( html body br p ) => 'ImplyAfterHead' handle_end %w( html body br p ) => 'ImplyAfterHead'
handle_end %w( title style script ) handle_end %w( title style script noscript )
# End-of-file handling for this phase. If the current node is a title, style
# or script element, a parse error is reported and the node is popped. Then
# anything_else runs and EOF is forwarded to whatever phase the parser is in
# afterwards.
# NOTE(review): 'noscript' is accepted by processCharacters in this class but
# is not in the list checked here — confirm whether that is intentional.
def process_eof
  current = @tree.open_elements[-1].name
  if %w[title style script].include?(current)
    parse_error(_("Unexpected end of file. Expected end tag (#{current})."))
    @tree.open_elements.pop
  end

  anything_else
  @parser.phase.process_eof
end
# Character data: appended as text while the current node is a title, style,
# script or noscript element; otherwise anything_else runs and the data is
# re-dispatched to whatever phase the parser is in afterwards.
def processCharacters(data)
  current = @tree.open_elements[-1].name
  return @tree.insertText(data) if ['title', 'style', 'script', 'noscript'].include?(current)

  anything_else
  @parser.phase.processCharacters(data)
end
# A <head> start tag inside an existing head is ignored after reporting a
# parse error. Neither argument is used.
def startTagHead(name, attributes)
  message = _('Unexpected start tag head in existing head. Ignored')
  parse_error(message)
end
# Handles a <title> start tag: creates the element, attaches it through
# appendToHead, pushes it onto the open-elements stack and sets the
# tokenizer's content model flag to :RCDATA.
def startTagTitle(name, attributes)
  title_node = @tree.createElement(name, attributes)
  appendToHead(title_node)
  @tree.open_elements.push(title_node)
  tokenizer = @parser.tokenizer
  tokenizer.content_model_flag = :RCDATA
end
# Handles a <style> start tag. The new element goes through appendToHead
# when a head pointer exists and the parser is currently in the :inHead
# phase; otherwise it is appended to the current node. It is then pushed onto
# the open-elements stack and the tokenizer is switched to :CDATA.
def startTagStyle(name, attributes)
  node = @tree.createElement(name, attributes)
  attach_to_head = @tree.head_pointer != nil && @parser.phase == @parser.phases[:inHead]
  if attach_to_head
    appendToHead(node)
  else
    @tree.open_elements[-1].appendChild(node)
  end
  @tree.open_elements.push(node)
  @parser.tokenizer.content_model_flag = :CDATA
end
# Handles a <noscript> start tag. The new element goes through appendToHead
# when a head pointer exists and the parser is currently in the :inHead
# phase; otherwise it is appended to the current node. It is then pushed onto
# the open-elements stack and the tokenizer is switched to :CDATA.
def startTagNoscript(name, attributes)
  # XXX Need to decide whether to implement the scripting disabled case.
  node = @tree.createElement(name, attributes)
  attach_to_head = @tree.head_pointer != nil && @parser.phase == @parser.phases[:inHead]
  if attach_to_head
    appendToHead(node)
  else
    @tree.open_elements[-1].appendChild(node)
  end
  @tree.open_elements.push(node)
  @parser.tokenizer.content_model_flag = :CDATA
end
# Handles a <script> start tag. The new element is flagged
# "parser-inserted", then goes through appendToHead when a head pointer
# exists and the parser is currently in the :inHead phase; otherwise it is
# appended to the current node. It is then pushed onto the open-elements
# stack and the tokenizer is switched to :CDATA.
def startTagScript(name, attributes)
  #XXX Inner HTML case may be wrong
  node = @tree.createElement(name, attributes)
  node._flags.push("parser-inserted")
  attach_to_head = @tree.head_pointer != nil && @parser.phase == @parser.phases[:inHead]
  if attach_to_head
    appendToHead(node)
  else
    @tree.open_elements[-1].appendChild(node)
  end
  @tree.open_elements.push(node)
  @parser.tokenizer.content_model_flag = :CDATA
end
# Handles <base>, <link> and <meta> start tags (per the method name). The
# new element goes through appendToHead when a head pointer exists and the
# parser is currently in the :inHead phase; otherwise it is appended to the
# current node. It is never pushed onto the open-elements stack.
def startTagBaseLinkMeta(name, attributes)
  node = @tree.createElement(name, attributes)
  attach_to_head = @tree.head_pointer != nil && @parser.phase == @parser.phases[:inHead]
  return appendToHead(node) if attach_to_head

  @tree.open_elements[-1].appendChild(node)
end
# Any other start tag: runs anything_else, then re-dispatches the tag to
# whatever phase the parser is in afterwards.
def startTagOther(name, attributes)
  anything_else
  next_phase = @parser.phase
  next_phase.processStartTag(name, attributes)
end
# Handles </head>: pops the current node if it is the head element,
# otherwise reports a parse error. Either way the parser moves on to the
# :afterHead phase.
def endTagHead(name)
  if @tree.open_elements[-1].name != 'head'
    parse_error(_("Unexpected end tag (head). Ignored."))
  else
    @tree.open_elements.pop
  end
  @parser.phase = @parser.phases[:afterHead]
end
# Runs anything_else, then re-dispatches the end tag to whatever phase the
# parser is in afterwards.
def endTagImplyAfterHead(name)
  anything_else
  next_phase = @parser.phase
  next_phase.processEndTag(name)
end
# Pops the current node when its name matches the end tag; otherwise reports
# a parse error and leaves the stack untouched.
def endTagTitleStyleScriptNoscript(name)
  return @tree.open_elements.pop if @tree.open_elements[-1].name == name

  parse_error(_("Unexpected end tag (#{name}). Ignored."))
end
# Any other end tag in this phase is ignored after reporting a parse error.
def endTagOther(name)
  message = _("Unexpected end tag (#{name}). Ignored.")
  parse_error(message)
end
def anythingElse def anything_else
if @tree.openElements[-1].name == 'head' if @tree.open_elements.last.name == 'head'
endTagHead('head') endTagHead('head')
else else
@parser.phase = @parser.phases[:afterHead] @parser.phase = @parser.phases[:afterHead]
@ -114,11 +126,11 @@ module HTML5
protected protected
# Appends +element+ to the tree's head pointer when there is one. With no
# head pointer it asserts that the parser's inner_html is set and appends to
# the current node instead.
def appendToHead(element)
  unless @tree.head_pointer.nil?
    return @tree.head_pointer.appendChild(element)
  end

  assert @parser.inner_html
  @tree.open_elements[-1].appendChild(element)
end

View file

@ -15,7 +15,7 @@ module HTML5
# Handles a table-cell start tag in this phase: clears the stack back to the
# row context, inserts the element, switches to the :inCell phase and pushes
# Marker onto the active formatting elements.
def startTagTableCell(name, attributes)
  clearStackToTableRowContext
  @tree.insert_element(name, attributes)
  cell_phase = @parser.phases[:inCell]
  @parser.phase = cell_phase
  @tree.activeFormattingElements.push(Marker)
end
@ -23,7 +23,7 @@ module HTML5
# Acts as if a </tr> had been seen, then re-dispatches the start tag to the
# parser's current phase unless the implied </tr> was one that gets ignored.
def startTagTableOther(name, attributes)
  # Must be sampled before endTagTr runs.
  tr_ignored = ignoreEndTagTr
  endTagTr('tr')
  # XXX how are we sure it's always ignored in the inner_html case?
  return if tr_ignored

  @parser.phase.processStartTag(name, attributes)
end
@ -33,12 +33,12 @@ module HTML5
# Handles </tr>. If ignoreEndTagTr says the tag must be ignored, asserts that
# the parser's inner_html is set and reports a parse error; otherwise clears
# the stack back to the row context, pops the current node and switches to
# the :inTableBody phase.
def endTagTr(name)
  if ignoreEndTagTr
    # inner_html case
    assert @parser.inner_html
    return parse_error
  end

  clearStackToTableRowContext
  @tree.open_elements.pop
  @parser.phase = @parser.phases[:inTableBody]
end
@ -47,7 +47,7 @@ module HTML5
ignoreEndTag = ignoreEndTagTr ignoreEndTag = ignoreEndTagTr
endTagTr('tr') endTagTr('tr')
# Reprocess the current tag if the tr end tag was not ignored # Reprocess the current tag if the tr end tag was not ignored
# XXX how are we sure it's always ignored in the innerHTML case? # XXX how are we sure it's always ignored in the inner_html case?
@parser.phase.processEndTag(name) unless ignoreEndTag @parser.phase.processEndTag(name) unless ignoreEndTag
end end
@ -56,13 +56,13 @@ module HTML5
endTagTr('tr') endTagTr('tr')
@parser.phase.processEndTag(name) @parser.phase.processEndTag(name)
else else
# innerHTML case # inner_html case
@parser.parseError parse_error
end end
end end
# Reports the end tag as a parse error and does nothing else.
def endTagIgnore(name)
  message = _("Unexpected end tag (#{name}) in the row phase. Ignored.")
  parse_error(message)
end
def endTagOther(name) def endTagOther(name)
@ -73,9 +73,9 @@ module HTML5
# XXX unify this with other table helper methods # XXX unify this with other table helper methods
# Pops open elements, reporting a parse error for each one, until the
# current node is a 'tr' or 'html' element.
def clearStackToTableRowContext
  current = @tree.open_elements[-1].name
  until current == 'tr' || current == 'html'
    parse_error(_("Unexpected implied end tag (#{current}) in the row phase."))
    @tree.open_elements.pop
    current = @tree.open_elements[-1].name
  end
end

View file

@ -15,44 +15,44 @@ module HTML5
# Handles an <option> start tag: an <option> that is the current node is
# popped first (implied </option>), then the new element is inserted.
def startTagOption(name, attributes)
  # We need to imply </option> if <option> is the current node.
  if @tree.open_elements[-1].name == 'option'
    @tree.open_elements.pop
  end
  @tree.insert_element(name, attributes)
end
# Handles an <optgroup> start tag: pops the current node if it is an
# <option>, then pops the (new) current node if it is an <optgroup>, and
# finally inserts the new element.
def startTagOptgroup(name, attributes)
  # Order matters: the second check looks at the node exposed by the first pop.
  ['option', 'optgroup'].each do |implied|
    @tree.open_elements.pop if @tree.open_elements[-1].name == implied
  end
  @tree.insert_element(name, attributes)
end
# A <select> start tag inside this phase: reports a parse error and then
# runs endTagSelect('select'). Neither argument is used.
def startTagSelect(name, attributes)
  message = _('Unexpected start tag (select) in the select phase implies select start tag.')
  parse_error(message)
  endTagSelect('select')
end
# Any other start tag in the select phase is ignored after reporting a parse
# error that names the offending tag. +attributes+ is not used.
def startTagOther(name, attributes)
  # Double quotes are required here: a single-quoted literal would emit the
  # text "#{name}" verbatim instead of interpolating the tag name.
  parse_error(_("Unexpected start tag token (#{name}) in the select phase. Ignored."))
end
# Handles </option>: pops the current node if it is an <option>, otherwise
# reports a parse error and leaves the stack untouched.
def endTagOption(name)
  return @tree.open_elements.pop if @tree.open_elements[-1].name == 'option'

  parse_error(_('Unexpected end tag (option) in the select phase. Ignored.'))
end
# Handles </optgroup>. An <option> that is the current node is popped first
# when the node beneath it is an <optgroup>; then the <optgroup> itself is
# popped if it is now the current node, otherwise a parse error is reported.
def endTagOptgroup(name)
  # </optgroup> implicitly closes <option>
  option_inside_optgroup = @tree.open_elements[-1].name == 'option' &&
                           @tree.open_elements[-2].name == 'optgroup'
  @tree.open_elements.pop if option_inside_optgroup

  # It also closes </optgroup>
  return @tree.open_elements.pop if @tree.open_elements[-1].name == 'optgroup'

  # But nothing else
  parse_error(_('Unexpected end tag (optgroup) in the select phase. Ignored.'))
end
@ -60,15 +60,15 @@ module HTML5
if in_scope?('select', true) if in_scope?('select', true)
remove_open_elements_until('select') remove_open_elements_until('select')
@parser.resetInsertionMode @parser.reset_insertion_mode
else else
# innerHTML case # inner_html case
@parser.parseError parse_error
end end
end end
def endTagTableElements(name) def endTagTableElements(name)
@parser.parseError(_("Unexpected table end tag (#{name}) in the select phase.")) parse_error(_("Unexpected table end tag (#{name}) in the select phase."))
if in_scope?(name, true) if in_scope?(name, true)
endTagSelect('select') endTagSelect('select')
@ -77,7 +77,7 @@ module HTML5
end end
def endTagOther(name) def endTagOther(name)
@parser.parseError(_("Unexpected end tag token (#{name}) in the select phase. Ignored.")) parse_error(_("Unexpected end tag token (#{name}) in the select phase. Ignored."))
end end
end end

View file

@ -15,12 +15,12 @@ module HTML5
def startTagTr(name, attributes) def startTagTr(name, attributes)
clearStackToTableBodyContext clearStackToTableBodyContext
@tree.insertElement(name, attributes) @tree.insert_element(name, attributes)
@parser.phase = @parser.phases[:inRow] @parser.phase = @parser.phases[:inRow]
end end
def startTagTableCell(name, attributes) def startTagTableCell(name, attributes)
@parser.parseError(_("Unexpected table cell start tag (#{name}) in the table body phase.")) parse_error(_("Unexpected table cell start tag (#{name}) in the table body phase."))
startTagTr('tr', {}) startTagTr('tr', {})
@parser.phase.processStartTag(name, attributes) @parser.phase.processStartTag(name, attributes)
end end
@ -29,11 +29,11 @@ module HTML5
# XXX AT Any ideas on how to share this with endTagTable? # XXX AT Any ideas on how to share this with endTagTable?
if in_scope?('tbody', true) or in_scope?('thead', true) or in_scope?('tfoot', true) if in_scope?('tbody', true) or in_scope?('thead', true) or in_scope?('tfoot', true)
clearStackToTableBodyContext clearStackToTableBodyContext
endTagTableRowGroup(@tree.openElements[-1].name) endTagTableRowGroup(@tree.open_elements.last.name)
@parser.phase.processStartTag(name, attributes) @parser.phase.processStartTag(name, attributes)
else else
# innerHTML case # inner_html case
@parser.parseError parse_error
end end
end end
@ -44,26 +44,26 @@ module HTML5
def endTagTableRowGroup(name) def endTagTableRowGroup(name)
if in_scope?(name, true) if in_scope?(name, true)
clearStackToTableBodyContext clearStackToTableBodyContext
@tree.openElements.pop @tree.open_elements.pop
@parser.phase = @parser.phases[:inTable] @parser.phase = @parser.phases[:inTable]
else else
@parser.parseError(_("Unexpected end tag (#{name}) in the table body phase. Ignored.")) parse_error(_("Unexpected end tag (#{name}) in the table body phase. Ignored."))
end end
end end
def endTagTable(name) def endTagTable(name)
if in_scope?('tbody', true) or in_scope?('thead', true) or in_scope?('tfoot', true) if in_scope?('tbody', true) or in_scope?('thead', true) or in_scope?('tfoot', true)
clearStackToTableBodyContext clearStackToTableBodyContext
endTagTableRowGroup(@tree.openElements[-1].name) endTagTableRowGroup(@tree.open_elements.last.name)
@parser.phase.processEndTag(name) @parser.phase.processEndTag(name)
else else
# innerHTML case # inner_html case
@parser.parseError parse_error
end end
end end
def endTagIgnore(name) def endTagIgnore(name)
@parser.parseError(_("Unexpected end tag (#{name}) in the table body phase. Ignored.")) parse_error(_("Unexpected end tag (#{name}) in the table body phase. Ignored."))
end end
def endTagOther(name) def endTagOther(name)
@ -73,9 +73,9 @@ module HTML5
protected protected
def clearStackToTableBodyContext def clearStackToTableBodyContext
until ['tbody', 'tfoot', 'thead', 'html'].include?(name = @tree.openElements[-1].name) until %w[tbody tfoot thead html].include?(name = @tree.open_elements.last.name)
@parser.parseError(_("Unexpected implied end tag (#{name}) in the table body phase.")) parse_error(_("Unexpected implied end tag (#{name}) in the table body phase."))
@tree.openElements.pop @tree.open_elements.pop
end end
end end

View file

@ -12,24 +12,24 @@ module HTML5
handle_end 'table', %w( body caption col colgroup html tbody td tfoot th thead tr ) => 'Ignore' handle_end 'table', %w( body caption col colgroup html tbody td tfoot th thead tr ) => 'Ignore'
def processCharacters(data) def processCharacters(data)
@parser.parseError(_("Unexpected non-space characters in table context caused voodoo mode.")) parse_error(_("Unexpected non-space characters in table context caused voodoo mode."))
# Make all the special element rearranging voodoo kick in # Make all the special element rearranging voodoo kick in
@tree.insertFromTable = true @tree.insert_from_table = true
# Process the character in the "in body" mode # Process the character in the "in body" mode
@parser.phases[:inBody].processCharacters(data) @parser.phases[:inBody].processCharacters(data)
@tree.insertFromTable = false @tree.insert_from_table = false
end end
def startTagCaption(name, attributes) def startTagCaption(name, attributes)
clearStackToTableContext clearStackToTableContext
@tree.activeFormattingElements.push(Marker) @tree.activeFormattingElements.push(Marker)
@tree.insertElement(name, attributes) @tree.insert_element(name, attributes)
@parser.phase = @parser.phases[:inCaption] @parser.phase = @parser.phases[:inCaption]
end end
def startTagColgroup(name, attributes) def startTagColgroup(name, attributes)
clearStackToTableContext clearStackToTableContext
@tree.insertElement(name, attributes) @tree.insert_element(name, attributes)
@parser.phase = @parser.phases[:inColumnGroup] @parser.phase = @parser.phases[:inColumnGroup]
end end
@ -40,7 +40,7 @@ module HTML5
def startTagRowGroup(name, attributes) def startTagRowGroup(name, attributes)
clearStackToTableContext clearStackToTableContext
@tree.insertElement(name, attributes) @tree.insert_element(name, attributes)
@parser.phase = @parser.phases[:inTableBody] @parser.phase = @parser.phases[:inTableBody]
end end
@ -50,60 +50,60 @@ module HTML5
end end
def startTagTable(name, attributes) def startTagTable(name, attributes)
@parser.parseError(_("Unexpected start tag (table) in table phase. Implies end tag (table).")) parse_error(_("Unexpected start tag (table) in table phase. Implies end tag (table)."))
@parser.phase.processEndTag('table') @parser.phase.processEndTag('table')
@parser.phase.processStartTag(name, attributes) unless @parser.innerHTML @parser.phase.processStartTag(name, attributes) unless @parser.inner_html
end end
def startTagOther(name, attributes) def startTagOther(name, attributes)
@parser.parseError(_("Unexpected start tag (#{name}) in table context caused voodoo mode.")) parse_error(_("Unexpected start tag (#{name}) in table context caused voodoo mode."))
# Make all the special element rearranging voodoo kick in # Make all the special element rearranging voodoo kick in
@tree.insertFromTable = true @tree.insert_from_table = true
# Process the start tag in the "in body" mode # Process the start tag in the "in body" mode
@parser.phases[:inBody].processStartTag(name, attributes) @parser.phases[:inBody].processStartTag(name, attributes)
@tree.insertFromTable = false @tree.insert_from_table = false
end end
def endTagTable(name) def endTagTable(name)
if in_scope?('table', true) if in_scope?('table', true)
@tree.generateImpliedEndTags @tree.generateImpliedEndTags
unless @tree.openElements[-1].name == 'table' unless @tree.open_elements.last.name == 'table'
@parser.parseError(_("Unexpected end tag (table). Expected end tag (#{@tree.openElements[-1].name}).")) parse_error(_("Unexpected end tag (table). Expected end tag (#{@tree.open_elements.last.name})."))
end end
remove_open_elements_until('table') remove_open_elements_until('table')
@parser.resetInsertionMode @parser.reset_insertion_mode
else else
# innerHTML case # inner_html case
assert @parser.innerHTML assert @parser.inner_html
@parser.parseError parse_error
end end
end end
def endTagIgnore(name) def endTagIgnore(name)
@parser.parseError(_("Unexpected end tag (#{name}). Ignored.")) parse_error(_("Unexpected end tag (#{name}). Ignored."))
end end
def endTagOther(name) def endTagOther(name)
@parser.parseError(_("Unexpected end tag (#{name}) in table context caused voodoo mode.")) parse_error(_("Unexpected end tag (#{name}) in table context caused voodoo mode."))
# Make all the special element rearranging voodoo kick in # Make all the special element rearranging voodoo kick in
@tree.insertFromTable = true @tree.insert_from_table = true
# Process the end tag in the "in body" mode # Process the end tag in the "in body" mode
@parser.phases[:inBody].processEndTag(name) @parser.phases[:inBody].processEndTag(name)
@tree.insertFromTable = false @tree.insert_from_table = false
end end
protected protected
def clearStackToTableContext def clearStackToTableContext
# "clear the stack back to a table context" # "clear the stack back to a table context"
until ['table', 'html'].include?(name = @tree.openElements[-1].name) until %w[table html].include?(name = @tree.open_elements.last.name)
@parser.parseError(_("Unexpected implied end tag (#{name}) in the table phase.")) parse_error(_("Unexpected implied end tag (#{name}) in the table phase."))
@tree.openElements.pop @tree.open_elements.pop
end end
# When the current node is <html> it's an innerHTML case # When the current node is <html> it's an inner_html case
end end
end end

View file

@ -7,22 +7,22 @@ module HTML5
# covered in the specification. The error handling is typically known as # covered in the specification. The error handling is typically known as
# "quirks mode". It is expected that a future version of HTML5 will define this. # "quirks mode". It is expected that a future version of HTML5 will define this.
def processEOF def process_eof
@parser.parseError(_('Unexpected End of file. Expected DOCTYPE.')) parse_error(_('Unexpected End of file. Expected DOCTYPE.'))
@parser.phase = @parser.phases[:rootElement] @parser.phase = @parser.phases[:rootElement]
@parser.phase.processEOF @parser.phase.process_eof
end end
def processComment(data) def processComment(data)
@tree.insertComment(data, @tree.document) @tree.insert_comment(data, @tree.document)
end end
def processDoctype(name, publicId, systemId, correct) def processDoctype(name, publicId, systemId, correct)
if name.downcase != 'html' or publicId or systemId if name.downcase != 'html' or publicId or systemId
@parser.parseError(_('Erroneous DOCTYPE.')) parse_error(_('Erroneous DOCTYPE.'))
end end
# XXX need to update DOCTYPE tokens # XXX need to update DOCTYPE tokens
@tree.insertDoctype(name) @tree.insertDoctype(name, publicId, systemId)
publicId = publicId.to_s.upcase publicId = publicId.to_s.upcase
@ -110,23 +110,22 @@ module HTML5
end end
def processSpaceCharacters(data) def processSpaceCharacters(data)
@tree.insertText(data, @tree.document)
end end
def processCharacters(data) def processCharacters(data)
@parser.parseError(_('Unexpected non-space characters. Expected DOCTYPE.')) parse_error(_('Unexpected non-space characters. Expected DOCTYPE.'))
@parser.phase = @parser.phases[:rootElement] @parser.phase = @parser.phases[:rootElement]
@parser.phase.processCharacters(data) @parser.phase.processCharacters(data)
end end
def processStartTag(name, attributes) def processStartTag(name, attributes)
@parser.parseError(_("Unexpected start tag (#{name}). Expected DOCTYPE.")) parse_error(_("Unexpected start tag (#{name}). Expected DOCTYPE."))
@parser.phase = @parser.phases[:rootElement] @parser.phase = @parser.phases[:rootElement]
@parser.phase.processStartTag(name, attributes) @parser.phase.processStartTag(name, attributes)
end end
def processEndTag(name) def processEndTag(name)
@parser.parseError(_("Unexpected end tag (#{name}). Expected DOCTYPE.")) parse_error(_("Unexpected end tag (#{name}). Expected DOCTYPE."))
@parser.phase = @parser.phases[:rootElement] @parser.phase = @parser.phases[:rootElement]
@parser.phase.processEndTag(name) @parser.phase.processEndTag(name)
end end

View file

@ -15,9 +15,12 @@ module HTML5
# #
class Phase class Phase
extend Forwardable
def_delegators :@parser, :parse_error
# The following example call: # The following example call:
# #
# tag_handlers('startTag', 'html', %( base link meta ), %( li dt dd ) => 'ListItem') # tag_handlers('startTag', 'html', %w( base link meta ), %w( li dt dd ) => 'ListItem')
# #
# ...would return a hash equal to this: # ...would return a hash equal to this:
# #
@ -34,15 +37,15 @@ module HTML5
if tags.last.is_a?(Hash) if tags.last.is_a?(Hash)
tags.pop.each do |names, handler_method_suffix| tags.pop.each do |names, handler_method_suffix|
handler_method = prefix + handler_method_suffix handler_method = prefix + handler_method_suffix
Array(names).each { |name| mapping[name] = handler_method } Array(names).each {|name| mapping[name] = handler_method }
end end
end end
tags.each do |names| tags.each do |names|
names = Array(names) names = Array(names)
handler_method = prefix + names.map { |name| name.capitalize }.join handler_method = prefix + names.map {|name| name.capitalize }.join
names.each { |name| mapping[name] = handler_method } names.each {|name| mapping[name] = handler_method }
end end
return mapping mapping
end end
def self.start_tag_handlers def self.start_tag_handlers
@ -80,17 +83,17 @@ module HTML5
@parser, @tree = parser, tree @parser, @tree = parser, tree
end end
def processEOF def process_eof
@tree.generateImpliedEndTags @tree.generateImpliedEndTags
if @tree.openElements.length > 2 if @tree.open_elements.length > 2
@parser.parseError(_('Unexpected end of file. Missing closing tags.')) parse_error(_('Unexpected end of file. Missing closing tags.'))
elsif @tree.openElements.length == 2 and @tree.openElements[1].name != 'body' elsif @tree.open_elements.length == 2 and @tree.open_elements[1].name != 'body'
# This happens for framesets or something? # This happens for framesets or something?
@parser.parseError(_("Unexpected end of file. Expected end tag (#{@tree.openElements[1].name}) first.")) parse_error(_("Unexpected end of file. Expected end tag (#{@tree.open_elements[1].name}) first."))
elsif @parser.innerHTML and @tree.openElements.length > 1 elsif @parser.inner_html and @tree.open_elements.length > 1
# XXX This is not what the specification says. Not sure what to do here. # XXX This is not what the specification says. Not sure what to do here.
@parser.parseError(_('XXX innerHTML EOF')) parse_error(_('XXX inner_html EOF'))
end end
# Betting ends. # Betting ends.
end end
@ -98,11 +101,11 @@ module HTML5
def processComment(data) def processComment(data)
# For most phases the following is correct. Where it's not it will be # For most phases the following is correct. Where it's not it will be
# overridden. # overridden.
@tree.insertComment(data, @tree.openElements[-1]) @tree.insert_comment(data, @tree.open_elements.last)
end end
def processDoctype(name, publicId, systemId, correct) def processDoctype(name, publicId, systemId, correct)
@parser.parseError(_('Unexpected DOCTYPE. Ignored.')) parse_error(_('Unexpected DOCTYPE. Ignored.'))
end end
def processSpaceCharacters(data) def processSpaceCharacters(data)
@ -114,17 +117,17 @@ module HTML5
end end
def startTagHtml(name, attributes) def startTagHtml(name, attributes)
if @parser.firstStartTag == false and name == 'html' if @parser.first_start_tag == false and name == 'html'
@parser.parseError(_('html needs to be the first start tag.')) parse_error(_('html needs to be the first start tag.'))
end end
# XXX Need a check here to see if the first start tag token emitted is # XXX Need a check here to see if the first start tag token emitted is
# this token... If it's not, invoke @parser.parseError. # this token... If it's not, invoke parse_error.
attributes.each do |attr, value| attributes.each do |attr, value|
unless @tree.openElements[0].attributes.has_key?(attr) unless @tree.open_elements.first.attributes.has_key?(attr)
@tree.openElements[0].attributes[attr] = value @tree.open_elements.first.attributes[attr] = value
end end
end end
@parser.firstStartTag = false @parser.first_start_tag = false
end end
def processEndTag(name) def processEndTag(name)
@ -146,11 +149,10 @@ module HTML5
def remove_open_elements_until(name=nil) def remove_open_elements_until(name=nil)
finished = false finished = false
until finished until finished
element = @tree.openElements.pop element = @tree.open_elements.pop
finished = name.nil?? yield(element) : element.name == name finished = name.nil? ? yield(element) : element.name == name
end end
return element return element
end end
end end
end end

View file

@ -3,38 +3,37 @@ require 'html5/html5parser/phase'
module HTML5 module HTML5
class RootElementPhase < Phase class RootElementPhase < Phase
def processEOF def process_eof
insertHtmlElement insert_html_element
@parser.phase.processEOF @parser.phase.process_eof
end end
def processComment(data) def processComment(data)
@tree.insertComment(data, @tree.document) @tree.insert_comment(data, @tree.document)
end end
def processSpaceCharacters(data) def processSpaceCharacters(data)
@tree.insertText(data, @tree.document)
end end
def processCharacters(data) def processCharacters(data)
insertHtmlElement insert_html_element
@parser.phase.processCharacters(data) @parser.phase.processCharacters(data)
end end
def processStartTag(name, attributes) def processStartTag(name, attributes)
@parser.firstStartTag = true if name == 'html' @parser.first_start_tag = true if name == 'html'
insertHtmlElement insert_html_element
@parser.phase.processStartTag(name, attributes) @parser.phase.processStartTag(name, attributes)
end end
def processEndTag(name) def processEndTag(name)
insertHtmlElement insert_html_element
@parser.phase.processEndTag(name) @parser.phase.processEndTag(name)
end end
def insertHtmlElement def insert_html_element
element = @tree.createElement('html', {}) element = @tree.createElement('html', {})
@tree.openElements.push(element) @tree.open_elements.push(element)
@tree.document.appendChild(element) @tree.document.appendChild(element)
@parser.phase = @parser.phases[:beforeHead] @parser.phase = @parser.phases[:beforeHead]
end end

View file

@ -3,34 +3,33 @@ require 'html5/html5parser/phase'
module HTML5 module HTML5
class TrailingEndPhase < Phase class TrailingEndPhase < Phase
def processEOF def process_eof
end end
def processComment(data) def processComment(data)
@tree.insertComment(data, @tree.document) @tree.insert_comment(data, @tree.document)
end end
def processSpaceCharacters(data) def processSpaceCharacters(data)
@parser.lastPhase.processSpaceCharacters(data) @parser.last_phase.processSpaceCharacters(data)
end end
def processCharacters(data) def processCharacters(data)
@parser.parseError(_('Unexpected non-space characters. Expected end of file.')) parse_error(_('Unexpected non-space characters. Expected end of file.'))
@parser.phase = @parser.lastPhase @parser.phase = @parser.last_phase
@parser.phase.processCharacters(data) @parser.phase.processCharacters(data)
end end
def processStartTag(name, attributes) def processStartTag(name, attributes)
@parser.parseError(_('Unexpected start tag (#{name}). Expected end of file.')) parse_error(_('Unexpected start tag (#{name}). Expected end of file.'))
@parser.phase = @parser.lastPhase @parser.phase = @parser.last_phase
@parser.phase.processStartTag(name, attributes) @parser.phase.processStartTag(name, attributes)
end end
def processEndTag(name) def processEndTag(name)
@parser.parseError(_('Unexpected end tag (#{name}). Expected end of file.')) parse_error(_('Unexpected end tag (#{name}). Expected end of file.'))
@parser.phase = @parser.lastPhase @parser.phase = @parser.last_phase
@parser.phase.processEndTag(name) @parser.phase.processEndTag(name)
end end
end end
end end

View file

@ -27,11 +27,11 @@ module HTML5
# parseMeta - Look for a <meta> element containing encoding information # parseMeta - Look for a <meta> element containing encoding information
def initialize(source, options = {}) def initialize(source, options = {})
@encoding = nil @encoding = nil
@parse_meta = true @parse_meta = true
@chardet = true @chardet = true
options.each { |name, value| instance_variable_set("@#{name}", value) } options.each {|name, value| instance_variable_set("@#{name}", value) }
# Raw Stream # Raw Stream
@raw_stream = open_stream(source) @raw_stream = open_stream(source)
@ -297,7 +297,7 @@ module HTML5
end end
when 0xC0 .. 0xFF when 0xC0 .. 0xFF
if @win1252 if instance_variables.include?("@win1252") && @win1252
"\xC3" + (c-64).chr # convert to utf-8 "\xC3" + (c-64).chr # convert to utf-8
elsif @buffer[@tell-1 .. @tell+3] =~ /^ elsif @buffer[@tell-1 .. @tell+3] =~ /^
( [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte ( [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte

View file

@ -24,7 +24,7 @@ module HTML5
@phases[:initial] = XmlRootPhase.new(self, @tree) @phases[:initial] = XmlRootPhase.new(self, @tree)
end end
def normalizeToken(token) def normalize_token(token)
case token[:type] case token[:type]
when :StartTag, :EmptyTag when :StartTag, :EmptyTag
# We need to remove the duplicate attributes and convert attributes # We need to remove the duplicate attributes and convert attributes
@ -34,23 +34,23 @@ module HTML5
# For EmptyTags, process both a Start and an End tag # For EmptyTags, process both a Start and an End tag
if token[:type] == :EmptyTag if token[:type] == :EmptyTag
save = @tokenizer.contentModelFlag save = @tokenizer.content_model_flag
@phase.processStartTag(token[:name], token[:data]) @phase.processStartTag(token[:name], token[:data])
@tokenizer.contentModelFlag = save @tokenizer.content_model_flag = save
token[:data] = {} token[:data] = {}
token[:type] = :EndTag token[:type] = :EndTag
end end
when :Characters when :Characters
# un-escape RCDATA_ELEMENTS (e.g. style, script) # un-escape RCDATA_ELEMENTS (e.g. style, script)
if @tokenizer.contentModelFlag == :CDATA if @tokenizer.content_model_flag == :CDATA
token[:data] = token[:data]. token[:data] = token[:data].
gsub('&lt;','<').gsub('&gt;','>').gsub('&amp;','&') gsub('&lt;','<').gsub('&gt;','>').gsub('&amp;','&')
end end
when :EndTag when :EndTag
if token[:data] if token[:data]
parseError(_("End tag contains unexpected attributes.")) parse_error(_("End tag contains unexpected attributes."))
end end
when :Comment when :Comment
@ -74,22 +74,22 @@ module HTML5
@phases[:rootElement] = XhmlRootPhase.new(self, @tree) @phases[:rootElement] = XhmlRootPhase.new(self, @tree)
end end
def normalizeToken(token) def normalize_token(token)
super(token) super(token)
# ensure that non-void XHTML elements have content so that separate # ensure that non-void XHTML elements have content so that separate
# open and close tags are emitted # open and close tags are emitted
if token[:type] == :EndTag if token[:type] == :EndTag
if VOID_ELEMENTS.include? token[:name] if VOID_ELEMENTS.include? token[:name]
if @tree.openElements[-1].name != token["name"]: if @tree.open_elements[-1].name != token["name"]:
token[:type] = :EmptyTag token[:type] = :EmptyTag
token["data"] ||= {} token["data"] ||= {}
end end
else else
if token[:name] == @tree.openElements[-1].name and \ if token[:name] == @tree.open_elements[-1].name and \
not @tree.openElements[-1].hasContent not @tree.open_elements[-1].hasContent
@tree.insertText('') unless @tree.insertText('') unless
@tree.openElements.any? {|e| @tree.open_elements.any? {|e|
e.attributes.keys.include? 'xmlns' and e.attributes.keys.include? 'xmlns' and
e.attributes['xmlns'] != 'http://www.w3.org/1999/xhtml' e.attributes['xmlns'] != 'http://www.w3.org/1999/xhtml'
} }
@ -102,9 +102,9 @@ module HTML5
end end
class XhmlRootPhase < RootElementPhase class XhmlRootPhase < RootElementPhase
def insertHtmlElement def insert_html_element
element = @tree.createElement("html", {'xmlns' => 'http://www.w3.org/1999/xhtml'}) element = @tree.createElement("html", {'xmlns' => 'http://www.w3.org/1999/xhtml'})
@tree.openElements.push(element) @tree.open_elements.push(element)
@tree.document.appendChild(element) @tree.document.appendChild(element)
@parser.phase = @parser.phases[:beforeHead] @parser.phase = @parser.phases[:beforeHead]
end end
@ -115,15 +115,15 @@ module HTML5
@start_tag_handlers = Hash.new(:startTagOther) @start_tag_handlers = Hash.new(:startTagOther)
@end_tag_handlers = Hash.new(:endTagOther) @end_tag_handlers = Hash.new(:endTagOther)
def startTagOther(name, attributes) def startTagOther(name, attributes)
@tree.openElements.push(@tree.document) @tree.open_elements.push(@tree.document)
element = @tree.createElement(name, attributes) element = @tree.createElement(name, attributes)
@tree.openElements[-1].appendChild(element) @tree.open_elements[-1].appendChild(element)
@tree.openElements.push(element) @tree.open_elements.push(element)
@parser.phase = XmlElementPhase.new(@parser,@tree) @parser.phase = XmlElementPhase.new(@parser,@tree)
end end
def endTagOther(name) def endTagOther(name)
super super
@tree.openElements.pop @tree.open_elements.pop
end end
end end
@ -135,17 +135,17 @@ module HTML5
def startTagOther(name, attributes) def startTagOther(name, attributes)
element = @tree.createElement(name, attributes) element = @tree.createElement(name, attributes)
@tree.openElements[-1].appendChild(element) @tree.open_elements[-1].appendChild(element)
@tree.openElements.push(element) @tree.open_elements.push(element)
end end
def endTagOther(name) def endTagOther(name)
for node in @tree.openElements.reverse for node in @tree.open_elements.reverse
if node.name == name if node.name == name
{} while @tree.openElements.pop != node {} while @tree.open_elements.pop != node
break break
else else
@parser.parseError parse_error
end end
end end
end end

View file

@ -13,11 +13,11 @@ module HTML5
# or, if you already have a parse tree (in this example, a REXML tree), # or, if you already have a parse tree (in this example, a REXML tree),
# at the Serializer stage: # at the Serializer stage:
# #
# tokens = TreeWalkers.getTreeWalker('rexml').new(tree) # tokens = TreeWalkers.get_tree_walker('rexml').new(tree)
# HTMLSerializer.serialize(tokens, {:encoding=>'utf-8', # HTMLSerializer.serialize(tokens, {:encoding=>'utf-8',
# :sanitize => true}) # :sanitize => true})
module HTMLSanitizeModule module HTMLSanitizeModule
ACCEPTABLE_ELEMENTS = %w[a abbr acronym address area b big blockquote br ACCEPTABLE_ELEMENTS = %w[a abbr acronym address area b big blockquote br
button caption center cite code col colgroup dd del dfn dir div dl dt button caption center cite code col colgroup dd del dfn dir div dl dt

View file

@ -13,18 +13,18 @@ module HTML5
end end
def initialize(options={}) def initialize(options={})
@quote_attr_values = false @quote_attr_values = false
@quote_char = '"' @quote_char = '"'
@use_best_quote_char = true @use_best_quote_char = true
@minimize_boolean_attributes = true @minimize_boolean_attributes = true
@use_trailing_solidus = false @use_trailing_solidus = false
@space_before_trailing_solidus = true @space_before_trailing_solidus = true
@escape_lt_in_attrs = false @escape_lt_in_attrs = false
@escape_rcdata = false @escape_rcdata = false
@omit_optional_tags = true @omit_optional_tags = true
@sanitize = false @sanitize = false
@strip_whitespace = false @strip_whitespace = false
@ -73,7 +73,7 @@ module HTML5
elsif [:Characters, :SpaceCharacters].include? type elsif [:Characters, :SpaceCharacters].include? type
if type == :SpaceCharacters or in_cdata if type == :SpaceCharacters or in_cdata
if in_cdata and token[:data].include?("</") if in_cdata and token[:data].include?("</")
serializeError(_("Unexpected </ in CDATA")) serialize_error(_("Unexpected </ in CDATA"))
end end
result << token[:data] result << token[:data]
else else
@ -85,7 +85,7 @@ module HTML5
if RCDATA_ELEMENTS.include?(name) and not @escape_rcdata if RCDATA_ELEMENTS.include?(name) and not @escape_rcdata
in_cdata = true in_cdata = true
elsif in_cdata elsif in_cdata
serializeError(_("Unexpected child element of a CDATA element")) serialize_error(_("Unexpected child element of a CDATA element"))
end end
attributes = [] attributes = []
for k,v in attrs = token[:data].to_a.sort for k,v in attrs = token[:data].to_a.sort
@ -137,19 +137,19 @@ module HTML5
if RCDATA_ELEMENTS.include?(name) if RCDATA_ELEMENTS.include?(name)
in_cdata = false in_cdata = false
elsif in_cdata elsif in_cdata
serializeError(_("Unexpected child element of a CDATA element")) serialize_error(_("Unexpected child element of a CDATA element"))
end end
end_tag = "</#{name}>" end_tag = "</#{name}>"
result << end_tag result << end_tag
elsif type == :Comment elsif type == :Comment
data = token[:data] data = token[:data]
serializeError(_("Comment contains --")) if data.index("--") serialize_error(_("Comment contains --")) if data.index("--")
comment = "<!--%s-->" % token[:data] comment = "<!--%s-->" % token[:data]
result << comment result << comment
else else
serializeError(token[:data]) serialize_error(token[:data])
end end
end end
@ -163,13 +163,15 @@ module HTML5
alias :render :serialize alias :render :serialize
def serializeError(data="XXX ERROR MESSAGE NEEDED") def serialize_error(data="XXX ERROR MESSAGE NEEDED")
# XXX The idea is to make data mandatory. # XXX The idea is to make data mandatory.
@errors.push(data) @errors.push(data)
if @strict if @strict
raise SerializeError raise SerializeError
end end
end end
def _(string); string; end
end end
# Error in serialized tree # Error in serialized tree

View file

@ -4,12 +4,12 @@ module HTML5
class XHTMLSerializer < HTMLSerializer class XHTMLSerializer < HTMLSerializer
DEFAULTS = { DEFAULTS = {
:quote_attr_values => true, :quote_attr_values => true,
:minimize_boolean_attributes => false, :minimize_boolean_attributes => false,
:use_trailing_solidus => true, :use_trailing_solidus => true,
:escape_lt_in_attrs => true, :escape_lt_in_attrs => true,
:omit_optional_tags => false, :omit_optional_tags => false,
:escape_rcdata => true :escape_rcdata => true
} }
def initialize(options={}) def initialize(options={})

View file

@ -0,0 +1,45 @@
module HTML5
  # Content-type sniffing helpers, meant to be mixed into a class or object.
  module Sniffer
    # Byte sequences the sniffer looks for right after a "<".
    # They are kept as arrays of byte values so that matching never depends
    # on the string's encoding or on how String#[] behaves in a given Ruby.
    SNIFF_COMMENT_START = [0x21, 0x2D, 0x2D].freeze                         # "!--"
    SNIFF_COMMENT_END   = [0x2D, 0x2D, 0x3E].freeze                         # "-->"
    SNIFF_PI_END        = [0x3F, 0x3E].freeze                               # "?>"
    SNIFF_RSS           = [0x72, 0x73, 0x73].freeze                         # "rss"
    SNIFF_FEED          = [0x66, 0x65, 0x65, 0x64].freeze                   # "feed"
    SNIFF_RDF           = [0x72, 0x64, 0x66, 0x3A, 0x52, 0x44, 0x46].freeze # "rdf:RDF"

    # Decide whether a document is HTML or a syndication feed by inspecting
    # its first 512 bytes (HTML5 draft, section 4.7.4, "feed or HTML").
    #
    # Leading whitespace, comments (<!-- ... -->), markup declarations
    # (<! ... >) and processing instructions (<? ... ?>) are skipped; the
    # first other construct decides the result.
    #
    # str:: the raw document, as a String.
    #
    # Returns "application/rss+xml" when the first element is <rss,
    # "application/atom+xml" when it is <feed, and "text/html" otherwise.
    #
    # Raises NotImplementedError when the first element is <rdf:RDF, because
    # the RDF (RSS 1.0) branch of the algorithm is not implemented.
    def html_or_feed(str)
      # steps 1, 2: only the first 512 *bytes* are examined. unpack('C512')
      # pads with nil when the input is shorter, hence the compact.
      bytes = str.unpack('C512').compact
      pos = 0
      while pos < bytes.length
        case bytes[pos]
        when 0x09, 0x20, 0x0A, 0x0D # tab, space, LF, CR
          pos += 1
        when 0x3C # "<"
          pos += 1
          if bytes[pos, 3] == SNIFF_COMMENT_START
            pos += 3
            pos += 1 until pos >= bytes.length || bytes[pos, 3] == SNIFF_COMMENT_END
            pos += 3
          elsif bytes[pos] == 0x21 # "!"
            pos += 1
            pos += 1 until pos >= bytes.length || bytes[pos] == 0x3E # ">"
            pos += 1
          elsif bytes[pos] == 0x3F # "?"
            pos += 1 until pos >= bytes.length || bytes[pos, 2] == SNIFF_PI_END
            pos += 2
          elsif bytes[pos, 3] == SNIFF_RSS
            return 'application/rss+xml'
          elsif bytes[pos, 4] == SNIFF_FEED
            return 'application/atom+xml'
          elsif bytes[pos, 7] == SNIFF_RDF
            raise NotImplementedError, 'sniffing of rdf:RDF documents is not implemented'
          else
            # Any other tag (or a stray "<") means this is not a feed; stop
            # here rather than scanning on and matching a later <rss>/<feed>.
            break
          end
        else
          break
        end
      end
      'text/html'
    end
  end
end

File diff suppressed because it is too large Load diff

View file

@ -18,7 +18,7 @@ module HTML5
end end
end end
alias :getTreeBuilder :[] alias :get_tree_builder :[]
end end
end end
end end

View file

@ -24,9 +24,9 @@ module HTML5
attr_accessor :_flags attr_accessor :_flags
def initialize(name) def initialize(name)
@parent = nil @parent = nil
@childNodes = [] @childNodes = []
@_flags = [] @_flags = []
end end
# Insert node as a child of the current node # Insert node as a child of the current node
@ -76,13 +76,13 @@ module HTML5
# Base treebuilder implementation # Base treebuilder implementation
class TreeBuilder class TreeBuilder
attr_accessor :openElements attr_accessor :open_elements
attr_accessor :activeFormattingElements attr_accessor :activeFormattingElements
attr_accessor :document attr_accessor :document
attr_accessor :headPointer attr_accessor :head_pointer
attr_accessor :formPointer attr_accessor :formPointer
@ -106,25 +106,25 @@ module HTML5
end end
def reset def reset
@openElements = [] @open_elements = []
@activeFormattingElements = [] @activeFormattingElements = []
#XXX - rename these to headElement, formElement #XXX - rename these to headElement, formElement
@headPointer = nil @head_pointer = nil
@formPointer = nil @formPointer = nil
self.insertFromTable = false self.insert_from_table = false
@document = @documentClass.new @document = @documentClass.new
end end
def elementInScope(target, tableVariant=false) def elementInScope(target, tableVariant=false)
# Exit early when possible. # Exit early when possible.
return true if @openElements[-1].name == target return true if @open_elements[-1].name == target
# AT How about while true and simply set node to [-1] and set it to # AT How about while true and simply set node to [-1] and set it to
# [-2] at the end... # [-2] at the end...
@openElements.reverse.each do |element| @open_elements.reverse.each do |element|
if element.name == target if element.name == target
return true return true
elsif element.name == 'table' elsif element.name == 'table'
@ -149,10 +149,10 @@ module HTML5
# Step 2 and step 3: we start with the last element. So i is -1. # Step 2 and step 3: we start with the last element. So i is -1.
i = -1 i = -1
entry = @activeFormattingElements[i] entry = @activeFormattingElements[i]
return if entry == Marker or @openElements.include?(entry) return if entry == Marker or @open_elements.include?(entry)
# Step 6 # Step 6
until entry == Marker or @openElements.include?(entry) until entry == Marker or @open_elements.include?(entry)
# Step 5: let entry be one earlier in the list. # Step 5: let entry be one earlier in the list.
i -= 1 i -= 1
begin begin
@ -171,7 +171,7 @@ module HTML5
clone = @activeFormattingElements[i].cloneNode clone = @activeFormattingElements[i].cloneNode
# Step 9 # Step 9
element = insertElement(clone.name, clone.attributes) element = insert_element(clone.name, clone.attributes)
# Step 10 # Step 10
@activeFormattingElements[i] = element @activeFormattingElements[i] = element
@ -198,12 +198,15 @@ module HTML5
return false return false
end end
def insertDoctype(name) def insertDoctype(name, public_id, system_id)
@document.appendChild(@doctypeClass.new(name)) doctype = @doctypeClass.new(name)
doctype.public_id = public_id
doctype.system_id = system_id
@document.appendChild(doctype)
end end
def insertComment(data, parent=nil) def insert_comment(data, parent=nil)
parent = @openElements[-1] if parent.nil? parent = @open_elements[-1] if parent.nil?
parent.appendChild(@commentClass.new(data)) parent.appendChild(@commentClass.new(data))
end end
@ -216,28 +219,28 @@ module HTML5
# Switch the function used to insert an element from the # Switch the function used to insert an element from the
# normal one to the misnested table one and back again # normal one to the misnested table one and back again
def insertFromTable=(value) def insert_from_table=(value)
@insertFromTable = value @insert_from_table = value
@insertElement = value ? :insertElementTable : :insertElementNormal @insert_element = value ? :insert_elementTable : :insert_elementNormal
end end
def insertElement(name, attributes) def insert_element(name, attributes)
send(@insertElement, name, attributes) send(@insert_element, name, attributes)
end end
def insertElementNormal(name, attributes) def insert_elementNormal(name, attributes)
element = @elementClass.new(name) element = @elementClass.new(name)
element.attributes = attributes element.attributes = attributes
@openElements[-1].appendChild(element) @open_elements.last.appendChild(element)
@openElements.push(element) @open_elements.push(element)
return element return element
end end
# Create an element and insert it into the tree # Create an element and insert it into the tree
def insertElementTable(name, attributes) def insert_elementTable(name, attributes)
element = @elementClass.new(name) element = @elementClass.new(name)
element.attributes = attributes element.attributes = attributes
if TABLE_INSERT_MODE_ELEMENTS.include?(@openElements[-1].name) if TABLE_INSERT_MODE_ELEMENTS.include?(@open_elements.last.name)
#We should be in the InTable mode. This means we want to do #We should be in the InTable mode. This means we want to do
#special magic element rearranging #special magic element rearranging
parent, insertBefore = getTableMisnestedNodePosition parent, insertBefore = getTableMisnestedNodePosition
@ -246,17 +249,17 @@ module HTML5
else else
parent.insertBefore(element, insertBefore) parent.insertBefore(element, insertBefore)
end end
@openElements.push(element) @open_elements.push(element)
else else
return insertElementNormal(name, attributes) return insert_elementNormal(name, attributes)
end end
return element return element
end end
def insertText(data, parent=nil) def insertText(data, parent=nil)
parent = @openElements[-1] if parent.nil? parent = @open_elements[-1] if parent.nil?
if (not(@insertFromTable) or (@insertFromTable and not TABLE_INSERT_MODE_ELEMENTS.include?(@openElements[-1].name))) if (not(@insert_from_table) or (@insert_from_table and not TABLE_INSERT_MODE_ELEMENTS.include?(@open_elements[-1].name)))
parent.insertText(data) parent.insertText(data)
else else
#We should be in the InTable mode. This means we want to do #We should be in the InTable mode. This means we want to do
@ -265,7 +268,7 @@ module HTML5
parent.insertText(data, insertBefore) parent.insertText(data, insertBefore)
end end
end end
# Get the foster parent element, and sibling to insert before # Get the foster parent element, and sibling to insert before
# (or nil) when inserting a misnested table node # (or nil) when inserting a misnested table node
def getTableMisnestedNodePosition def getTableMisnestedNodePosition
@ -275,7 +278,7 @@ module HTML5
lastTable = nil lastTable = nil
fosterParent = nil fosterParent = nil
insertBefore = nil insertBefore = nil
@openElements.reverse.each do |element| @open_elements.reverse.each do |element|
if element.name == "table" if element.name == "table"
lastTable = element lastTable = element
break break
@ -288,33 +291,34 @@ module HTML5
fosterParent = lastTable.parent fosterParent = lastTable.parent
insertBefore = lastTable insertBefore = lastTable
else else
fosterParent = @openElements[@openElements.index(lastTable) - 1] fosterParent = @open_elements[@open_elements.index(lastTable) - 1]
end end
else else
fosterParent = @openElements[0] fosterParent = @open_elements[0]
end end
return fosterParent, insertBefore return fosterParent, insertBefore
end end
def generateImpliedEndTags(exclude=nil) def generateImpliedEndTags(exclude=nil)
name = @openElements[-1].name name = @open_elements[-1].name
if (['dd', 'dt', 'li', 'p', 'td', 'th', 'tr'].include?(name) and name != exclude) # XXX td, th and tr are not actually needed
@openElements.pop if (%w[dd dt li p td th tr].include?(name) and name != exclude)
@open_elements.pop
# XXX This is not entirely what the specification says. We should # XXX This is not entirely what the specification says. We should
# investigate it more closely. # investigate it more closely.
generateImpliedEndTags(exclude) generateImpliedEndTags(exclude)
end end
end end
def getDocument def get_document
@document @document
end end
def getFragment def get_fragment
#assert @innerHTML #assert @inner_html
fragment = @fragmentClass.new fragment = @fragmentClass.new
@openElements[0].reparentChildren(fragment) @open_elements[0].reparentChildren(fragment)
return fragment return fragment
end end

View file

@ -8,7 +8,6 @@ module HTML5
module Hpricot module Hpricot
class Node < Base::Node class Node < Base::Node
extend Forwardable extend Forwardable
def_delegators :@hpricot, :name def_delegators :@hpricot, :name
@ -22,7 +21,7 @@ module HTML5
def appendChild(node) def appendChild(node)
if node.kind_of?(TextNode) and childNodes.any? and childNodes.last.kind_of?(TextNode) if node.kind_of?(TextNode) and childNodes.any? and childNodes.last.kind_of?(TextNode)
childNodes[-1].hpricot.content = childNodes[-1].hpricot.to_s + node.hpricot.to_s childNodes.last.hpricot.content = childNodes.last.hpricot.content + node.hpricot.content
else else
childNodes << node childNodes << node
hpricot.children << node.hpricot hpricot.children << node.hpricot
@ -145,21 +144,27 @@ module HTML5
end end
class DocumentType < Node class DocumentType < Node
def_delegators :@hpricot, :public_id, :system_id
def self.hpricot_class def self.hpricot_class
::Hpricot::DocType ::Hpricot::DocType
end end
def initialize(name) def initialize(name, public_id, system_id)
begin begin
super(name) super(name)
rescue ArgumentError # needs 3... rescue ArgumentError # needs 3...
end end
@hpricot = ::Hpricot::DocType.new(name, nil, nil) @hpricot = ::Hpricot::DocType.new(name, public_id, system_id)
end end
def printTree(indent=0) def printTree(indent=0)
"\n|#{' ' * indent}<!DOCTYPE #{hpricot.target}>" if hpricot.target and hpricot.target.any?
"\n|#{' ' * indent}<!DOCTYPE #{hpricot.target}>"
else
"\n|#{' ' * indent}<!DOCTYPE >"
end
end end
end end
@ -169,7 +174,7 @@ module HTML5
end end
def printTree(indent=0) def printTree(indent=0)
childNodes.inject('') { |tree, child| tree + child.printTree(indent+2) } childNodes.inject('') {|tree, child| tree + child.printTree(indent + 2) }
end end
end end
@ -196,21 +201,26 @@ module HTML5
class TreeBuilder < Base::TreeBuilder class TreeBuilder < Base::TreeBuilder
def initialize def initialize
@documentClass = Document @documentClass = Document
@doctypeClass = DocumentType @doctypeClass = DocumentType
@elementClass = Element @elementClass = Element
@commentClass = CommentNode @commentClass = CommentNode
@fragmentClass = DocumentFragment @fragmentClass = DocumentFragment
end end
def insertDoctype(name, public_id, system_id)
doctype = @doctypeClass.new(name, public_id, system_id)
@document.appendChild(doctype)
end
def testSerializer(node) def testSerializer(node)
node.printTree node.printTree
end end
def getDocument def get_document
@document.hpricot @document.hpricot
end end
def getFragment def get_fragment
@document = super @document = super
return @document.hpricot.children return @document.hpricot.children
end end

View file

@ -17,11 +17,9 @@ module HTML5
end end
def appendChild node def appendChild node
if node.kind_of? TextNode and if node.kind_of?(TextNode) && childNodes.length > 0 && childNodes.last.kind_of?(TextNode)
childNodes.length>0 and childNodes[-1].kind_of? TextNode childNodes.last.rxobj.value = childNodes.last.rxobj.to_s + node.rxobj.to_s
childNodes[-1].rxobj.value = childNodes.last.rxobj.raw = true
childNodes[-1].rxobj.to_s + node.rxobj.to_s
childNodes[-1].rxobj.raw = true
else else
childNodes.push node childNodes.push node
rxobj.add node.rxobj rxobj.add node.rxobj
@ -45,10 +43,8 @@ module HTML5
def insertBefore node, refNode def insertBefore node, refNode
index = childNodes.index(refNode) index = childNodes.index(refNode)
if node.kind_of? TextNode and index>0 and if node.kind_of?(TextNode) and index > 0 && childNodes[index-1].kind_of?(TextNode)
childNodes[index-1].kind_of? TextNode childNodes[index-1].rxobj.value = childNodes[index-1].rxobj.to_s + node.rxobj.to_s
childNodes[index-1].rxobj.value =
childNodes[index-1].rxobj.to_s + node.rxobj.to_s
childNodes[index-1].rxobj.raw = true childNodes[index-1].rxobj.raw = true
else else
childNodes.insert index, node childNodes.insert index, node
@ -57,7 +53,7 @@ module HTML5
end end
def hasContent def hasContent
return (childNodes.length > 0) (childNodes.length > 0)
end end
end end
@ -77,7 +73,7 @@ module HTML5
end end
def attributes= value def attributes= value
value.each {|name, value| rxobj.attributes[name]=value} value.each {|name, value| rxobj.attributes[name] = value}
end end
def printTree indent=0 def printTree indent=0
@ -90,7 +86,7 @@ module HTML5
for child in childNodes for child in childNodes
tree += child.printTree(indent) tree += child.printTree(indent)
end end
return tree tree
end end
end end
@ -120,10 +116,25 @@ module HTML5
end end
class DocumentType < Node class DocumentType < Node
def_delegator :@rxobj, :public, :public_id
def_delegator :@rxobj, :system, :system_id
def self.rxclass def self.rxclass
::REXML::DocType ::REXML::DocType
end end
def initialize name, public_id, system_id
super(name)
if public_id
@rxobj = ::REXML::DocType.new [name, ::REXML::DocType::PUBLIC, public_id, system_id]
elsif system_id
@rxobj = ::REXML::DocType.new [name, ::REXML::DocType::SYSTEM, nil, system_id]
else
@rxobj = ::REXML::DocType.new name
end
end
def printTree indent=0 def printTree indent=0
"\n|#{' ' * indent}<!DOCTYPE #{name}>" "\n|#{' ' * indent}<!DOCTYPE #{name}>"
end end
@ -145,7 +156,7 @@ module HTML5
class TextNode < Node class TextNode < Node
def initialize data def initialize data
raw=data.gsub('&','&amp;').gsub('<','&lt;').gsub('>','&gt;') raw = data.gsub('&', '&amp;').gsub('<', '&lt;').gsub('>', '&gt;')
@rxobj = ::REXML::Text.new(raw, true, nil, true) @rxobj = ::REXML::Text.new(raw, true, nil, true)
end end
@ -167,21 +178,26 @@ module HTML5
class TreeBuilder < Base::TreeBuilder class TreeBuilder < Base::TreeBuilder
def initialize def initialize
@documentClass = Document @documentClass = Document
@doctypeClass = DocumentType @doctypeClass = DocumentType
@elementClass = Element @elementClass = Element
@commentClass = CommentNode @commentClass = CommentNode
@fragmentClass = DocumentFragment @fragmentClass = DocumentFragment
end end
def testSerializer node def insertDoctype(name, public_id, system_id)
node.printTree() doctype = @doctypeClass.new(name, public_id, system_id)
@document.appendChild(doctype)
end end
def getDocument def testSerializer node
node.printTree
end
def get_document
@document.rxobj @document.rxobj
end end
def getFragment def get_fragment
@document = super @document = super
return @document.rxobj.children return @document.rxobj.children
end end

View file

@ -18,17 +18,17 @@ module HTML5
def initialize name def initialize name
super super
@name = name @name = name
@value = nil @value = nil
@attributes = {} @attributes = {}
end end
def appendChild node def appendChild node
if node.kind_of? TextNode and if node.kind_of? TextNode and
childNodes.length>0 and childNodes[-1].kind_of? TextNode childNodes.length > 0 and childNodes.last.kind_of? TextNode
childNodes[-1].value += node.value childNodes.last.value += node.value
else else
childNodes.push node childNodes << node
end end
node.parent = self node.parent = self
end end
@ -55,8 +55,7 @@ module HTML5
def insertBefore node, refNode def insertBefore node, refNode
index = childNodes.index(refNode) index = childNodes.index(refNode)
if node.kind_of? TextNode and index>0 and if node.kind_of?(TextNode) && index > 0 && childNodes[index-1].kind_of?(TextNode)
childNodes[index-1].kind_of? TextNode
childNodes[index-1].value += node.value childNodes[index-1].value += node.value
else else
childNodes.insert index, node childNodes.insert index, node
@ -72,7 +71,7 @@ module HTML5
end end
def hasContent def hasContent
return (childNodes.length > 0) childNodes.length > 0
end end
end end
@ -90,7 +89,7 @@ module HTML5
for child in childNodes for child in childNodes
tree += child.printTree(indent) tree += child.printTree(indent)
end end
return tree tree
end end
end end
@ -108,13 +107,21 @@ module HTML5
for child in childNodes for child in childNodes
tree += child.printTree(indent + 2) tree += child.printTree(indent + 2)
end end
return tree tree
end end
end end
class DocumentType < Node class DocumentType < Node
attr_accessor :public_id, :system_id
def to_s def to_s
"<!DOCTYPE %s>" % name "<!DOCTYPE #{name}>"
end
def initialize name
super name
@public_id = nil
@system_id = nil
end end
end end
@ -157,19 +164,19 @@ module HTML5
class TreeBuilder < Base::TreeBuilder class TreeBuilder < Base::TreeBuilder
def initialize def initialize
@documentClass = Document @documentClass = Document
@doctypeClass = DocumentType @doctypeClass = DocumentType
@elementClass = Element @elementClass = Element
@commentClass = CommentNode @commentClass = CommentNode
@fragmentClass = DocumentFragment @fragmentClass = DocumentFragment
end end
def testSerializer node def testSerializer node
node.printTree() node.printTree
end end
def getFragment def get_fragment
@document = super @document = super
return @document.childNodes @document.childNodes
end end
end end

View file

@ -6,13 +6,13 @@ module HTML5
class << self class << self
def [](name) def [](name)
case name.to_s.downcase case name.to_s.downcase
when 'simpletree' then when 'simpletree'
require 'html5/treewalkers/simpletree' require 'html5/treewalkers/simpletree'
SimpleTree::TreeWalker SimpleTree::TreeWalker
when 'rexml' then when 'rexml'
require 'html5/treewalkers/rexml' require 'html5/treewalkers/rexml'
REXML::TreeWalker REXML::TreeWalker
when 'hpricot' then when 'hpricot'
require 'html5/treewalkers/hpricot' require 'html5/treewalkers/hpricot'
Hpricot::TreeWalker Hpricot::TreeWalker
else else
@ -20,7 +20,7 @@ module HTML5
end end
end end
alias :getTreeWalker :[] alias :get_tree_walker :[]
end end
end end
end end

View file

@ -3,153 +3,151 @@ module HTML5
module TreeWalkers module TreeWalkers
module TokenConstructor module TokenConstructor
def error(msg) def error(msg)
return {:type => "SerializeError", :data => msg} {:type => "SerializeError", :data => msg}
end
def normalize_attrs(attrs)
attrs.to_a
end
def empty_tag(name, attrs, has_children=false)
error(_("Void element has children")) if has_children
{:type => :EmptyTag, :name => name, :data => normalize_attrs(attrs)}
end
def start_tag(name, attrs)
{:type => :StartTag, :name => name, :data => normalize_attrs(attrs)}
end
def end_tag(name)
{:type => :EndTag, :name => name, :data => []}
end
def text(data)
if data =~ /\A([#{SPACE_CHARACTERS.join('')}]+)/m
yield({:type => :SpaceCharacters, :data => $1})
data = data[$1.length .. -1]
return if data.empty?
end end
def normalizeAttrs(attrs) if data =~ /([#{SPACE_CHARACTERS.join('')}]+)\Z/m
attrs.to_a yield({:type => :Characters, :data => data[0 ... -$1.length]})
yield({:type => :SpaceCharacters, :data => $1})
else
yield({:type => :Characters, :data => data})
end end
end
def emptyTag(name, attrs, hasChildren=false) def comment(data)
error(_("Void element has children")) if hasChildren {:type => :Comment, :data => data}
return({:type => :EmptyTag, :name => name, \ end
:data => normalizeAttrs(attrs)})
end
def startTag(name, attrs) def doctype(name, public_id, system_id, correct=nil)
return {:type => :StartTag, :name => name, \ {:type => :Doctype, :name => name, :public_id => public_id, :system_id => system_id, :correct => correct}
:data => normalizeAttrs(attrs)} end
end
def endTag(name) def unknown(nodeType)
return {:type => :EndTag, :name => name, :data => []} error(_("Unknown node type: ") + nodeType.to_s)
end end
def text(data) def _(str)
if data =~ /\A([#{SPACE_CHARACTERS.join('')}]+)/m str
yield({:type => :SpaceCharacters, :data => $1}) end
data = data[$1.length .. -1]
return if data.empty?
end
if data =~ /([#{SPACE_CHARACTERS.join('')}]+)\Z/m
yield({:type => :Characters, :data => data[0 ... -$1.length]})
yield({:type => :SpaceCharacters, :data => $1})
else
yield({:type => :Characters, :data => data})
end
end
def comment(data)
return {:type => :Comment, :data => data}
end
def doctype(name)
return {:type => :Doctype, :name => name, :data => name.upcase() == "HTML"}
end
def unknown(nodeType)
return error(_("Unknown node type: ") + nodeType.to_s)
end
def _(str)
str
end
end end
class Base class Base
include TokenConstructor include TokenConstructor
def initialize(tree) def initialize(tree)
@tree = tree @tree = tree
end end
def each def each
raise NotImplementedError raise NotImplementedError
end end
alias walk each alias walk each
end end
class NonRecursiveTreeWalker < TreeWalkers::Base class NonRecursiveTreeWalker < TreeWalkers::Base
def node_details(node) def node_details(node)
raise NotImplementedError raise NotImplementedError
end end
def first_child(node) def first_child(node)
raise NotImplementedError raise NotImplementedError
end end
def next_sibling(node) def next_sibling(node)
raise NotImplementedError raise NotImplementedError
end end
def parent(node) def parent(node)
raise NotImplementedError raise NotImplementedError
end end
def each def each
currentNode = @tree current_node = @tree
while currentNode != nil while current_node != nil
details = node_details(currentNode) details = node_details(current_node)
hasChildren = false has_children = false
case details.shift case details.shift
when :DOCTYPE when :DOCTYPE
yield doctype(*details) yield doctype(*details)
when :TEXT when :TEXT
text(*details) {|token| yield token} text(*details) {|token| yield token}
when :ELEMENT when :ELEMENT
name, attributes, hasChildren = details name, attributes, has_children = details
if VOID_ELEMENTS.include?(name) if VOID_ELEMENTS.include?(name)
yield emptyTag(name, attributes.to_a, hasChildren) yield empty_tag(name, attributes.to_a, has_children)
hasChildren = false has_children = false
else else
yield startTag(name, attributes.to_a) yield start_tag(name, attributes.to_a)
end
when :COMMENT
yield comment(details[0])
when :DOCUMENT, :DOCUMENT_FRAGMENT
hasChildren = true
when nil
# ignore (REXML::XMLDecl is an example)
else
yield unknown(details[0])
end
firstChild = hasChildren ? first_child(currentNode) : nil
if firstChild != nil
currentNode = firstChild
else
while currentNode != nil
details = node_details(currentNode)
if details.shift == :ELEMENT
name, attributes, hasChildren = details
yield endTag(name) if !VOID_ELEMENTS.include?(name)
end
if @tree == currentNode
currentNode = nil
else
nextSibling = next_sibling(currentNode)
if nextSibling != nil
currentNode = nextSibling
break
end
currentNode = parent(currentNode)
end
end
end
end end
when :COMMENT
yield comment(details[0])
when :DOCUMENT, :DOCUMENT_FRAGMENT
has_children = true
when nil
# ignore (REXML::XMLDecl is an example)
else
yield unknown(details[0])
end
first_child = has_children ? first_child(current_node) : nil
if first_child != nil
current_node = first_child
else
while current_node != nil
details = node_details(current_node)
if details.shift == :ELEMENT
name, attributes, has_children = details
yield end_tag(name) if !VOID_ELEMENTS.include?(name)
end
if @tree == current_node
current_node = nil
else
next_sibling = next_sibling(current_node)
if next_sibling != nil
current_node = next_sibling
break
end
current_node = parent(current_node)
end
end
end
end end
end
end end
end end

View file

@ -13,17 +13,17 @@ module HTML5
[:DOCUMENT_FRAGMENT] [:DOCUMENT_FRAGMENT]
else else
[:ELEMENT, node.name, [:ELEMENT, node.name,
node.attributes.map {|name,value| [name,value]}, node.attributes.map {|name, value| [name, value]},
!node.empty?] !node.empty?]
end end
when ::Hpricot::Text when ::Hpricot::Text
[:TEXT, node.to_plain_text] [:TEXT, node.content]
when ::Hpricot::Comment when ::Hpricot::Comment
[:COMMENT, node.content] [:COMMENT, node.content]
when ::Hpricot::Doc when ::Hpricot::Doc
[:DOCUMENT] [:DOCUMENT]
when ::Hpricot::DocType when ::Hpricot::DocType
[:DOCTYPE, node.target] [:DOCTYPE, node.target, node.public_id, node.system_id]
when ::Hpricot::XMLDecl when ::Hpricot::XMLDecl
[nil] [nil]
else else

View file

@ -23,7 +23,7 @@ module HTML5
when ::REXML::Comment when ::REXML::Comment
[:COMMENT, node.string] [:COMMENT, node.string]
when ::REXML::DocType when ::REXML::DocType
[:DOCTYPE, node.name] [:DOCTYPE, node.name, node.public, node.system]
when ::REXML::XMLDecl when ::REXML::XMLDecl
[nil] [nil]
else else

View file

@ -12,20 +12,20 @@ module HTML5
return return
when DocumentType when DocumentType
yield doctype(node.name) yield doctype(node.name, node.public_id, node.system_id)
when TextNode when TextNode
text(node.value) {|token| yield token} text(node.value) {|token| yield token}
when Element when Element
if VOID_ELEMENTS.include?(node.name) if VOID_ELEMENTS.include?(node.name)
yield emptyTag(node.name, node.attributes, node.hasContent()) yield empty_tag(node.name, node.attributes, node.hasContent())
else else
yield startTag(node.name, node.attributes) yield start_tag(node.name, node.attributes)
for child in node.childNodes for child in node.childNodes
walk(child) {|token| yield token} walk(child) {|token| yield token}
end end
yield endTag(node.name) yield end_tag(node.name)
end end
when CommentNode when CommentNode

View file

@ -0,0 +1,3 @@
module HTML5
  # Version string of this library. Frozen so that code reading the
  # constant cannot modify it in place.
  VERSION = '0.1.0'.freeze
end

View file

@ -3,13 +3,13 @@
{"description": "bare text with leading spaces", {"description": "bare text with leading spaces",
"options": {"strip_whitespace": true}, "options": {"strip_whitespace": true},
"input": [["Characters", "\t\r\n\u000B\u000C foo"]], "input": [["Characters", "\t\r\n\u000B\u000C foo"]],
"expected": ["foo"] "expected": [" foo"]
}, },
{"description": "bare text with trailing spaces", {"description": "bare text with trailing spaces",
"options": {"strip_whitespace": true}, "options": {"strip_whitespace": true},
"input": [["Characters", "foo \t\r\n\u000B\u000C"]], "input": [["Characters", "foo \t\r\n\u000B\u000C"]],
"expected": ["foo"] "expected": ["foo "]
}, },
{"description": "bare text with inner spaces", {"description": "bare text with inner spaces",

View file

@ -0,0 +1,43 @@
[
{"type": "text/html", "input": ""},
{"type": "text/html", "input": "<!---->"},
{"type": "text/html", "input": "<!--asdfaslkjdf;laksjdf as;dkfjsd-->"},
{"type": "text/html", "input": "<!"},
{"type": "text/html", "input": "\t"},
{"type": "text/html", "input": "<!>"},
{"type": "text/html", "input": "<?"},
{"type": "text/html", "input": "<??>"},
{"type": "application/rss+xml", "input": "<rss"},
{"type": "application/atom+xml", "input": "<feed"},
{"type": "text/html", "input": "<html"},
{"type": "text/html", "input": "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\n<html><head>\n<title>302 Found</title>\n</head><body>\n<h1>Found</h1>\n<p>The document has moved <a href=\"http://feeds.feedburner.com/gofug\">here</a>.</p>\n</body></html>\n"},
{"type": "text/html", "input": "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">\r\n<HTML><HEAD>\r\n <link rel=\"stylesheet\" type=\"text/css\" href=\"http://cache.blogads.com/289619328/feed.css\" /><link rel=\"stylesheet\" type=\"text/css\" href=\"http://cache.blogads.com/431602649/feed.css\" />\r\n<link rel=\"stylesheet\" type=\"text/css\" href=\"http://cache.blogads.com/382549546/feed.css\" />\r\n<link rel=\"stylesheet\" type=\"text/css\" href=\"http://cache.blogads.com/314618017/feed.css\" /><META http-equiv=\"expires\" content="},
{"type": "text/html", "input": "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\r\n<html>\r\n<head>\r\n<title>Xiaxue - Chicken pie blogger.</title><meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\"><style type=\"text/css\">\r\n<style type=\"text/css\">\r\n<!--\r\nbody {\r\n background-color: #FFF2F2;\r\n}\r\n.style1 {font-family: Georgia, \"Times New Roman\", Times, serif}\r\n.style2 {\r\n color: #8a567c;\r\n font-size: 14px;\r\n font-family: Georgia, \"Times New Roman\", Times, serif;\r\n}\r"},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" lang=\"en\">\r\n<head> \r\n<title>Google Operating System</title>\r\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\r\n<meta name=\"Description\" content=\"Unofficial news and tips about Google. A blog that watches Google's latest developments and the attempts to move your operating system online.\" />\r\n<meta name=\"generator\" c"},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" lang=\"en\">\r\n<head>\r\n <title>Assimilated Press</title> <meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\r\n<meta name=\"MSSmartTagsPreventParsing\" content=\"true\" />\r\n<meta name=\"generator\" content=\"Blogger\" />\r\n<link rel=\"alternate\" type=\"application/atom+xml\" title=\"Assimilated Press - Atom\" href=\"http://assimila"},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" lang=\"en\">\r\n<head>\r\n <title>PostSecret</title>\r\n<META name=\"keywords\" Content=\"secrets, postcard, secret, postcards, postsecret, postsecrets,online confessional, post secret, post secrets, artomatic, post a secret\"><META name=\"discription\" Content=\"See a Secret...Share a Secret\"> <meta http-equiv=\"Content-Type\" content=\"te"},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n<html xmlns='http://www.w3.org/1999/xhtml' xmlns:b='http://www.google.com/2005/gml/b' xmlns:data='http://www.google.com/2005/gml/data' xmlns:expr='http://www.google.com/2005/gml/expr'>\n <head>\n \n <meta content='text/html; charset=UTF-8' http-equiv='Content-Type'/>\n <meta content='true' name='MSSmartTagsPreventParsing'/>\n <meta content='blogger' name='generator'/>\n <link rel=\"alternate\" typ"},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" dir=\"ltr\" lang=\"ja\">\n<head profile=\"http://gmpg.org/xfn/11\"> \n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" /> \n<title> CMS Lever</title><link rel=\"stylesheet\" type=\"text/css\" media=\"screen\" href=\"http://s.wordpress.com/wp-content/themes/pub/twenty-eight/2813.css\"/>\n<link rel=\"alternate\" type=\"application/rss+xml\" title=\"RSS 2.0\" h"},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" dir=\"ltr\" lang=\"en\"><head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n<title> Park Avenue Peerage</title>\t<meta name=\"generator\" content=\"WordPress.com\" />\t<!-- feeds -->\n\t<link rel=\"alternate\" type=\"application/rss+xml\" title=\"RSS 2.0\" href=\"http://parkavenuepeerage.wordpress.com/feed/\" />\t<link rel=\"pingback\" href="},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" dir=\"ltr\" lang=\"ja\"><head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n<title> \u884c\u96f2\u6d41\u6c34 -like a floating clouds and running water-</title>\t<meta name=\"generator\" content=\"WordPress.com\" />\t<!-- feeds -->\n\t<link rel=\"alternate\" type=\"application/rss+xml\" title=\"RSS 2.0\" href=\"http://shw4.wordpress.com/feed/\" />\t<li"},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\"><html xmlns=\"http://www.w3.org/1999/xhtml\">\n<head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />\n<meta name=\"generator\" content=\"http://www.typepad.com/\" />\n<title>Go Fug Yourself</title><link rel=\"stylesheet\" href=\"http://gofugyourself.typepad.com/go_fug_yourself/styles.css\" type=\"text/css\" />\n<link rel=\"alternate\" type=\"application/atom+xml\" title=\"Atom\" "},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" dir=\"ltr\" lang=\"en\"><head profile=\"http://gmpg.org/xfn/11\">\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" /><title> Ladies&#8230;</title><meta name=\"generator\" content=\"WordPress.com\" /> <!-- leave this for stats --><link rel=\"stylesheet\" href=\"http://s.wordpress.com/wp-content/themes/default/style.css?1\" type=\"tex"},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\r\n<html xmlns=\"http://www.w3.org/1999/xhtml\">\r\n<head>\r\n <title>The Sartorialist</title> <meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\r\n<meta name=\"MSSmartTagsPreventParsing\" content=\"true\" />\r\n<meta name=\"generator\" content=\"Blogger\" />\r\n<link rel=\"alternate\" type=\"application/atom+xml\" title=\"The Sartorialist - Atom\" href=\"http://thesartorialist.blogspot"},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\"><html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"en\">\n<head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-1\" />\n<meta name=\"generator\" content=\"http://www.typepad.com/\" />\n<title>Creating Passionate Users</title><link rel=\"stylesheet\" href=\"http://headrush.typepad.com/creating_passionate_users/styles.css\" type=\"text/css\" />\n<link rel=\"alternate\" type"},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n\t\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" id=\"sixapart-standard\">\n<head>\n\t<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />\n\t<meta name=\"generator\" content=\"http://www.typepad.com/\" />\n\t\n\t\n <meta name=\"keywords\" content=\"marketing, blog, seth, ideas, respect, permission\" />\n <meta name=\"description\" content=\"Seth Godin's riffs on marketing, respect, and the "},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n\t\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" id=\"sixapart-standard\">\n<head>\n\t<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />\n\t<meta name=\"generator\" content=\"http://www.typepad.com/\" />\n\t\n\t\n \n <meta name=\"description\" content=\" Western Civilization hangs in the balance. This blog is part of the solution,the cure. Get your heads out of the sand and Fight the G"},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" dir=\"ltr\" lang=\"en\">\n<head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=pahrefhttpwwwfeedburnercomtarget_blankimgsrchttpwwwfeedburnercomfbimagespubpowered_by_fbgifaltPoweredbyFeedBurnerstyleborder0ap\" />\n<title> From Under the Rotunda</title>\n<link rel=\"stylesheet\" href=\"http://s.wordpress.com/wp-content/themes/pub/andreas04/style.css\" type=\"text/css\""},
{"type": "application/atom+xml", "input": "<?xml version='1.0' encoding='UTF-8'?><?xml-stylesheet href=\"http://www.blogger.com/styles/atom.css\" type=\"text/css\"?><feed xmlns='http://www.w3.org/2005/Atom' xmlns:openSearch='http://a9.com/-/spec/opensearchrss/1.0/'><id>tag:blogger.com,1999:blog-10861780</id><updated>2007-07-27T12:38:50.888-07:00</updated><title type='text'>Official Google Blog</title><link rel='alternate' type='text/html' href='http://googleblog.blogspot.com/'/><link rel='next' type='application/atom+xml' href='http://googleblog.blogs"},
{"type": "application/rss+xml", "input": "<?xml version='1.0' encoding='UTF-8'?><rss xmlns:atom='http://www.w3.org/2005/Atom' xmlns:openSearch='http://a9.com/-/spec/opensearchrss/1.0/' version='2.0'><channel><atom:id>tag:blogger.com,1999:blog-10861780</atom:id><lastBuildDate>Fri, 27 Jul 2007 19:38:50 +0000</lastBuildDate><title>Official Google Blog</title><description/><link>http://googleblog.blogspot.com/</link><managingEditor>Eric Case</managingEditor><generator>Blogger</generator><openSearch:totalResults>729</openSearch:totalResults><openSearc"},
{"type": "application/rss+xml", "input": "<?xml version=\"1.0\" encoding=\"pahrefhttpwwwfeedburnercomtarget_blankimgsrchttpwwwfeedburnercomfbimagespubpowered_by_fbgifaltPoweredbyFeedBurnerstyleborder0ap\"?>\n<!-- generator=\"wordpress/MU\" -->\n<rss version=\"2.0\"\n\txmlns:content=\"http://purl.org/rss/1.0/modules/content/\"\n\txmlns:wfw=\"http://wellformedweb.org/CommentAPI/\"\n\txmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n\t><channel>\n\t<title>From Under the Rotunda</title>\n\t<link>http://dannybernardi.wordpress.com</link>\n\t<description>The Monographs of Danny Ber"},
{"type": "application/rss+xml", "input": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!-- generator=\"wordpress/MU\" -->\n<rss version=\"2.0\"\n\txmlns:content=\"http://purl.org/rss/1.0/modules/content/\"\n\txmlns:wfw=\"http://wellformedweb.org/CommentAPI/\"\n\txmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n\t><channel>\n\t<title>CMS Lever</title>\n\t<link>http://kanaguri.wordpress.com</link>\n\t<description>CMS\u306e\u6c17\u306b\u306a\u3063\u305f\u3053\u3068</description>\n\t<pubDate>Wed, 18 Jul 2007 21:26:22 +0000</pubDate>\n\t<generator>http://wordpress.org/?v=MU</generator>\n\t<language>ja</languag"},
{"type": "application/atom+xml", "input": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<feed xmlns=\"http://www.w3.org/2005/Atom\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:thr=\"http://purl.org/syndication/thread/1.0\">\n <title>Atlas Shrugs</title>\n <link rel=\"self\" type=\"application/atom+xml\" href=\"http://atlasshrugs2000.typepad.com/atlas_shrugs/atom.xml\" />\n <link rel=\"alternate\" type=\"text/html\" href=\"http://atlasshrugs2000.typepad.com/atlas_shrugs/\" />\n <id>tag:typepad.com,2003:weblog-132946</id>\n <updated>2007-08-15T16:07:34-04"},
{"type": "application/atom+xml", "input": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/atom10full.xsl\" type=\"text/xsl\" media=\"screen\"?><?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/itemcontent.css\" type=\"text/css\" media=\"screen\"?><feed xmlns=\"http://www.w3.org/2005/Atom\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:thr=\"http://purl.org/syndication/thread/1.0\" xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\">\r\n <title>Creating Passionate Users</title>\r\n "},
{"type": "application/atom+xml", "input": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/atom10full.xsl\" type=\"text/xsl\" media=\"screen\"?><?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/itemcontent.css\" type=\"text/css\" media=\"screen\"?><feed xmlns=\"http://www.w3.org/2005/Atom\" xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\">\r\n <title>Seth's Blog</title>\r\n <link rel=\"alternate\" type=\"text/html\" href=\"http://sethgodin.typepad.com/seths_blog/\" />\r\n <link rel=\"s"},
{"type": "application/atom+xml", "input": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/atom10full.xsl\" type=\"text/xsl\" media=\"screen\"?><?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/itemcontent.css\" type=\"text/css\" media=\"screen\"?><feed xmlns=\"http://www.w3.org/2005/Atom\" xmlns:openSearch=\"http://a9.com/-/spec/opensearchrss/1.0/\" xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\"><id>tag:blogger.com,1999:blog-32454861</id><updated>2007-07-31T21:44:09.867+02:00</upd"},
{"type": "application/atom+xml", "input": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/atomfull.xsl\" type=\"text/xsl\" media=\"screen\"?><?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/itemcontent.css\" type=\"text/css\" media=\"screen\"?><feed xmlns=\"http://purl.org/atom/ns#\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\" version=\"0.3\">\r\n <title>Go Fug Yourself</title>\r\n <link rel=\"alternate\" type=\"text/html\" href=\"http://go"},
{"type": "application/rss+xml", "input": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/rss2full.xsl\" type=\"text/xsl\" media=\"screen\"?><?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/itemcontent.css\" type=\"text/css\" media=\"screen\"?><rss xmlns:creativeCommons=\"http://backend.userland.com/creativeCommonsRssModule\" xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\" version=\"2.0\"><channel><title>Google Operating System</title><link>http://googlesystem.blogspot.com/</link>"},
{"type": "application/rss+xml", "input": "<?xml version=\"1.0\" encoding=\"\"?>\n<!-- generator=\"wordpress/MU\" -->\n<rss version=\"2.0\"\n\txmlns:content=\"http://purl.org/rss/1.0/modules/content/\"\n\txmlns:wfw=\"http://wellformedweb.org/CommentAPI/\"\n\txmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n\t><channel>\n\t<title>Nunublog</title>\n\t<link>http://nunubh.wordpress.com</link>\n\t<description>Just Newbie Blog!</description>\n\t<pubDate>Mon, 09 Jul 2007 18:54:09 +0000</pubDate>\n\t<generator>http://wordpress.org/?v=MU</generator>\n\t<language>id</language>\n\t\t\t<item>\n\t\t<ti"},
{"type": "text/html", "input": "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" lang=\"en\">\r\n<HEAD>\r\n<TITLE>Design*Sponge</TITLE><meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\r\n<meta name=\"MSSmartTagsPreventParsing\" content=\"true\" />\r\n<meta name=\"generator\" content=\"Blogger\" />\r\n<link rel=\"alternate\" type=\"application/atom+xml\" title=\"Design*Sponge - Atom\" href=\"http://designsponge.blogspot.com/feeds/posts/default\" />\r\n<link rel=\"alternate\" type=\"application/rss+xml\" title=\"Design*Sponge - RSS\" href="},
{"type": "text/html", "input": "<HTML>\n<HEAD>\n<TITLE>Moved Temporarily</TITLE>\n</HEAD>\n<BODY BGCOLOR=\"#FFFFFF\" TEXT=\"#000000\">\n<H1>Moved Temporarily</H1>\nThe document has moved <A HREF=\"http://feeds.feedburner.com/thesecretdiaryofstevejobs\">here</A>.\n</BODY>\n</HTML>\n"}
]

View file

@ -11,12 +11,24 @@
"input":"foo</bar>", "input":"foo</bar>",
"output":[["Character", "foo"], ["EndTag", "bar"]]}, "output":[["Character", "foo"], ["EndTag", "bar"]]},
{"description":"End tag closing RCDATA or CDATA (case-insensitivity)",
"contentModelFlags":["RCDATA", "CDATA"],
"lastStartTag":"bar",
"input":"foo</bAr>",
"output":[["Character", "foo"], ["EndTag", "bar"]]},
{"description":"End tag with incorrect name in RCDATA or CDATA", {"description":"End tag with incorrect name in RCDATA or CDATA",
"contentModelFlags":["RCDATA", "CDATA"], "contentModelFlags":["RCDATA", "CDATA"],
"lastStartTag":"baz", "lastStartTag":"baz",
"input":"</foo>bar</baz>", "input":"</foo>bar</baz>",
"output":[["Character", "</foo>bar"], ["EndTag", "baz"]]}, "output":[["Character", "</foo>bar"], ["EndTag", "baz"]]},
{"description":"End tag with incorrect name in RCDATA or CDATA (starting like correct name)",
"contentModelFlags":["RCDATA", "CDATA"],
"lastStartTag":"baz",
"input":"</foo>bar</bazaar>",
"output":[["Character", "</foo>bar</bazaar>"]]},
{"description":"End tag closing RCDATA or CDATA, switching back to PCDATA", {"description":"End tag closing RCDATA or CDATA, switching back to PCDATA",
"contentModelFlags":["RCDATA", "CDATA"], "contentModelFlags":["RCDATA", "CDATA"],
"lastStartTag":"bar", "lastStartTag":"bar",

File diff suppressed because it is too large Load diff

View file

@ -161,6 +161,10 @@
"input":"<h a='&not1'>", "input":"<h a='&not1'>",
"output":["ParseError", ["StartTag", "h", {"a":"&not1"}]]}, "output":["ParseError", ["StartTag", "h", {"a":"&not1"}]]},
{"description":"Entity in attribute without semicolon ending in i",
"input":"<h a='&noti'>",
"output":["ParseError", ["StartTag", "h", {"a":"&noti"}]]},
{"description":"Entity in attribute without semicolon", {"description":"Entity in attribute without semicolon",
"input":"<h a='&COPY'>", "input":"<h a='&COPY'>",
"output":["ParseError", ["StartTag", "h", {"a":"©"}]]} "output":["ParseError", ["StartTag", "h", {"a":"©"}]]}

View file

@ -60,14 +60,6 @@
"input":"&#xD869;&#xDED6;", "input":"&#xD869;&#xDED6;",
"output":["ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"]]}, "output":["ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"]]},
{"description":"Numeric entity representing a Windows-1252 'codepoint'",
"input":"&#137;",
"output":["ParseError", ["Character", "\u2030"]]},
{"description":"Hexadecimal entity representing a Windows-1252 'codepoint'",
"input":"&#x89;",
"output":["ParseError", ["Character", "\u2030"]]},
{"description":"Hexadecimal entity with mixed uppercase and lowercase", {"description":"Hexadecimal entity with mixed uppercase and lowercase",
"input":"&#xaBcD;", "input":"&#xaBcD;",
"output":[["Character", "\uABCD"]]}, "output":[["Character", "\uABCD"]]},
@ -122,7 +114,15 @@
{"description":"Null Byte Replacement", {"description":"Null Byte Replacement",
"input":"\u0000", "input":"\u0000",
"output":["ParseError", ["Character", "\ufffd"]]} "output":["ParseError", ["Character", "\ufffd"]]},
{"description":"Comment with dash",
"input":"<!---x",
"output":["ParseError", ["Comment", "-x"]]},
{"description":"Entity + newline",
"input":"\nx\n&gt;\n",
"output":[["Character","\nx\n>\n"]]}
]} ]}

View file

@ -0,0 +1,367 @@
{"tests": [
{"description":"<",
"input":"<",
"output":["ParseError", ["Character", "<"]]},
{"description":"<>",
"input":"<>",
"output":["ParseError", ["Character", "<>"]]},
{"description":"<!",
"input":"<!",
"output":["ParseError", ["Comment", ""]]},
{"description":"<!>",
"input":"<!>",
"output":["ParseError", ["Comment", ""]]},
{"description":"<!--",
"input":"<!--",
"output":["ParseError", ["Comment", ""]]},
{"description":"<!-->",
"input":"<!-->",
"output":["ParseError", ["Comment", ""]]},
{"description":"<!---",
"input":"<!---",
"output":["ParseError", ["Comment", ""]]},
{"description":"<!--->",
"input":"<!--->",
"output":["ParseError", ["Comment", ""]]},
{"description":"<!---->",
"input":"<!---->",
"output":[["Comment", ""]]},
{"description":"<!-----",
"input":"<!-----",
"output":["ParseError", "ParseError", ["Comment", "-"]]},
{"description":"<!----.",
"input":"<!----.",
"output":["ParseError", "ParseError", ["Comment", "--."]]},
{"description":"<!---?",
"input":"<!---?",
"output":["ParseError", ["Comment", "-?"]]},
{"description":"<!--?-",
"input":"<!--?-",
"output":["ParseError", ["Comment", "?"]]},
{"description":"<!--?--",
"input":"<!--?--",
"output":["ParseError", ["Comment", "?"]]},
{"description":"<!--?-.",
"input":"<!--?-.",
"output":["ParseError", ["Comment", "?-."]]},
{"description":"<!--?.",
"input":"<!--?.",
"output":["ParseError", ["Comment", "?."]]},
{"description":"<?>",
"input":"<?>",
"output":["ParseError", ["Comment", "?"]]},
{"description":"<??",
"input":"<??",
"output":["ParseError", ["Comment", "??"]]},
{"description":"</",
"input":"</",
"output":["ParseError", ["Character", "</"]]},
{"description":"</>",
"input":"</>",
"output":["ParseError"]},
{"description":"</?",
"input":"</?",
"output":["ParseError", ["Comment", "?"]]},
{"description":">",
"input":">",
"output":[["Character", ">"]]},
{"description":"-",
"input":"-",
"output":[["Character", "-"]]},
{"description":"?",
"input":"?",
"output":[["Character", "?"]]},
{"description":"&",
"input":"&",
"output":[["Character", "&"]]},
{"description":"&#",
"input":"&#",
"output":["ParseError", ["Character", "&#"]]},
{"description":"&#9",
"input":"&#9",
"output":["ParseError", ["Character", "\t"]]},
{"description":"<!doctype >",
"input":"<!doctype >",
"output":["ParseError", ["DOCTYPE", "", null, null, false]]},
{"description":"<!doctype ",
"input":"<!doctype ",
"output":["ParseError", ["DOCTYPE", "", null, null, false]]},
{"description":"<!doctype!>",
"input":"<!doctype!>",
"output":["ParseError", ["DOCTYPE", "!", null, null, true]]},
{"description":"<!doctype! >",
"input":"<!doctype! >",
"output":["ParseError", ["DOCTYPE", "!", null, null, true]]},
{"description":"<!doctype! ",
"input":"<!doctype! ",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
{"description":"<!doctype! ?>",
"input":"<!doctype! ?>",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
{"description":"<!doctype! ??",
"input":"<!doctype! ??",
"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
{"description":"<!doctype!?",
"input":"<!doctype!?",
"output":["ParseError", "ParseError", ["DOCTYPE", "!?", null, null, false]]},
{"description":"<!doctype! public>",
"input":"<!doctype! public>",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
{"description":"<!doctype! public ",
"input":"<!doctype! public ",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
{"description":"<!doctype! public?",
"input":"<!doctype! public?",
"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
{"description":"<!doctype! public''",
"input":"<!doctype! public''",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", "", null, false]]},
{"description":"<!doctype! public'(",
"input":"<!doctype! public'(",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", "(", null, false]]},
{"description":"<!doctype! public\"\">",
"input":"<!doctype! public\"\">",
"output":["ParseError", ["DOCTYPE", "!", "", null, true]]},
{"description":"<!doctype! public\"\" ",
"input":"<!doctype! public\"\" ",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", "", null, false]]},
{"description":"<!doctype! public\"\"?",
"input":"<!doctype! public\"\"?",
"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "!", "", null, false]]},
{"description":"<!doctype! public\"\"'",
"input":"<!doctype! public\"\"'",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", "", "", false]]},
{"description":"<!doctype! public\"\"\"",
"input":"<!doctype! public\"\"\"",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", "", "", false]]},
{"description":"<!doctype! public\"#",
"input":"<!doctype! public\"#",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", "#", null, false]]},
{"description":"<!doctype! system>",
"input":"<!doctype! system>",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
{"description":"<!doctype! system ",
"input":"<!doctype! system ",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
{"description":"<!doctype! system?",
"input":"<!doctype! system?",
"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
{"description":"<!doctype! system''",
"input":"<!doctype! system''",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, "", false]]},
{"description":"<!doctype! system'(",
"input":"<!doctype! system'(",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, "(", false]]},
{"description":"<!doctype! system\"\">",
"input":"<!doctype! system\"\">",
"output":["ParseError", ["DOCTYPE", "!", null, "", true]]},
{"description":"<!doctype! system\"\" ",
"input":"<!doctype! system\"\" ",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, "", false]]},
{"description":"<!doctype! system\"\"?",
"input":"<!doctype! system\"\"?",
"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "!", null, "", false]]},
{"description":"<!doctype! system\"#",
"input":"<!doctype! system\"#",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, "#", false]]},
{"description":"</z",
"input":"</z",
"output":["ParseError", ["EndTag", "z"]]},
{"description":"<z>",
"input":"<z>",
"output":[["StartTag", "z", {}]]},
{"description":"<z ",
"input":"<z ",
"output":["ParseError", ["StartTag", "z", {}]]},
{"description":"<z/>",
"input":"<z/>",
"output":["ParseError", ["StartTag", "z", {}]]},
{"description":"<z/ ",
"input":"<z/ ",
"output":["ParseError", "ParseError", ["StartTag", "z", {}]]},
{"description":"<z//",
"input":"<z//",
"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {}]]},
{"description":"<z",
"input":"<z",
"output":["ParseError", ["StartTag", "z", {}]]},
{"description":"</z",
"input":"</z",
"output":["ParseError", ["EndTag", "z"]]},
{"description":"<z0",
"input":"<z0",
"output":["ParseError", ["StartTag", "z0", {}]]},
{"description":"<z/0=>",
"input":"<z/0=>",
"output":["ParseError", ["StartTag", "z", {"0": ""}]]},
{"description":"<z/0= ",
"input":"<z/0= ",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
{"description":"<z/0=?>",
"input":"<z/0=?>",
"output":["ParseError", ["StartTag", "z", {"0": "?"}]]},
{"description":"<z/0=? ",
"input":"<z/0=? ",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "?"}]]},
{"description":"<z/0=??",
"input":"<z/0=??",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "??"}]]},
{"description":"<z/0=''",
"input":"<z/0=''",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
{"description":"<z/0='&",
"input":"<z/0='&",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "&"}]]},
{"description":"<z/0='%",
"input":"<z/0='%",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "%"}]]},
{"description":"<z/0=\"'",
"input":"<z/0=\"'",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "'"}]]},
{"description":"<z/0=\"\"",
"input":"<z/0=\"\"",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
{"description":"<z/0=\"&",
"input":"<z/0=\"&",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "&"}]]},
{"description":"<z/0=&",
"input":"<z/0=&",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "&"}]]},
{"description":"<z/0>",
"input":"<z/0>",
"output":["ParseError", ["StartTag", "z", {"0": ""}]]},
{"description":"<z/0 =",
"input":"<z/0 =",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
{"description":"<z/0 >",
"input":"<z/0 >",
"output":["ParseError", ["StartTag", "z", {"0": ""}]]},
{"description":"<z/0 ",
"input":"<z/0 ",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
{"description":"<z/0 /",
"input":"<z/0 /",
"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
{"description":"<z/0/",
"input":"<z/0/",
"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
{"description":"<z/00",
"input":"<z/00",
"output":["ParseError", "ParseError", ["StartTag", "z", {"00": ""}]]},
{"description":"<z/0 0",
"input":"<z/0 0",
"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
{"description":"<z/0='&#9",
"input":"<z/0='&#9",
"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": "\t"}]]},
{"description":"<z/0=\"&#9",
"input":"<z/0=\"&#9",
"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": "\t"}]]},
{"description":"<z/0=&#9",
"input":"<z/0=&#9",
"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": "\t"}]]},
{"description":"<z/0z",
"input":"<z/0z",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0z": ""}]]},
{"description":"<z/0 z",
"input":"<z/0 z",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "", "z": ""}]]},
{"description":"<zz",
"input":"<zz",
"output":["ParseError", ["StartTag", "zz", {}]]},
{"description":"<z/z",
"input":"<z/z",
"output":["ParseError", "ParseError", ["StartTag", "z", {"z": ""}]]}
]}

View file

@ -0,0 +1,198 @@
{"tests": [
{"description":"< in attribute name",
"input":"<z/0 <",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "", "<": ""}]]},
{"description":"< in attribute value",
"input":"<z x=<",
"output":["ParseError", ["StartTag", "z", {"x": "<"}]]},
{"description":"CR EOF after doctype name",
"input":"<!doctype html \r",
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
{"description":"CR EOF in tag name",
"input":"<z\r",
"output":["ParseError", ["StartTag", "z", {}]]},
{"description":"Zero hex numeric entity",
"input":"&#x0",
"output":["ParseError", "ParseError", ["Character", "\uFFFD"]]},
{"description":"Zero decimal numeric entity",
"input":"&#0",
"output":["ParseError", "ParseError", ["Character", "\uFFFD"]]},
{"description":"Zero-prefixed hex numeric entity",
"input":"&#x000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000041;",
"output":[["Character", "A"]]},
{"description":"Zero-prefixed decimal numeric entity",
"input":"&#000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000065;",
"output":[["Character", "A"]]},
{"description":"Empty hex numeric entities",
"input":"&#x &#X ",
"output":["ParseError", ["Character", "&#x "], "ParseError", ["Character", "&#X "]]},
{"description":"Empty decimal numeric entities",
"input":"&# &#; ",
"output":["ParseError", ["Character", "&# "], "ParseError", ["Character", "&#; "]]},
{"description":"Non-BMP numeric entity",
"input":"&#x10000;",
"output":[["Character", "\uD800\uDC00"]]},
{"description":"Maximum non-BMP numeric entity",
"input":"&#X10FFFF;",
"output":[["Character", "\uDBFF\uDFFF"]]},
{"description":"Above maximum numeric entity",
"input":"&#x110000;",
"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"32-bit hex numeric entity",
"input":"&#x80000041;",
"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"33-bit hex numeric entity",
"input":"&#x100000041;",
"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"33-bit decimal numeric entity",
"input":"&#4294967361;",
"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"65-bit hex numeric entity",
"input":"&#x10000000000000041;",
"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"65-bit decimal numeric entity",
"input":"&#18446744073709551681;",
"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"Surrogate code point edge cases",
"input":"&#xD7FF;&#xD800;&#xD801;&#xDFFE;&#xDFFF;&#xE000;",
"output":[["Character", "\uD7FF"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD\uE000"]]},
{"description":"Uppercase start tag name",
"input":"<X>",
"output":[["StartTag", "x", {}]]},
{"description":"Uppercase end tag name",
"input":"</X>",
"output":[["EndTag", "x"]]},
{"description":"Uppercase attribute name",
"input":"<x X>",
"output":[["StartTag", "x", { "x":"" }]]},
{"description":"Tag/attribute name case edge values",
"input":"<x@AZ[`az{ @AZ[`az{>",
"output":[["StartTag", "x@az[`az{", { "@az[`az{":"" }]]},
{"description":"Duplicate different-case attributes",
"input":"<x x=1 x=2 X=3>",
"output":["ParseError", "ParseError", ["StartTag", "x", { "x":"1" }]]},
{"description":"Uppercase close tag attributes",
"input":"</x X>",
"output":["ParseError", ["EndTag", "x"]]},
{"description":"Duplicate close tag attributes",
"input":"</x x x>",
"output":["ParseError", "ParseError", ["EndTag", "x"]]},
{"description":"Permitted slash",
"input":"<br/>",
"output":[["StartTag", "br", {}]]},
{"description":"Non-permitted slash",
"input":"<xr/>",
"output":["ParseError", ["StartTag", "xr", {}]]},
{"description":"Permitted slash but in close tag",
"input":"</br/>",
"output":["ParseError", ["EndTag", "br"]]},
{"description":"Doctype public case-sensitivity (1)",
"input":"<!DoCtYpE HtMl PuBlIc \"AbC\" \"XyZ\">",
"output":[["DOCTYPE", "HtMl", "AbC", "XyZ", true]]},
{"description":"Doctype public case-sensitivity (2)",
"input":"<!dOcTyPe hTmL pUbLiC \"aBc\" \"xYz\">",
"output":[["DOCTYPE", "hTmL", "aBc", "xYz", true]]},
{"description":"Doctype system case-sensitivity (1)",
"input":"<!DoCtYpE HtMl SyStEm \"XyZ\">",
"output":[["DOCTYPE", "HtMl", null, "XyZ", true]]},
{"description":"Doctype system case-sensitivity (2)",
"input":"<!dOcTyPe hTmL sYsTeM \"xYz\">",
"output":[["DOCTYPE", "hTmL", null, "xYz", true]]},
{"description":"U+0000 in lookahead region after non-matching character",
"input":"<!doc>\u0000",
"output":["ParseError", ["Comment", "doc"], "ParseError", ["Character", "\uFFFD"]],
"ignoreErrorOrder":true},
{"description":"U+0000 in lookahead region",
"input":"<!doc\u0000",
"output":["ParseError", "ParseError", ["Comment", "doc\uFFFD"]],
"ignoreErrorOrder":true},
{"description":"CR followed by U+0000",
"input":"\r\u0000",
"output":["ParseError", ["Character", "\n\uFFFD"]],
"ignoreErrorOrder":true},
{"description":"CR followed by non-LF",
"input":"\r?",
"output":[["Character", "\n?"]]},
{"description":"CR at EOF",
"input":"\r",
"output":[["Character", "\n"]]},
{"description":"LF at EOF",
"input":"\n",
"output":[["Character", "\n"]]},
{"description":"CR LF",
"input":"\r\n",
"output":[["Character", "\n"]]},
{"description":"CR CR",
"input":"\r\r",
"output":[["Character", "\n\n"]]},
{"description":"LF LF",
"input":"\n\n",
"output":[["Character", "\n\n"]]},
{"description":"LF CR",
"input":"\n\r",
"output":[["Character", "\n\n"]]},
{"description":"text CR CR CR text",
"input":"text\r\r\rtext",
"output":[["Character", "text\n\n\ntext"]]},
{"description":"Doctype publik",
"input":"<!DOCTYPE html PUBLIK \"AbC\" \"XyZ\">",
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
{"description":"Doctype publi",
"input":"<!DOCTYPE html PUBLI",
"output":["ParseError", "ParseError", ["DOCTYPE", "html", null, null, false]]},
{"description":"Doctype sistem",
"input":"<!DOCTYPE html SISTEM \"AbC\">",
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
{"description":"Doctype sys",
"input":"<!DOCTYPE html SYS",
"output":["ParseError", "ParseError", ["DOCTYPE", "html", null, null, false]]}
]}

View file

@ -113,7 +113,6 @@ Line1<br>Line2<br>Line3<br>Line4
<html><head></body></html> <html><head></body></html>
#errors #errors
6: missing document type declaration 6: missing document type declaration
19: unexpected body element end tag in head
#document #document
| <html> | <html>
| <head> | <head>
@ -159,7 +158,6 @@ Line1<br>Line2<br>Line3<br>Line4
</head> </head>
#errors #errors
7: missing document type declaration 7: missing document type declaration
7: unexpected head element end tag
#document #document
| <html> | <html>
| <head> | <head>
@ -169,7 +167,6 @@ Line1<br>Line2<br>Line3<br>Line4
</body> </body>
#errors #errors
7: missing document type declaration 7: missing document type declaration
7: unexpected body element end tag
#document #document
| <html> | <html>
| <head> | <head>
@ -437,6 +434,7 @@ Unexpected end of file
#data #data
<!DOCTYPE HTML><li>hello<li>world<ul>how<li>do</ul>you</body><!--do--> <!DOCTYPE HTML><li>hello<li>world<ul>how<li>do</ul>you</body><!--do-->
#errors #errors
Unexpected end of file. Expected </li>. XXX
#document #document
| <!DOCTYPE HTML> | <!DOCTYPE HTML>
| <html> | <html>
@ -638,7 +636,6 @@ Unexpected end of file
#data #data
<!DOCTYPE HTML><script> <!-- </script> --> </script> EOF <!DOCTYPE HTML><script> <!-- </script> --> </script> EOF
#errors #errors
52: unexpected script element end tag
#document #document
| <!DOCTYPE HTML> | <!DOCTYPE HTML>
| <html> | <html>
@ -732,6 +729,7 @@ Unexpected end of file
#errors #errors
6: missing document type declaration 6: missing document type declaration
29: mismatched font element end tag (misnested tags) 29: mismatched font element end tag (misnested tags)
AAA </font> tag strikes again
35: mismatched body element end tag (premature end of file?) 35: mismatched body element end tag (premature end of file?)
#document #document
| <html> | <html>
@ -1122,6 +1120,7 @@ Unexpected end of file
15: missing document type declaration 15: missing document type declaration
39: unexpected node in table context 39: unexpected node in table context
39: a element start tag implying a element end tag 39: a element start tag implying a element end tag
AAA violation: </a>
39: unexpected node in table context 39: unexpected node in table context
39: mismatched a element end tag (misnested tags across <table> tag) 39: mismatched a element end tag (misnested tags across <table> tag)
43: unexpected node in table context 43: unexpected node in table context
@ -1177,6 +1176,8 @@ Unexpected end of file
7: missing document type declaration 7: missing document type declaration
22: unexpected node in table context 22: unexpected node in table context
27: unexpected node in table context 27: unexpected node in table context
XXX more table voodoo
XXX more table voodoo
54: unexpected td element end tag implied other end tags 54: unexpected td element end tag implied other end tags
63: unexpected node in table context 63: unexpected node in table context
72: mismatched body element end tag (premature end of file?) 72: mismatched body element end tag (premature end of file?)
@ -1301,11 +1302,9 @@ unexpected EOF
#errors #errors
6: missing document type declaration 6: missing document type declaration
12: unexpected body element start tag 12: unexpected body element start tag
18: base element start tag out of place
24: link element start tag out of place
30: meta element start tag out of place
37: title element start tag out of place 37: title element start tag out of place
54: unexpected body element start tag 54: unexpected body element start tag
Missing end tag </p>. XXX
#document #document
| <html> | <html>
| <head> | <head>
@ -1346,7 +1345,6 @@ unexpected EOF
3: missing document type declaration 3: missing document type declaration
13: unexpected node in table context 13: unexpected node in table context
13: a element start tag implying a element end tag 13: a element start tag implying a element end tag
13: unexpected node in table context
13: mismatched a element end tag (misnested tags across <table> tag) 13: mismatched a element end tag (misnested tags across <table> tag)
21: mismatched table element end tag 21: mismatched table element end tag
27: a element start tag implying a element end tag 27: a element start tag implying a element end tag
@ -1576,6 +1574,8 @@ unexpected EOF
<ul><li></li><div><li></div><li><li><div><li><address><li><b><em></b><li></ul> <ul><li></li><div><li></div><li><li><div><li><address><li><b><em></b><li></ul>
#errors #errors
4: missing document type declaration 4: missing document type declaration
Missing end tag for <div> (nr2)
Missing end tag for <address>
69: mismatched b element end tag (misnested tags) 69: mismatched b element end tag (misnested tags)
#document #document
| <html> | <html>
@ -1620,7 +1620,6 @@ unexpected EOF
56: unexpected frameset element start tag in body 56: unexpected frameset element start tag in body
63: unexpected frame element start tag in body 63: unexpected frame element start tag in body
74: unexpected frameset element end tag 74: unexpected frameset element end tag
87: unescaped '</' in CDATA or RCDATA block
106: unexpected end of file while parsing CDATA section for element noframes 106: unexpected end of file while parsing CDATA section for element noframes
#document #document
| <html> | <html>
@ -1635,6 +1634,7 @@ unexpected EOF
4: missing document type declaration 4: missing document type declaration
15: required tr element start tag implied by unexpected td element start tag 15: required tr element start tag implied by unexpected td element start tag
27: unexpected td element end tag implied other end tags 27: unexpected td element end tag implied other end tags
Unexpected </h1> tag. Expected other.
Unexpected EOF Unexpected EOF
#document #document
| <html> | <html>
@ -1742,9 +1742,9 @@ Unexpected EOF
108: unexpected h4 element end tag 108: unexpected h4 element end tag
113: unexpected h5 element end tag 113: unexpected h5 element end tag
118: unexpected h6 element end tag 118: unexpected h6 element end tag
125: unexpected body element end tag 125: unexpected end tag token br in after body phase
130: unexpected br element end tag 130: unexpected br element end tag
134: unexpected a element end tag 134: unexpected a element end tag (AAA)
140: unexpected img element end tag 140: unexpected img element end tag
148: unexpected title element end tag 148: unexpected title element end tag
155: unexpected span element end tag 155: unexpected span element end tag
@ -1926,6 +1926,9 @@ Unexpected EOF
610: unexpected option element end tag 610: unexpected option element end tag
622: unexpected plaintext element end tag 622: unexpected plaintext element end tag
633: mismatched special end tag textarea 633: mismatched special end tag textarea
XXX
XXX
XXX
#document #document
| <html> | <html>
| <head> | <head>
@ -1935,3 +1938,13 @@ Unexpected EOF
| <tbody> | <tbody>
| <tr> | <tr>
| <p> | <p>
#data
<frameset>
#errors
10: Start tag seen without seeing a doctype first.
11: End of file seen and there were open elements.
#document
| <html>
| <head>
| <frameset>

View file

@ -12,7 +12,6 @@
<textarea>test</div>test <textarea>test</div>test
#errors #errors
10: missing document type declaration. 10: missing document type declaration.
17: unescaped '</' in CDATA or RCDATA block.
25: unexpected end of file while parsing CDATA section for element textarea. 25: unexpected end of file while parsing CDATA section for element textarea.
#document #document
| <html> | <html>
@ -87,6 +86,8 @@ Expected end tag </frameset>
#data #data
<!DOCTYPE HTML><font><p><b>test</font> <!DOCTYPE HTML><font><p><b>test</font>
#errors #errors
AAA violation. </font>
AAA violation. </font>
#document #document
| <!DOCTYPE HTML> | <!DOCTYPE HTML>
| <html> | <html>
@ -101,6 +102,7 @@ Expected end tag </frameset>
#data #data
<!DOCTYPE HTML><dt><div><dd> <!DOCTYPE HTML><dt><div><dd>
#errors #errors
Missing end tag for <div>.
#document #document
| <!DOCTYPE HTML> | <!DOCTYPE HTML>
| <html> | <html>
@ -114,7 +116,6 @@ Expected end tag </frameset>
<script></x <script></x
#errors #errors
no document type no document type
</ in script
Unexpected end of file. Expected </script> end tag. Unexpected end of file. Expected </script> end tag.
#document #document
| <html> | <html>
@ -129,6 +130,7 @@ Unexpected end of file. Expected </script> end tag.
no document type no document type
<plaintext> directly inside table <plaintext> directly inside table
Characters inside table. Characters inside table.
Characters inside table. (XXX?)
Unexpected end of file. Unexpected end of file.
#document #document
| <html> | <html>
@ -175,10 +177,10 @@ Unexpected start tag "body"
| <html> | <html>
| <head> | <head>
| <body> | <body>
| t4="4" | t1="1"
| t2="2" | t2="2"
| t3="3" | t3="3"
| t1="1" | t4="4"
#data #data
</b test </b test
@ -195,7 +197,6 @@ Unexpected end tag.
#data #data
<!DOCTYPE HTML></b test<b &=&amp>X <!DOCTYPE HTML></b test<b &=&amp>X
#errors #errors
Unexpected < in attribute
End tag contains attributes. End tag contains attributes.
Unexpected end tag. Unexpected end tag.
Named entity didn't end with ; Named entity didn't end with ;
@ -224,7 +225,6 @@ Unexpected EOF in (end) tag name
& &
#errors #errors
No doctype. No doctype.
Unfinished entity.
#document #document
| <html> | <html>
| <head> | <head>
@ -349,11 +349,11 @@ Unexpected end EOF. Missing closing tags.
| <b> | <b>
| <i> | <i>
| <u> | <u>
| " " | <b>
| <p> | <i>
| <b> | <u>
| <i> | " "
| <u> | <p>
| "X" | "X"
#data #data
@ -538,10 +538,10 @@ No doctype
| <hr> | <hr>
| <p> | <p>
| <label> | <label>
| "This is a searchable index. Insert your search keywords here:" | "This is a searchable index. Insert your search keywords here: "
| <input> | <input>
| test="x"
| name="isindex" | name="isindex"
| test="x"
| <hr> | <hr>
#data #data
@ -571,19 +571,18 @@ Unexpected EOF.
| <b> | <b>
| <i> | <i>
| <u> | <u>
| " | <b>
| <i>
| <u>
| "
" "
| <p> | <p>
| <b>
| <i>
| <u>
| "X" | "X"
#data #data
<!DOCTYPE HTML><body><title>test</body></title> <!DOCTYPE HTML><body><title>test</body></title>
#errors #errors
Unexpected start tag that belongs in the head. Unexpected start tag that belongs in the head.
Expected closing tag after </.
#document #document
| <!DOCTYPE HTML> | <!DOCTYPE HTML>
| <html> | <html>
@ -596,10 +595,7 @@ Expected closing tag after </.
<!DOCTYPE HTML><body><title>X</title><meta name=z><link rel=foo><style> <!DOCTYPE HTML><body><title>X</title><meta name=z><link rel=foo><style>
x { content:"</style" } </style> x { content:"</style" } </style>
#errors #errors
Unexpected start tag that belongs in head. Unexpected start tag that belongs in head. <title>
Unexpected start tag that belongs in head.
Unexpected start tag that belongs in head.
Expected closing tag after </.
#document #document
| <!DOCTYPE HTML> | <!DOCTYPE HTML>
| <html> | <html>
@ -632,8 +628,6 @@ x { content:"</style" } "
#errors #errors
No doctype. No doctype.
#document #document
| "
"
| <html> | <html>
| <head> | <head>
| <body> | <body>
@ -643,7 +637,6 @@ No doctype.
#errors #errors
#document #document
| <!DOCTYPE HTML> | <!DOCTYPE HTML>
| " "
| <html> | <html>
| <head> | <head>
| <body> | <body>
@ -749,8 +742,8 @@ Solidus (/) incorrectly placed.
| <body> | <body>
| "X" | "X"
| <p> | <p>
| y=""
| x="" | x=""
| y=""
| z="" | z=""
#data #data

View file

@ -131,6 +131,7 @@ y"
<!DOCTYPE htML><html><head></head><body><pre>x<div> <!DOCTYPE htML><html><head></head><body><pre>x<div>
y</pre></body></html> y</pre></body></html>
#errors #errors
End tag <pre> seen too early. Expected other end tag.
#document #document
| <!DOCTYPE htML> | <!DOCTYPE htML>
| <html> | <html>
@ -140,11 +141,12 @@ y</pre></body></html>
| "x" | "x"
| <div> | <div>
| " | "
| y" y"
#data #data
<!DOCTYPE htML><HTML><META><HEAD></HEAD></HTML> <!DOCTYPE htML><HTML><META><HEAD></HEAD></HTML>
#errors #errors
Unexpected start tag HEAD in HEAD. Ignored.
#document #document
| <!DOCTYPE htML> | <!DOCTYPE htML>
| <html> | <html>
@ -155,6 +157,7 @@ y</pre></body></html>
#data #data
<!DOCTYPE htML><HTML><HEAD><head></HEAD></HTML> <!DOCTYPE htML><HTML><HEAD><head></HEAD></HTML>
#errors #errors
Unexpected start tag HEAD in HEAD. Ignored.
#document #document
| <!DOCTYPE htML> | <!DOCTYPE htML>
| <html> | <html>
@ -164,6 +167,8 @@ y</pre></body></html>
#data #data
<textarea>foo<span>bar</span><i>baz <textarea>foo<span>bar</span><i>baz
#errors #errors
Unexpected start tag. Expected DOCTYPE.
Unexpected end of file.
#document #document
| <html> | <html>
| <head> | <head>
@ -174,6 +179,8 @@ y</pre></body></html>
#data #data
<title>foo<span>bar</em><i>baz <title>foo<span>bar</em><i>baz
#errors #errors
Unexpected start tag. Expected DOCTYPE.
Unexpected end of file.
#document #document
| <html> | <html>
| <head> | <head>
@ -236,6 +243,8 @@ Missing end tag (div)
#data #data
<!doctype html><nobr><nobr><nobr> <!doctype html><nobr><nobr><nobr>
#errors #errors
Unexpected <nobr> tag.
Unexpected <nobr> tag.
Unexpected end of file. Unexpected end of file.
#document #document
| <!DOCTYPE html> | <!DOCTYPE html>
@ -249,6 +258,7 @@ Unexpected end of file.
#data #data
<!doctype html><nobr><nobr></nobr><nobr> <!doctype html><nobr><nobr></nobr><nobr>
#errors #errors
Unexpected <nobr> tag.
Unexpected end of file. Unexpected end of file.
#document #document
| <!DOCTYPE html> | <!DOCTYPE html>

View file

@ -41,6 +41,7 @@ plaintext
#data #data
setting html's innerHTML setting html's innerHTML
#errors #errors
XXX innerHTML EOF
#document-fragment #document-fragment
html html
#document #document
@ -51,6 +52,7 @@ html
#data #data
<title>setting head's innerHTML</title> <title>setting head's innerHTML</title>
#errors #errors
Unexpected title element that belongs in head.
#document-fragment #document-fragment
head head
#document #document

View file

@ -110,7 +110,6 @@ No DOCTYPE
<style> <!</-- </style>x <style> <!</-- </style>x
#errors #errors
No DOCTYPE No DOCTYPE
Unexpected end of file
#document #document
| <html> | <html>
| <head> | <head>
@ -118,3 +117,59 @@ Unexpected end of file
| " <!</-- " | " <!</-- "
| <body> | <body>
| "x" | "x"
#data
<xmp> <!-- > --> </xmp>
#errors
No DOCTYPE
#document
| <html>
| <head>
| <body>
| <xmp>
| " <!-- > --> "
#data
<title>&amp;</title>
#errors
No DOCTYPE
#document
| <html>
| <head>
| <title>
| "&"
| <body>
#data
<title><!--&amp;--></title>
#errors
No DOCTYPE
#document
| <html>
| <head>
| <title>
| "<!--&amp;-->"
| <body>
#data
<title><!--</title>
#errors
No DOCTYPE
Unexpected EOF
#document
| <html>
| <head>
| <title>
| "<!--</title>"
| <body>
#data
<noscript><!--</noscript>--></noscript>
#errors
No DOCTYPE
#document
| <html>
| <head>
| <noscript>
| "<!--</noscript>-->"
| <body>

View file

@ -1,6 +1,7 @@
#data #data
<!doctype html></head> <head> <!doctype html></head> <head>
#errors #errors
Unexpected start tag head. Ignored.
#document #document
| <!DOCTYPE html> | <!DOCTYPE html>
| <html> | <html>
@ -11,6 +12,9 @@
#data #data
<!doctype html></html> <head> <!doctype html></html> <head>
#errors #errors
Unexpected start tag head.
Unexpected start tag head in after body phase.
Unexpected start tag head. Ignored.
#document #document
| <!DOCTYPE html> | <!DOCTYPE html>
| <html> | <html>
@ -21,6 +25,7 @@
#data #data
<!doctype html></body><meta> <!doctype html></body><meta>
#errors #errors
Unexpected meta element in after body phase.
#document #document
| <!DOCTYPE html> | <!DOCTYPE html>
| <html> | <html>
@ -45,7 +50,6 @@ Unexpected end of file.
#data #data
<!doctype HTml><title>&amp;</title> <!doctype HTml><title>&amp;</title>
#errors #errors
Unexpected end of file.
#document #document
| <!DOCTYPE HTml> | <!DOCTYPE HTml>
| <html> | <html>
@ -57,7 +61,6 @@ Unexpected end of file.
#data #data
<!doctype HTml><title><!--&amp;--></title> <!doctype HTml><title><!--&amp;--></title>
#errors #errors
Unexpected end of file.
#document #document
| <!DOCTYPE HTml> | <!DOCTYPE HTml>
| <html> | <html>
@ -65,3 +68,26 @@ Unexpected end of file.
| <title> | <title>
| "<!--&amp;-->" | "<!--&amp;-->"
| <body> | <body>
#data
<!doctype>
#errors
No space after "doctype"
Unexpected ">"
Incorrect doctype
#document
| <!DOCTYPE >
| <html>
| <head>
| <body>
#data
<!---x
#errors
End of file in comment
End of file before doctype
#document
| <!-- -x -->
| <html>
| <head>
| <body>

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,159 @@
{"tests": [
{"description": "valid single class attribute value",
"input": "<span class=a>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with leading space",
"input": "<span class=' a'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with trailing space",
"input": "<span class='a '>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with leading and trailing space",
"input": "<span class=' a '>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with leading tab",
"input": "<span class=' a'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with trailing tab",
"input": "<span class='a '>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with leading and trailing tab",
"input": "<span class=' a '>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with leading LF",
"input": "<span class='
a'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with trailing LF",
"input": "<span class='a
'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with leading and trailing LF",
"input": "<span class='
a
'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with leading LT",
"input": "<span class=' a'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with trailing LT",
"input": "<span class='a '>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with leading and trailing LT",
"input": "<span class=' a '>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with leading FF",
"input": "<span class=' a'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with trailing FF",
"input": "<span class='a '>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with leading and trailing FF",
"input": "<span class=' a '>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with leading CR",
"input": "<span class=' a'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with trailing CR",
"input": "<span class='a '>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with leading and trailing CR",
"input": "<span class=' a '>",
"fail-if": "invalid-attribute-value"},
{"description": "valid double class attribute value separated by space",
"input": "<span class='a b'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid double class attribute value separated by tab",
"input": "<span class='a b'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid double class attribute value separated by LF",
"input": "<span class='a
b'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid double class attribute value separated by LT",
"input": "<span class='a b'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid double class attribute value separated by FF",
"input": "<span class='a b'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid double class attribute value separated by CR",
"input": "<span class='a b'>",
"fail-if": "invalid-attribute-value"},
{"description": "invalid duplicated class attribute value separated by space",
"input": "<span class='a a'>",
"fail-unless": "invalid-attribute-value"},
{"description": "invalid duplicated class attribute value separated by tab",
"input": "<span class='a a'>",
"fail-unless": "invalid-attribute-value"},
{"description": "invalid duplicated class attribute value separated by LF",
"input": "<span class='a
a'>",
"fail-unless": "invalid-attribute-value"},
{"description": "invalid duplicated class attribute value separated by LT",
"input": "<span class='a a'>",
"fail-unless": "invalid-attribute-value"},
{"description": "invalid duplicated class attribute value separated by FF",
"input": "<span class='a a'>",
"fail-unless": "invalid-attribute-value"},
{"description": "invalid duplicated class attribute value separated by CR",
"input": "<span class='a a'>",
"fail-unless": "duplicate-value-in-token-list"},
{"description": "invalid duplicated class attribute value separated by space",
"input": "<span class='a a'>",
"fail-unless": "duplicate-value-in-token-list"},
{"description": "invalid duplicated class attribute value separated by tab",
"input": "<span class='a a'>",
"fail-unless": "duplicate-value-in-token-list"},
{"description": "invalid duplicated class attribute value separated by LF",
"input": "<span class='a
a'>",
"fail-unless": "duplicate-value-in-token-list"},
{"description": "invalid duplicated class attribute value separated by LT",
"input": "<span class='a a'>",
"fail-unless": "duplicate-value-in-token-list"},
{"description": "invalid duplicated class attribute value separated by FF",
"input": "<span class='a a'>",
"fail-unless": "duplicate-value-in-token-list"},
{"description": "invalid duplicated class attribute value separated by CR",
"input": "<span class='a a'>",
"fail-unless": "duplicate-value-in-token-list"}
]}

View file

@ -0,0 +1,59 @@
{"tests": [
{"description": "valid contenteditable attribute value 'true'",
"input": "<span contenteditable=true>",
"fail-if": "invalid-attribute-value"},
{"description": "valid contenteditable attribute value 'TRUE'",
"input": "<span contenteditable=TRUE>",
"fail-if": "invalid-attribute-value"},
{"description": "valid contenteditable attribute value 'TrUe'",
"input": "<span contenteditable=TrUe>",
"fail-if": "invalid-attribute-value"},
{"description": "valid contenteditable attribute value 'false'",
"input": "<span contenteditable=false>",
"fail-if": "invalid-attribute-value"},
{"description": "valid contenteditable attribute value 'FALSE'",
"input": "<span contenteditable=FALSE>",
"fail-if": "invalid-attribute-value"},
{"description": "valid contenteditable attribute value 'FalSe'",
"input": "<span contenteditable=FalSe>",
"fail-if": "invalid-attribute-value"},
{"description": "valid contenteditable attribute value ''",
"input": "<span contenteditable=''>",
"fail-if": "invalid-attribute-value"},
{"description": "valid contenteditable attribute value (not specified)",
"input": "<span contenteditable>",
"fail-if": "invalid-attribute-value"},
{"description": "invalid contenteditable attribute value 'foo'",
"input": "<span contenteditable=foo>",
"fail-unless": "invalid-attribute-value"},
{"description": "invalid contenteditable attribute value '0'",
"input": "<span contenteditable=0>",
"fail-unless": "invalid-attribute-value"},
{"description": "invalid contenteditable attribute value '1'",
"input": "<span contenteditable=1>",
"fail-unless": "invalid-attribute-value"},
{"description": "invalid contenteditable attribute value 'yes'",
"input": "<span contenteditable=yes>",
"fail-unless": "invalid-attribute-value"},
{"description": "invalid contenteditable attribute value 'no'",
"input": "<span contenteditable=no>",
"fail-unless": "invalid-attribute-value"},
{"description": "invalid contenteditable attribute value 'inherit'",
"input": "<span contenteditable=inherit>",
"fail-unless": "invalid-attribute-value"}
]}

View file

@ -0,0 +1,118 @@
{"tests": [
{"description": "contextmenu points to valid ID earlier",
"input": "<menu id=a><span contextmenu=a>",
"fail-if": "id-does-not-exist"},
{"description": "contextmenu points to valid ID later",
"input": "<span contextmenu=a><menu id=a>",
"fail-if": "id-does-not-exist"},
{"description": "contextmenu points to non-existent ID",
"input": "<span contextmenu=a>",
"fail-unless": "id-does-not-exist"},
{"description": "contextmenu points to ID on non-menu element",
"input": "<span id=a><span contextmenu=a>",
"fail-unless": "contextmenu-must-point-to-menu"},
{"description": "uppercase contextmenu points to ID on non-menu element",
"input": "<span id=a><span CONTEXTMENU=a>",
"fail-unless": "contextmenu-must-point-to-menu"},
{"description": "valid ID 'a'",
"input": "<span contextmenu=a>",
"fail-if": "invalid-attribute-value"},
{"description": "valid ID '1'",
"input": "<span contextmenu=1>",
"fail-if": "invalid-attribute-value"},
{"description": "wacky but valid ID",
"input": "<span contextmenu='<html><head><title>a</title></head><body><p>b</p></body></html>'>",
"fail-if": "invalid-attribute-value"},
{"description": "invalid blank ID",
"input": "<span id>",
"fail-unless": "attribute-value-can-not-be-blank"},
{"description": "invalid blank ID with quotes",
"input": "<span contextmenu=''>",
"fail-unless": "attribute-value-can-not-be-blank"},
{"description": "invalid ID because of leading space",
"input": "<span contextmenu=' a'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of trailing space",
"input": "<span contextmenu='a '>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of space in value",
"input": "<span contextmenu='a b'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of leading tab",
"input": "<span contextmenu=' a'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of trailing tab",
"input": "<span contextmenu='a '>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of tab in value",
"input": "<span contextmenu='a b'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of leading LF",
"input": "<span contextmenu='
a'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of trailing LF",
"input": "<span contextmenu='a
'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of LF in value",
"input": "<span contextmenu='a
b'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of leading LT",
"input": "<span contextmenu=' a'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of trailing LT",
"input": "<span contextmenu='a '>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of LT in value",
"input": "<span contextmenu='a b'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of leading FF",
"input": "<span contextmenu=' a'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of trailing FF",
"input": "<span contextmenu='a '>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of FF in value",
"input": "<span contextmenu='a b'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of leading CR",
"input": "<span contextmenu=' a'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of trailing CR",
"input": "<span contextmenu='a '>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of CR in value",
"input": "<span contextmenu='a b'>",
"fail-unless": "space-in-id"}
]}

View file

@ -0,0 +1,118 @@
{"tests": [
{"description": "valid ID 'a'",
"input": "<span id=a>",
"fail-if": "invalid-attribute-value"},
{"description": "valid ID '1'",
"input": "<span id=1>",
"fail-if": "invalid-attribute-value"},
{"description": "wacky but valid ID",
"input": "<span id='<html><head><title>a</title></head><body><p>b</p></body></html>'>",
"fail-if": "invalid-attribute-value"},
{"description": "invalid blank ID",
"input": "<span id>",
"fail-unless": "attribute-value-can-not-be-blank"},
{"description": "invalid blank ID with quotes",
"input": "<span id=''>",
"fail-unless": "attribute-value-can-not-be-blank"},
{"description": "invalid ID because of leading space",
"input": "<span id=' a'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of trailing space",
"input": "<span id='a '>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of space in value",
"input": "<span id='a b'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of leading tab",
"input": "<span id=' a'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of trailing tab",
"input": "<span id='a '>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of tab in value",
"input": "<span id='a b'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of leading LF",
"input": "<span id='
a'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of trailing LF",
"input": "<span id='a
'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of LF in value",
"input": "<span id='a
b'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of leading LT",
"input": "<span id=' a'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of trailing LT",
"input": "<span id='a '>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of LT in value",
"input": "<span id='a b'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of leading FF",
"input": "<span id=' a'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of trailing FF",
"input": "<span id='a '>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of FF in value",
"input": "<span id='a b'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of leading CR",
"input": "<span id=' a'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of trailing CR",
"input": "<span id='a '>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of CR in value",
"input": "<span id='a b'>",
"fail-unless": "space-in-id"},
{"description": "duplicate ID values",
"input": "<span id=a><span id=a>",
"fail-unless": "duplicate-id"},
{"description": "duplicate ID values with spaces (weird but true)",
"input": "<span id='a '><span id='a '>",
"fail-unless": "duplicate-id"},
{"description": "not duplicate ID values because spaces don't match",
"input": "<span id=a><span id='a '>",
"fail-if": "duplicate-id"},
{"description": "not duplicate ID values because spaces don't match",
"input": "<span id=' a'><span id='a '>",
"fail-if": "duplicate-id"},
{"description": "not duplicate ID values because case doesn't match",
"input": "<span id=a><span id=A>",
"fail-if": "duplicate-id"}
]}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,375 @@
{"tests": [
{"description": "unknown start tag <foo>",
"input": "<foo>",
"fail-unless": "unknown-start-tag"},
{"description": "allowed start tag <code>",
"input": "<code>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <kbd>",
"input": "<kbd>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <aside>",
"input": "<aside>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <datagrid>",
"input": "<datagrid>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <font>",
"input": "<font>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <noscript>",
"input": "<noscript>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <style>",
"input": "<style>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <img>",
"input": "<img>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <title>",
"input": "<title>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <menu>",
"input": "<menu>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <tr>",
"input": "<tr>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <param>",
"input": "<param>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <li>",
"input": "<li>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <source>",
"input": "<source>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <tfoot>",
"input": "<tfoot>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <th>",
"input": "<th>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <td>",
"input": "<td>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <dl>",
"input": "<dl>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <blockquote>",
"input": "<blockquote>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <dd>",
"input": "<dd>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <abbr>",
"input": "<abbr>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <dt>",
"input": "<dt>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <noembed>",
"input": "<noembed>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <p>",
"input": "<p>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <small>",
"input": "<small>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <meter>",
"input": "<meter>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <em>",
"input": "<em>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <meta>",
"input": "<meta>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <video>",
"input": "<video>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <div>",
"input": "<div>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <canvas>",
"input": "<canvas>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <sub>",
"input": "<sub>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <section>",
"input": "<section>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <sup>",
"input": "<sup>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <progress>",
"input": "<progress>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <body>",
"input": "<body>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <base>",
"input": "<base>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <br>",
"input": "<br>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <address>",
"input": "<address>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <article>",
"input": "<article>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <strong>",
"input": "<strong>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <legend>",
"input": "<legend>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <event-source>",
"input": "<event-source>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <ol>",
"input": "<ol>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <script>",
"input": "<script>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <caption>",
"input": "<caption>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <dialog>",
"input": "<dialog>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <col>",
"input": "<col>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <h2>",
"input": "<h2>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <h3>",
"input": "<h3>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <h1>",
"input": "<h1>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <h6>",
"input": "<h6>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <h4>",
"input": "<h4>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <h5>",
"input": "<h5>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <header>",
"input": "<header>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <table>",
"input": "<table>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <span>",
"input": "<span>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <area>",
"input": "<area>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <dfn>",
"input": "<dfn>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <var>",
"input": "<var>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <cite>",
"input": "<cite>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <thead>",
"input": "<thead>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <head>",
"input": "<head>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <hr>",
"input": "<hr>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <link>",
"input": "<link>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <datatemplate>",
"input": "<datatemplate>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <b>",
"input": "<b>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <colgroup>",
"input": "<colgroup>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <ul>",
"input": "<ul>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <del>",
"input": "<del>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <iframe>",
"input": "<iframe>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <pre>",
"input": "<pre>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <figure>",
"input": "<figure>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <ins>",
"input": "<ins>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <tbody>",
"input": "<tbody>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <html>",
"input": "<html>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <nav>",
"input": "<nav>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <details>",
"input": "<details>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <samp>",
"input": "<samp>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <map>",
"input": "<map>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <nest>",
"input": "<nest>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <object>",
"input": "<object>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <a>",
"input": "<a>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <footer>",
"input": "<footer>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <i>",
"input": "<i>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <m>",
"input": "<m>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <rule>",
"input": "<rule>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <q>",
"input": "<q>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <command>",
"input": "<command>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <time>",
"input": "<time>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <audio>",
"input": "<audio>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <bdo>",
"input": "<bdo>",
"fail-if": "unknown-start-tag"}
]}

View file

@ -16,19 +16,8 @@ def html5_test_files(subdirectory)
Dir[File.join(TESTDATA_DIR, subdirectory, '*.*')] Dir[File.join(TESTDATA_DIR, subdirectory, '*.*')]
end end
begin require 'rubygems'
require 'rubygems' require 'json'
require 'json'
rescue LoadError
class JSON
def self.parse json
json.gsub!(/"\s*:/, '"=>')
json.gsub!(/\\u[0-9a-fA-F]{4}/) {|x| [x[2..-1].to_i(16)].pack('U')}
null = nil
eval json
end
end
end
module HTML5 module HTML5
module TestSupport module TestSupport

View file

@ -6,7 +6,7 @@ XMLELEM = /<(\w+\s*)((?:[-:\w]+="[^"]*"\s*)+)(\/?)>/
def assert_xml_equal(input, expected=nil, parser=HTML5::XMLParser) def assert_xml_equal(input, expected=nil, parser=HTML5::XMLParser)
sortattrs = proc {"<#{$1+$2.split.sort.join(' ')+$3}>"} sortattrs = proc {"<#{$1+$2.split.sort.join(' ')+$3}>"}
document = parser.parse(input.chomp).root document = parser.parse(input.chomp, :lowercase_attr_name => false, :lowercase_element_name => false).root
if not expected if not expected
expected = input.chomp.gsub(XMLELEM,&sortattrs) expected = input.chomp.gsub(XMLELEM,&sortattrs)
expected = expected.gsub(/&#(\d+);/) {[$1.to_i].pack('U')} expected = expected.gsub(/&#(\d+);/) {[$1.to_i].pack('U')}
@ -257,6 +257,22 @@ EOX1
<head><title>PROLOG</title></head> <head><title>PROLOG</title></head>
<body> <body>
</body></html> </body></html>
EOX2
end
def test_tagsoup
assert_xhtml_equal <<EOX1, <<EOX2.strip
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>TAGSOUP</title></head>
<body>
<u><blockquote><p></u>
</body></html>
EOX1
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>TAGSOUP</title></head>
<body>
<u/><blockquote><u/><p><u/>
</p></blockquote></body></html>
EOX2 EOX2
end end

View file

@ -26,8 +26,9 @@ class Html5ParserTestCase < Test::Unit::TestCase
test_name = File.basename(test_file).sub('.dat', '') test_name = File.basename(test_file).sub('.dat', '')
TestData.new(test_file, %w(data errors document-fragment document)). TestData.new(test_file, %w(data errors document-fragment document)).
each_with_index do |(input, errors, innerHTML, expected), index| each_with_index do |(input, errors, inner_html, expected), index|
errors = errors.split("\n")
expected = expected.gsub("\n| ","\n")[2..-1] expected = expected.gsub("\n| ","\n")[2..-1]
$tree_types_to_test.each do |tree_name| $tree_types_to_test.each do |tree_name|
@ -35,8 +36,8 @@ class Html5ParserTestCase < Test::Unit::TestCase
parser = HTMLParser.new(:tree => TreeBuilders[tree_name]) parser = HTMLParser.new(:tree => TreeBuilders[tree_name])
if innerHTML if inner_html
parser.parseFragment(input, innerHTML) parser.parse_fragment(input, inner_html)
else else
parser.parse(input) parser.parse(input)
end end
@ -49,16 +50,15 @@ class Html5ParserTestCase < Test::Unit::TestCase
'', 'Recieved:', actual_output '', 'Recieved:', actual_output
].join("\n") ].join("\n")
if $CHECK_PARSER_ERRORS actual_errors = parser.errors.map do |(line, col), message|
actual_errors = parser.errors.map do |(line, col), message| 'Line: %i Col: %i %s' % [line, col, message]
'Line: %i Col: %i %s' % [line, col, message]
end
assert_equal errors.length, parser.errors.length, [
'Input', input + "\n",
'Expected errors:', errors.join("\n"),
'Actual errors:', actual_errors.join("\n")
].join("\n")
end end
assert_equal errors.length, parser.errors.length, [
'', 'Input', input,
'', "Expected errors (#{errors.length}):", errors.join("\n"),
'', "Actual errors (#{actual_errors.length}):",
actual_errors.join("\n")
].join("\n")
end end
end end

View file

@ -12,17 +12,17 @@ class SanitizeTest < Test::Unit::TestCase
include HTML5 include HTML5
def sanitize_xhtml stream def sanitize_xhtml stream
XHTMLParser.parseFragment(stream, {:tokenizer => HTMLSanitizer, :encoding => 'utf-8'}).to_s XHTMLParser.parse_fragment(stream, {:tokenizer => HTMLSanitizer, :encoding => 'utf-8', :lowercase_element_name => false, :lowercase_attr_name => false}).to_s
end end
def sanitize_html stream def sanitize_html stream
HTMLParser.parseFragment(stream, {:tokenizer => HTMLSanitizer, :encoding => 'utf-8'}).to_s HTMLParser.parse_fragment(stream, {:tokenizer => HTMLSanitizer, :encoding => 'utf-8', :lowercase_element_name => false, :lowercase_attr_name => false}).to_s
end end
def sanitize_rexml stream def sanitize_rexml stream
require 'rexml/document' require 'rexml/document'
doc = REXML::Document.new("<div xmlns='http://www.w3.org/1999/xhtml'>#{stream}</div>") doc = REXML::Document.new("<div xmlns='http://www.w3.org/1999/xhtml'>#{stream}</div>")
tokens = TreeWalkers.getTreeWalker('rexml').new(doc) tokens = TreeWalkers.get_tree_walker('rexml').new(doc)
XHTMLSerializer.serialize(tokens, {:encoding=>'utf-8', XHTMLSerializer.serialize(tokens, {:encoding=>'utf-8',
:quote_char => "'", :quote_char => "'",
:inject_meta_charset => false, :inject_meta_charset => false,
@ -39,8 +39,8 @@ class SanitizeTest < Test::Unit::TestCase
HTMLSanitizer::ALLOWED_ELEMENTS.each do |tag_name| HTMLSanitizer::ALLOWED_ELEMENTS.each do |tag_name|
define_method "test_should_allow_#{tag_name}_tag" do define_method "test_should_allow_#{tag_name}_tag" do
input = "<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>" input = "<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>"
htmloutput = "<#{tag_name.downcase} title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</#{tag_name.downcase}>" htmloutput = "<#{tag_name.downcase} title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</#{tag_name.downcase}>"
xhtmloutput = "<#{tag_name} title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</#{tag_name}>" xhtmloutput = "<#{tag_name} title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</#{tag_name}>"
rexmloutput = xhtmloutput rexmloutput = xhtmloutput

View file

@ -12,17 +12,17 @@ class JsonWalker < HTML5::TreeWalkers::Base
@tree.each do |token| @tree.each do |token|
case token[0] case token[0]
when 'StartTag' when 'StartTag'
yield startTag(token[1], token[2]) yield start_tag(token[1], token[2])
when 'EndTag' when 'EndTag'
yield endTag(token[1]) yield end_tag(token[1])
when 'EmptyTag' when 'EmptyTag'
yield emptyTag(token[1], token[2]) yield empty_tag(token[1], token[2])
when 'Comment' when 'Comment'
yield comment(token[1]) yield comment(token[1])
when 'Characters', 'SpaceCharacters' when 'Characters', 'SpaceCharacters'
text(token[1]) {|textToken| yield textToken} text(token[1]) {|textToken| yield textToken}
when 'Doctype' when 'Doctype'
yield doctype(token[1]) yield doctype(token[1], token[2], token[3])
else else
raise "Unknown token type: " + token[0] raise "Unknown token type: " + token[0]
end end

View file

@ -0,0 +1,27 @@
require File.join(File.dirname(__FILE__), 'preamble')
require "html5/sniffer"
# Data-driven tests for feed-type sniffing.
#
# For every fixture file returned by html5_test_files('sniffer'), the JSON
# content is parsed into a list of cases and one test method is generated per
# case. Each case is read via its 'input' and 'type' keys: the result of
# html_or_feed(input) must equal the expected type.
# NOTE(review): html_or_feed is presumably provided by the included Sniffer
# module -- confirm against html5/sniffer.
class TestFeedTypeSniffer < Test::Unit::TestCase
  include HTML5
  include TestSupport
  include Sniffer

  html5_test_files('sniffer').each do |fixture_path|
    # Fixture name with the '.test' marker removed; used in the method name.
    fixture_name = File.basename(fixture_path).sub('.test', '')
    cases = JSON.parse(File.read(fixture_path))

    cases.each_with_index do |test_case, position|
      # Test methods are numbered from 1 within each fixture file.
      method_name = "test_#{fixture_name}_#{position + 1}"

      define_method(method_name) do
        expected_type = test_case['type']
        assert_equal expected_type, html_or_feed(test_case['input'])
      end
    end
  end

  # Earlier draft kept for reference (disabled):
  # each_with_index do |t, i|
  # define_method "test_#{i}" do
  # assert_equal t[0], sniff_feed_type(t[1])
  # end
  # end
end

View file

@ -6,6 +6,33 @@ require 'tokenizer_test_parser'
class Html5TokenizerTestCase < Test::Unit::TestCase class Html5TokenizerTestCase < Test::Unit::TestCase
# Asserts that the tokens produced by the tokenizer match the expected tokens.
#
# expectedTokens   - Array of expected tokens; parse errors appear as the
#                    String "ParseError".
# receivedTokens   - Array of tokens actually produced, in the same format.
# ignoreErrorOrder - When falsy (including nil), the two token lists must be
#                    equal element for element. When truthy, the position of
#                    "ParseError" entries relative to the other tokens is
#                    ignored: only the sequence of non-error tokens and the
#                    sequence (hence the count) of parse errors must match.
# message          - Failure message passed through to assert_equal.
#
# Always performs an assertion. Previously the strict branch only *returned*
# the result of the comparison, which callers discard, so strict-order
# mismatches could never fail a test.
def assert_tokens_match(expectedTokens, receivedTokens, ignoreErrorOrder, message)
  unless ignoreErrorOrder
    # Strict mode: exact match, including where parse errors occur.
    return assert_equal(expectedTokens, receivedTokens, message)
  end

  # Split each list into [non-parse-error tokens, parse-error tokens];
  # partition keeps the original relative order inside each group.
  expected = expectedTokens.partition { |token| token != "ParseError" }
  received = receivedTokens.partition { |token| token != "ParseError" }
  assert_equal expected, received, message
end
def type_of?(token_name, token) def type_of?(token_name, token)
token != 'ParseError' and token_name == token.first token != 'ParseError' and token_name == token.first
end end
@ -38,9 +65,9 @@ class Html5TokenizerTestCase < Test::Unit::TestCase
assert_nothing_raised message do assert_nothing_raised message do
tokenizer = HTML5::HTMLTokenizer.new(data['input']) tokenizer = HTML5::HTMLTokenizer.new(data['input'])
tokenizer.contentModelFlag = content_model_flag.to_sym tokenizer.content_model_flag = content_model_flag.to_sym
tokenizer.currentToken = {:type => :startTag, :name => data['lastStartTag']} if data.has_key?('lastStartTag') tokenizer.current_token = {:type => :startTag, :name => data['lastStartTag']} if data.has_key?('lastStartTag')
tokens = TokenizerTestParser.new(tokenizer).parse tokens = TokenizerTestParser.new(tokenizer).parse
@ -48,7 +75,7 @@ class Html5TokenizerTestCase < Test::Unit::TestCase
expected = concatenate_consecutive_characters(data['output']) expected = concatenate_consecutive_characters(data['output'])
assert_equal expected, actual, message assert_tokens_match expected, actual, data["ignoreErrorOrder"], message
end end
end end
end end

View file

@ -60,7 +60,11 @@ class TestTreeWalkers < Test::Unit::TestCase
when :Comment when :Comment
output << "#{' '*indent}<!-- #{token[:data]} -->" output << "#{' '*indent}<!-- #{token[:data]} -->"
when :Doctype when :Doctype
output << "#{' '*indent}<!DOCTYPE #{token[:name]}>" if token[:name] and token[:name].any?
output << "#{' '*indent}<!DOCTYPE #{token[:name]}>"
else
output << "#{' '*indent}<!DOCTYPE >"
end
when :Characters, :SpaceCharacters when :Characters, :SpaceCharacters
output << "#{' '*indent}\"#{token[:data]}\"" output << "#{' '*indent}\"#{token[:data]}\""
else else
@ -76,7 +80,7 @@ class TestTreeWalkers < Test::Unit::TestCase
next if test_name == 'tests5' # TODO next if test_name == 'tests5' # TODO
TestData.new(test_file, %w(data errors document-fragment document)). TestData.new(test_file, %w(data errors document-fragment document)).
each_with_index do |(input, errors, innerHTML, expected), index| each_with_index do |(input, errors, inner_html, expected), index|
expected = expected.gsub("\n| ","\n")[2..-1] expected = expected.gsub("\n| ","\n")[2..-1]
@ -86,13 +90,13 @@ class TestTreeWalkers < Test::Unit::TestCase
parser = HTML5::HTMLParser.new(:tree => tree_class[:builder]) parser = HTML5::HTMLParser.new(:tree => tree_class[:builder])
if innerHTML if inner_html
parser.parseFragment(input, innerHTML) parser.parse_fragment(input, inner_html)
else else
parser.parse(input) parser.parse(input)
end end
document = parser.tree.getDocument document = parser.tree.get_document
begin begin
output = sortattrs(convertTokens(tree_class[:walker].new(document))) output = sortattrs(convertTokens(tree_class[:walker].new(document)))

View file

@ -54,7 +54,7 @@ class TokenizerTestParser
@outputTokens.push(["Character", token[:data]]) @outputTokens.push(["Character", token[:data]])
end end
def processEOF(token) def process_eof(token)
end end
def processParseError(token) def processParseError(token)