Minor S5 tweaks and Sync with Latest HTML5lib

This commit is contained in:
Jacques Distler 2007-08-30 12:19:10 -05:00
parent dbed460843
commit 81d3cdc8e4
81 changed files with 9887 additions and 1687 deletions

View file

@ -18,7 +18,7 @@ xml.feed('xmlns' => "http://www.w3.org/2005/Atom", "xml:lang" => 'en') do
xml.name(page.author)
end
if @hide_description
xml.summary('Content suppressed.', 'type' => 'text')
xml.summary("Updated by #{page.author} on #{page.updated_at.getgm.strftime("%Y-%m-%d")} at #{page.updated_at.getgm.strftime("%H:%M:%SZ")}.", 'type' => 'text')
else
xml.content('type' => 'xhtml', 'xml:base' => url_for(:only_path => false, :web => @web_name, :action => @link_action, :id => page.name) ) do
xml.div('xmlns' => 'http://www.w3.org/1999/xhtml' ) do

View file

@ -16,4 +16,4 @@ table.plaintable {
text-align:center;
margin-left:30px;
}
.noborder td, .noborder th {border:0}

View file

@ -1,6 +1,6 @@
/* Following are the presentation styles -- edit away! */
body {background: #FFF; color: #000; font-size: 2em;}
body {background: #FFF; color: #000; font-size: 1.6em;}
:link, :visited {text-decoration: none; color: #00C;}
#controls :active {color: #8A8 !important;}
#controls :focus {outline: 1px dotted #272;}

View file

@ -0,0 +1,64 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN" "http://www.w3.org/Math/DTD/mathml2/xhtml-math11-f.dtd" >
<!-- Do not edit this document! The system will likely break if you do. -->
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Notes</title>
<link rel="stylesheet" href="default/notes.css" type="text/css" />
<script type="text/javascript">
// <![CDATA[
document.onkeyup = opener.keys;
document.onkeypress = opener.trap;
document.onclick = opener.clicker;
// ]]>
</script>
</head>
<body onload="opener.s5NotesWindowLoaded=true;" onunload="opener.s5NotesWindowLoaded=false;">
<div class="timers" id="elapsed">
<h1>
<a href="#" onclick="opener.minimizeTimer('elapsed'); return false;">Elapsed Time</a>
</h1>
<ul>
<li>
<h2>Presentation</h2>
<span class="clock" id="elapsed-presentation">00:00:00</span>
</li>
<li>
<h2>Current Slide</h2>
<span class="clock" id="elapsed-slide">00:00:00</span>
</li>
</ul>
<div class="controls">
<a href="#reset-elapsed" onclick="opener.resetElapsedTime(); return false;" title="Reset Elapsed Time">|&larr;</a>
</div>
</div>
<div class="timers" id="remaining">
<h1>
<a href="#" onclick="opener.minimizeTimer('remaining'); return false;">Remaining Time</a>
</h1>
<p>
<a href="#subtract-remaining" class="control" id="minus" onclick="opener.alterRemainingTime('-5'); return false;" title="Subtract 5 Minutes">-</a>
<span class="clock" id="timeLeft">00:00:00</span>
<a href="#add-remaining" class="control" id="plus" onclick="opener.alterRemainingTime('5'); return false;" title="Add 5 Minutes">+</a>
</p>
<div class="controls">
<form action="#" onsubmit="opener.resetRemainingTime(); return false;">
<input type="text" class="text" id="startFrom" value="0" size="4" maxlength="4" />
<a href="#toggle-remaining" onclick="opener.toggleRemainingTime(); return false;" title="Pause/Run Remaining Time">||</a>
<a href="#reset-remaining" onclick="opener.resetRemainingTime(); return false;" title="Reset Remaining Time">|&larr;</a>
</form>
</div>
</div>
<h2 id="slide">...</h2>
<div id="notes"></div>
<h2 id="next">...</h2>
<div id="nextnotes"></div>
</body>
</html>

5
vendor/plugins/HTML5lib/History.txt vendored Normal file
View file

@ -0,0 +1,5 @@
== 0.1.0 / 2007-08-07
* 1 major enhancement
* Birthday!

59
vendor/plugins/HTML5lib/Manifest.txt vendored Normal file
View file

@ -0,0 +1,59 @@
History.txt
Manifest.txt
README
Rakefile.rb
lib/html5.rb
lib/html5/constants.rb
lib/html5/filters/base.rb
lib/html5/filters/inject_meta_charset.rb
lib/html5/filters/optionaltags.rb
lib/html5/filters/sanitizer.rb
lib/html5/filters/whitespace.rb
lib/html5/html5parser.rb
lib/html5/html5parser/after_body_phase.rb
lib/html5/html5parser/after_frameset_phase.rb
lib/html5/html5parser/after_head_phase.rb
lib/html5/html5parser/before_head_phase.rb
lib/html5/html5parser/in_body_phase.rb
lib/html5/html5parser/in_caption_phase.rb
lib/html5/html5parser/in_cell_phase.rb
lib/html5/html5parser/in_column_group_phase.rb
lib/html5/html5parser/in_frameset_phase.rb
lib/html5/html5parser/in_head_phase.rb
lib/html5/html5parser/in_row_phase.rb
lib/html5/html5parser/in_select_phase.rb
lib/html5/html5parser/in_table_body_phase.rb
lib/html5/html5parser/in_table_phase.rb
lib/html5/html5parser/initial_phase.rb
lib/html5/html5parser/phase.rb
lib/html5/html5parser/root_element_phase.rb
lib/html5/html5parser/trailing_end_phase.rb
lib/html5/inputstream.rb
lib/html5/liberalxmlparser.rb
lib/html5/sanitizer.rb
lib/html5/serializer.rb
lib/html5/serializer/htmlserializer.rb
lib/html5/serializer/xhtmlserializer.rb
lib/html5/tokenizer.rb
lib/html5/treebuilders.rb
lib/html5/treebuilders/base.rb
lib/html5/treebuilders/hpricot.rb
lib/html5/treebuilders/rexml.rb
lib/html5/treebuilders/simpletree.rb
lib/html5/treewalkers.rb
lib/html5/treewalkers/base.rb
lib/html5/treewalkers/hpricot.rb
lib/html5/treewalkers/rexml.rb
lib/html5/treewalkers/simpletree.rb
lib/html5/version.rb
parse.rb
tests/preamble.rb
tests/test_encoding.rb
tests/test_lxp.rb
tests/test_parser.rb
tests/test_sanitizer.rb
tests/test_serializer.rb
tests/test_stream.rb
tests/test_tokenizer.rb
tests/test_treewalkers.rb
tests/tokenizer_test_parser.rb

View file

@ -1,9 +1,45 @@
= HTML5lib
html5
by Ryan King, et al
http://code.google.com/p/html5lib
== Basic Usage
== DESCRIPTION:
require 'html5lib'
A ruby implementation of the parsing algorithm in HTML5.
doc = HTML5lib.parse('<html>...</html>')
doc.class # REXML::Document
== FEATURES/PROBLEMS:
== SYNOPSIS:
TODO
== REQUIREMENTS:
* chardet, only tested with 0.9.0
== INSTALL:
* sudo gem install html5
== LICENSE:
Copyright (c) 2006-2007 The Authors
Contributors:
James Graham - jg307@cam.ac.uk
Anne van Kesteren - annevankesteren@gmail.com
Lachlan Hunt - lachlan.hunt@lachy.id.au
Matt McDonald - kanashii@kanashii.ca
Sam Ruby - rubys@intertwingly.net
Ian Hickson (Google) - ian@hixie.ch
Thomas Broyer - t.broyer@ltgt.net
Jacques Distler - distler@golem.ph.utexas.edu
Ryan King - ryan@theryanking.com
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View file

@ -1,7 +1,33 @@
require 'rake'
require 'rake/testtask'
require 'hoe'
require 'lib/html5/version'
Rake::TestTask.new do |task|
task.pattern = 'tests/test_*.rb'
task.verbose = true
Hoe.new("html5", HTML5::VERSION) do |p|
p.name = "html5"
p.description = p.paragraphs_of('README', 2..5).join("\n\n")
p.summary = "HTML5 parser/tokenizer."
p.author = ['Ryan King'] # TODO: add more names
p.email = 'ryan@theryanking.com'
p.url = 'http://code.google.com/p/html5lib'
p.need_zip = true
p.extra_deps << ['chardet', '>= 0.9.0']
p.changes = p.paragraphs_of('History.txt', 0..1).join("\n\n")
end
require 'rcov/rcovtask'
namespace :test do
  namespace :coverage do
    # Removes the coverage.data file; the :coverage task below lists this
    # task as its prerequisite.
    desc "Delete aggregate coverage data."
    task :clean do
      rm_f "coverage.data"
    end
  end

  # Rcov task named :coverage that runs every tests/test_*.rb file and
  # writes its output under tests/coverage/.
  desc 'Aggregate code coverage for unit, functional and integration tests'
  Rcov::RcovTask.new(:coverage => "test:coverage:clean") do |rcov|
    rcov.libs << "tests"
    rcov.test_files = FileList["tests/test_*.rb"]
    rcov.output_dir = "tests/coverage/"
    rcov.verbose = true
  end
end

215
vendor/plugins/HTML5lib/bin/html5 vendored Executable file
View file

@ -0,0 +1,215 @@
#!/usr/bin/env ruby
$:.unshift File.dirname(__FILE__), 'lib'
# Parse the input named by the last command-line argument and print the
# result according to +opts+.
#
# opts - settings object; this method reads treebuilder, output,
#        parsemethod, profile and time from it.
# args - command-line arguments left after option parsing. The last entry
#        selects the input: an "http://" URL, "-" for standard input, or a
#        path to a file.
#
# Writes a message to standard error and exits with status 1 when no input
# argument is present.
def parse(opts, args)
  encoding = nil

  f = args[-1]
  if f
    begin
      if f[0..6] == 'http://'
        require 'open-uri'
        f = URI.parse(f).open
        encoding = f.charset
      elsif f == '-'
        f = $stdin
      else
        # File.open, not Kernel#open: Kernel#open treats an argument that
        # starts with "|" as a command to run instead of a path to read.
        f = File.open(f)
      end
    rescue
      # Nothing to do here: when opening fails, f still holds the raw
      # argument string, and that string is passed to the parser below.
      # NOTE(review): presumably an intentional fallback -- confirm.
    end
  else
    $stderr.write("No filename provided. Use -h for help\n")
    exit(1)
  end

  require 'html5/treebuilders'
  treebuilder = HTML5::TreeBuilders[opts.treebuilder]

  # The XML output mode uses the liberal XML parser; every other mode uses
  # the HTML parser.
  if opts.output == :xml
    require 'html5/liberalxmlparser'
    parser = HTML5::XMLParser.new(:tree=>treebuilder)
  else
    require 'html5/html5parser'
    parser = HTML5::HTMLParser.new(:tree=>treebuilder)
  end

  # Any parse method other than :parse additionally receives 'div' as its
  # second argument.
  if opts.parsemethod == :parse
    parse_args = [f, encoding]
  else
    parse_args = [f, 'div', encoding]
  end

  if opts.profile
    # Profiling run: the profile is printed to standard error and the parsed
    # document itself is not printed.
    require 'profiler'
    Profiler__::start_profile
    parser.send(opts.parsemethod, *parse_args)
    Profiler__::stop_profile
    Profiler__::print_profile($stderr)
  elsif opts.time
    require 'time' # TODO: switch to benchmark
    t0 = Time.new
    document = parser.send(opts.parsemethod, *parse_args)
    t1 = Time.new
    print_output(parser, document, opts)
    t2 = Time.new
    puts "\n\nRun took: %fs (plus %fs to print the output)"%[t1-t0, t2-t1]
  else
    document = parser.send(opts.parsemethod, *parse_args)
    print_output(parser, document, opts)
  end
end
# Write the parse result to standard output in the format selected by
# opts.output, optionally preceded by the character encoding and followed
# by the list of parse errors.
#
# parser   - parser that produced +document+; read for
#            tokenizer.stream.char_encoding, tree and errors.
# document - parse result; for the :tree format a value that does not
#            respond to each is treated as a single fragment.
# opts     - settings object; encoding, output, treebuilder, serializer and
#            error are read from it.
def print_output(parser, document, opts)
  if opts.encoding
    puts "Encoding: #{parser.tokenizer.stream.char_encoding}"
  end

  case opts.output
  when :xml
    print document
  when :html
    require 'html5/treewalkers'
    walker = HTML5::TreeWalkers[opts.treebuilder].new(document)
    require 'html5/serializer'
    puts HTML5::HTMLSerializer.serialize(walker, opts.serializer)
  when :hilite
    print document.hilite
  when :tree
    fragments = document.respond_to?(:each) ? document : [document]
    fragments.each do |fragment|
      puts parser.tree.testSerializer(fragment)
    end
  end

  return unless opts.error

  # Each error is a (position, message) pair; the position supplies the two
  # integers for the line/column format string.
  report = parser.errors.map do |pos, message|
    "Line %i Col %i" % pos + " " + message
  end
  $stdout.write("\nParse errors:\n" + report.join("\n") + "\n")
end
require 'ostruct'

# Default settings for a run, collected in a single OpenStruct.
options = OpenStruct.new(
  :profile     => false,
  :time        => false,
  :output      => :html,
  :treebuilder => 'simpletree',
  :error       => false,
  :encoding    => false,
  :parsemethod => :parse,
  :serializer  => {
    :encoding            => 'utf-8',
    :omit_optional_tags  => false,
    :inject_meta_charset => false
  }
)
require 'optparse'

# Command-line definition. Every switch writes into the options object;
# switches that only toggle one serializer setting are declared as
# (switch, description, key) rows and registered in a loop.
opts = OptionParser.new do |cli|
  cli.separator ""
  cli.separator "Parse Options:"

  cli.on("-b", "--treebuilder NAME") { |name| options.treebuilder = name }
  cli.on("-f", "--fragment", "Parse as a fragment") { options.parsemethod = :parse_fragment }

  cli.separator ""
  cli.separator "Filter Options:"

  [
    ["--[no-]inject-meta-charset", "inject <meta charset>", :inject_meta_charset],
    ["--[no-]strip-whitespace", "strip unnecessary whitespace", :strip_whitespace],
    ["--[no-]sanitize", "escape unsafe tags", :sanitize]
  ].each do |switch, description, key|
    cli.on(switch, description) { |value| options.serializer[key] = value }
  end

  cli.separator ""
  cli.separator "Output Options:"

  cli.on("--tree", "output as debug tree") { options.output = :tree }
  cli.on("-x", "--xml", "output as xml") do
    # XML output also switches the tree builder to rexml.
    options.output = :xml
    options.treebuilder = "rexml"
  end
  cli.on("--[no-]html", "Output as html") { |html| options.output = (html ? :html : nil) }
  cli.on("--hilite", "Output as formatted highlighted code.") { options.output = :hilite }
  cli.on("-e", "--error", "Print a list of parse errors") { |error| options.error = error }

  cli.separator ""
  cli.separator "Serialization Options:"

  [
    ["--[no-]omit-optional-tags", "Omit optional tags", :omit_optional_tags],
    ["--[no-]quote-attr-values", "Quote attribute values", :quote_attr_values],
    ["--[no-]use-best-quote-char", "Use best quote character", :use_best_quote_char],
    ["--quote-char C", "Use specified quote character", :quote_char],
    ["--[no-]minimize-boolean-attributes", "Minimize boolean attributes", :minimize_boolean_attributes],
    ["--[no-]use-trailing-solidus", "Use trailing solidus", :use_trailing_solidus],
    ["--[no-]escape-lt-in-attrs", "Escape less than signs in attribute values", :escape_lt_in_attrs],
    ["--[no-]escape-rcdata", "Escape rcdata element values", :escape_rcdata]
  ].each do |switch, description, key|
    cli.on(switch, description) { |value| options.serializer[key] = value }
  end

  cli.separator ""
  cli.separator "Other Options:"

  cli.on("-p", "--[no-]profile", "Profile the run") { |profile| options.profile = profile }
  cli.on("-t", "--[no-]time", "Time the run") { |time| options.time = time }
  cli.on("-c", "--[no-]encoding", "Print character encoding used") { |encoding| options.encoding = encoding }

  cli.on_tail("-h", "--help", "Show this message") do
    puts cli
    exit
  end
end

# parse! removes the recognised switches from ARGV, so what remains is
# handed to parse as the positional arguments.
opts.parse!(ARGV)
parse options, ARGV

View file

@ -1,11 +1,13 @@
require 'html5/html5parser'
require 'html5/version'
# Module-level convenience entry points; every method here forwards its
# arguments to HTMLParser.
# NOTE(review): this block appears to show the previous and the current
# revision of the same methods together (camelCase and snake_case fragment
# names, parse defined twice) -- confirm which lines belong in the file.
module HTML5
# Forwards +stream+ and +options+ to HTMLParser.parse.
def self.parse(stream, options={})
HTMLParser.parse(stream, options)
end
# camelCase fragment entry point.
# NOTE(review): forwards to HTMLParser.parse, exactly like parse above;
# presumably a fragment-specific parser entry point was intended -- confirm.
def self.parseFragment(stream, options={})
HTMLParser.parse(stream, options)
end
# Second definition of parse with an identical body; in Ruby the later
# definition replaces the earlier one.
def self.parse(stream, options={})
HTMLParser.parse(stream, options)
end
# snake_case fragment entry point.
# NOTE(review): also forwards to HTMLParser.parse, so as written it behaves
# the same as parse -- confirm whether it should call a fragment parser.
def self.parse_fragment(stream, options={})
HTMLParser.parse(stream, options)
end
end

View file

@ -161,23 +161,24 @@ module HTML5
]
BOOLEAN_ATTRIBUTES = {
:global => %w[irrelevant],
'style' => %w[scoped],
'img' => %w[ismap],
'audio' => %w[autoplay controls],
'video' => %w[autoplay controls],
'script' => %w[defer async],
'details' => %w[open],
:global => %w[irrelevant],
'style' => %w[scoped],
'img' => %w[ismap],
'audio' => %w[autoplay controls],
'video' => %w[autoplay controls],
'script' => %w[defer async],
'details' => %w[open],
'datagrid' => %w[multiple disabled],
'command' => %w[hidden disabled checked default],
'menu' => %w[autosubmit],
'command' => %w[hidden disabled checked default],
'menu' => %w[autosubmit],
'fieldset' => %w[disabled readonly],
'option' => %w[disabled readonly selected],
'option' => %w[disabled readonly selected],
'optgroup' => %w[disabled readonly],
'button' => %w[disabled autofocus],
'input' => %w[disabled readonly required autofocus checked ismap],
'select' => %w[disabled readonly autofocus multiple],
'output' => %w[disabled readonly]
'button' => %w[disabled autofocus],
'input' => %w[disabled readonly required autofocus checked ismap],
'select' => %w[disabled readonly autofocus multiple],
'output' => %w[disabled readonly]
}
# entitiesWindows1252 has to be _ordered_ and needs to have an index.
@ -227,372 +228,372 @@ module HTML5
# print ' ENTITIES = {\n ' + ',\n '.join(list) + '\n }'
ENTITIES = {
'AElig' => "\xc3\x86",
'AElig;' => "\xc3\x86",
'AMP' => '&',
'AMP;' => '&',
'Aacute' => "\xc3\x81",
'Aacute;' => "\xc3\x81",
'Acirc' => "\xc3\x82",
'Acirc;' => "\xc3\x82",
'Agrave' => "\xc3\x80",
'Agrave;' => "\xc3\x80",
'Alpha;' => "\xce\x91",
'Aring' => "\xc3\x85",
'Aring;' => "\xc3\x85",
'Atilde' => "\xc3\x83",
'Atilde;' => "\xc3\x83",
'Auml' => "\xc3\x84",
'Auml;' => "\xc3\x84",
'Beta;' => "\xce\x92",
'COPY' => "\xc2\xa9",
'COPY;' => "\xc2\xa9",
'Ccedil' => "\xc3\x87",
'Ccedil;' => "\xc3\x87",
'Chi;' => "\xce\xa7",
'Dagger;' => "\xe2\x80\xa1",
'Delta;' => "\xce\x94",
'ETH' => "\xc3\x90",
'ETH;' => "\xc3\x90",
'Eacute' => "\xc3\x89",
'Eacute;' => "\xc3\x89",
'Ecirc' => "\xc3\x8a",
'Ecirc;' => "\xc3\x8a",
'Egrave' => "\xc3\x88",
'Egrave;' => "\xc3\x88",
'Epsilon;' => "\xce\x95",
'Eta;' => "\xce\x97",
'Euml' => "\xc3\x8b",
'Euml;' => "\xc3\x8b",
'GT' => '>',
'GT;' => '>',
'Gamma;' => "\xce\x93",
'Iacute' => "\xc3\x8d",
'Iacute;' => "\xc3\x8d",
'Icirc' => "\xc3\x8e",
'Icirc;' => "\xc3\x8e",
'Igrave' => "\xc3\x8c",
'Igrave;' => "\xc3\x8c",
'Iota;' => "\xce\x99",
'Iuml' => "\xc3\x8f",
'Iuml;' => "\xc3\x8f",
'Kappa;' => "\xce\x9a",
'LT' => '<',
'LT;' => '<',
'Lambda;' => "\xce\x9b",
'Mu;' => "\xce\x9c",
'Ntilde' => "\xc3\x91",
'Ntilde;' => "\xc3\x91",
'Nu;' => "\xce\x9d",
'OElig;' => "\xc5\x92",
'Oacute' => "\xc3\x93",
'Oacute;' => "\xc3\x93",
'Ocirc' => "\xc3\x94",
'Ocirc;' => "\xc3\x94",
'Ograve' => "\xc3\x92",
'Ograve;' => "\xc3\x92",
'Omega;' => "\xce\xa9",
'Omicron;' => "\xce\x9f",
'Oslash' => "\xc3\x98",
'Oslash;' => "\xc3\x98",
'Otilde' => "\xc3\x95",
'Otilde;' => "\xc3\x95",
'Ouml' => "\xc3\x96",
'Ouml;' => "\xc3\x96",
'Phi;' => "\xce\xa6",
'Pi;' => "\xce\xa0",
'Prime;' => "\xe2\x80\xb3",
'Psi;' => "\xce\xa8",
'QUOT' => '"',
'QUOT;' => '"',
'REG' => "\xc2\xae",
'REG;' => "\xc2\xae",
'Rho;' => "\xce\xa1",
'Scaron;' => "\xc5\xa0",
'Sigma;' => "\xce\xa3",
'THORN' => "\xc3\x9e",
'THORN;' => "\xc3\x9e",
'TRADE;' => "\xe2\x84\xa2",
'Tau;' => "\xce\xa4",
'Theta;' => "\xce\x98",
'Uacute' => "\xc3\x9a",
'Uacute;' => "\xc3\x9a",
'Ucirc' => "\xc3\x9b",
'Ucirc;' => "\xc3\x9b",
'Ugrave' => "\xc3\x99",
'Ugrave;' => "\xc3\x99",
'Upsilon;' => "\xce\xa5",
'Uuml' => "\xc3\x9c",
'Uuml;' => "\xc3\x9c",
'Xi;' => "\xce\x9e",
'Yacute' => "\xc3\x9d",
'Yacute;' => "\xc3\x9d",
'Yuml;' => "\xc5\xb8",
'Zeta;' => "\xce\x96",
'aacute' => "\xc3\xa1",
'aacute;' => "\xc3\xa1",
'acirc' => "\xc3\xa2",
'acirc;' => "\xc3\xa2",
'acute' => "\xc2\xb4",
'acute;' => "\xc2\xb4",
'aelig' => "\xc3\xa6",
'aelig;' => "\xc3\xa6",
'agrave' => "\xc3\xa0",
'agrave;' => "\xc3\xa0",
'alefsym;' => "\xe2\x84\xb5",
'alpha;' => "\xce\xb1",
'amp' => '&',
'amp;' => '&',
'and;' => "\xe2\x88\xa7",
'ang;' => "\xe2\x88\xa0",
'apos;' => "'",
'aring' => "\xc3\xa5",
'aring;' => "\xc3\xa5",
'asymp;' => "\xe2\x89\x88",
'atilde' => "\xc3\xa3",
'atilde;' => "\xc3\xa3",
'auml' => "\xc3\xa4",
'auml;' => "\xc3\xa4",
'bdquo;' => "\xe2\x80\x9e",
'beta;' => "\xce\xb2",
'brvbar' => "\xc2\xa6",
'brvbar;' => "\xc2\xa6",
'bull;' => "\xe2\x80\xa2",
'cap;' => "\xe2\x88\xa9",
'ccedil' => "\xc3\xa7",
'ccedil;' => "\xc3\xa7",
'cedil' => "\xc2\xb8",
'cedil;' => "\xc2\xb8",
'cent' => "\xc2\xa2",
'cent;' => "\xc2\xa2",
'chi;' => "\xcf\x87",
'circ;' => "\xcb\x86",
'clubs;' => "\xe2\x99\xa3",
'cong;' => "\xe2\x89\x85",
'copy' => "\xc2\xa9",
'copy;' => "\xc2\xa9",
'crarr;' => "\xe2\x86\xb5",
'cup;' => "\xe2\x88\xaa",
'curren' => "\xc2\xa4",
'curren;' => "\xc2\xa4",
'dArr;' => "\xe2\x87\x93",
'dagger;' => "\xe2\x80\xa0",
'darr;' => "\xe2\x86\x93",
'deg' => "\xc2\xb0",
'deg;' => "\xc2\xb0",
'delta;' => "\xce\xb4",
'diams;' => "\xe2\x99\xa6",
'divide' => "\xc3\xb7",
'divide;' => "\xc3\xb7",
'eacute' => "\xc3\xa9",
'eacute;' => "\xc3\xa9",
'ecirc' => "\xc3\xaa",
'ecirc;' => "\xc3\xaa",
'egrave' => "\xc3\xa8",
'egrave;' => "\xc3\xa8",
'empty;' => "\xe2\x88\x85",
'emsp;' => "\xe2\x80\x83",
'ensp;' => "\xe2\x80\x82",
'epsilon;' => "\xce\xb5",
'equiv;' => "\xe2\x89\xa1",
'eta;' => "\xce\xb7",
'eth' => "\xc3\xb0",
'eth;' => "\xc3\xb0",
'euml' => "\xc3\xab",
'euml;' => "\xc3\xab",
'euro;' => "\xe2\x82\xac",
'exist;' => "\xe2\x88\x83",
'fnof;' => "\xc6\x92",
'forall;' => "\xe2\x88\x80",
'frac12' => "\xc2\xbd",
'frac12;' => "\xc2\xbd",
'frac14' => "\xc2\xbc",
'frac14;' => "\xc2\xbc",
'frac34' => "\xc2\xbe",
'frac34;' => "\xc2\xbe",
'frasl;' => "\xe2\x81\x84",
'gamma;' => "\xce\xb3",
'ge;' => "\xe2\x89\xa5",
'gt' => '>',
'gt;' => '>',
'hArr;' => "\xe2\x87\x94",
'harr;' => "\xe2\x86\x94",
'hearts;' => "\xe2\x99\xa5",
'hellip;' => "\xe2\x80\xa6",
'iacute' => "\xc3\xad",
'iacute;' => "\xc3\xad",
'icirc' => "\xc3\xae",
'icirc;' => "\xc3\xae",
'iexcl' => "\xc2\xa1",
'iexcl;' => "\xc2\xa1",
'igrave' => "\xc3\xac",
'igrave;' => "\xc3\xac",
'image;' => "\xe2\x84\x91",
'infin;' => "\xe2\x88\x9e",
'int;' => "\xe2\x88\xab",
'iota;' => "\xce\xb9",
'iquest' => "\xc2\xbf",
'iquest;' => "\xc2\xbf",
'isin;' => "\xe2\x88\x88",
'iuml' => "\xc3\xaf",
'iuml;' => "\xc3\xaf",
'kappa;' => "\xce\xba",
'lArr;' => "\xe2\x87\x90",
'lambda;' => "\xce\xbb",
'lang;' => "\xe3\x80\x88",
'laquo' => "\xc2\xab",
'laquo;' => "\xc2\xab",
'larr;' => "\xe2\x86\x90",
'lceil;' => "\xe2\x8c\x88",
'ldquo;' => "\xe2\x80\x9c",
'le;' => "\xe2\x89\xa4",
'lfloor;' => "\xe2\x8c\x8a",
'lowast;' => "\xe2\x88\x97",
'loz;' => "\xe2\x97\x8a",
'lrm;' => "\xe2\x80\x8e",
'lsaquo;' => "\xe2\x80\xb9",
'lsquo;' => "\xe2\x80\x98",
'lt' => '<',
'lt;' => '<',
'macr' => "\xc2\xaf",
'macr;' => "\xc2\xaf",
'mdash;' => "\xe2\x80\x94",
'micro' => "\xc2\xb5",
'micro;' => "\xc2\xb5",
'middot' => "\xc2\xb7",
'middot;' => "\xc2\xb7",
'minus;' => "\xe2\x88\x92",
'mu;' => "\xce\xbc",
'nabla;' => "\xe2\x88\x87",
'nbsp' => "\xc2\xa0",
'nbsp;' => "\xc2\xa0",
'ndash;' => "\xe2\x80\x93",
'ne;' => "\xe2\x89\xa0",
'ni;' => "\xe2\x88\x8b",
'not' => "\xc2\xac",
'not;' => "\xc2\xac",
'notin;' => "\xe2\x88\x89",
'nsub;' => "\xe2\x8a\x84",
'ntilde' => "\xc3\xb1",
'ntilde;' => "\xc3\xb1",
'nu;' => "\xce\xbd",
'oacute' => "\xc3\xb3",
'oacute;' => "\xc3\xb3",
'ocirc' => "\xc3\xb4",
'ocirc;' => "\xc3\xb4",
'oelig;' => "\xc5\x93",
'ograve' => "\xc3\xb2",
'ograve;' => "\xc3\xb2",
'oline;' => "\xe2\x80\xbe",
'omega;' => "\xcf\x89",
'omicron;' => "\xce\xbf",
'oplus;' => "\xe2\x8a\x95",
'or;' => "\xe2\x88\xa8",
'ordf' => "\xc2\xaa",
'ordf;' => "\xc2\xaa",
'ordm' => "\xc2\xba",
'ordm;' => "\xc2\xba",
'oslash' => "\xc3\xb8",
'oslash;' => "\xc3\xb8",
'otilde' => "\xc3\xb5",
'otilde;' => "\xc3\xb5",
'otimes;' => "\xe2\x8a\x97",
'ouml' => "\xc3\xb6",
'ouml;' => "\xc3\xb6",
'para' => "\xc2\xb6",
'para;' => "\xc2\xb6",
'part;' => "\xe2\x88\x82",
'permil;' => "\xe2\x80\xb0",
'perp;' => "\xe2\x8a\xa5",
'phi;' => "\xcf\x86",
'pi;' => "\xcf\x80",
'piv;' => "\xcf\x96",
'plusmn' => "\xc2\xb1",
'plusmn;' => "\xc2\xb1",
'pound' => "\xc2\xa3",
'pound;' => "\xc2\xa3",
'prime;' => "\xe2\x80\xb2",
'prod;' => "\xe2\x88\x8f",
'prop;' => "\xe2\x88\x9d",
'psi;' => "\xcf\x88",
'quot' => '"',
'quot;' => '"',
'rArr;' => "\xe2\x87\x92",
'radic;' => "\xe2\x88\x9a",
'rang;' => "\xe3\x80\x89",
'raquo' => "\xc2\xbb",
'raquo;' => "\xc2\xbb",
'rarr;' => "\xe2\x86\x92",
'rceil;' => "\xe2\x8c\x89",
'rdquo;' => "\xe2\x80\x9d",
'real;' => "\xe2\x84\x9c",
'reg' => "\xc2\xae",
'reg;' => "\xc2\xae",
'rfloor;' => "\xe2\x8c\x8b",
'rho;' => "\xcf\x81",
'rlm;' => "\xe2\x80\x8f",
'rsaquo;' => "\xe2\x80\xba",
'rsquo;' => "\xe2\x80\x99",
'sbquo;' => "\xe2\x80\x9a",
'scaron;' => "\xc5\xa1",
'sdot;' => "\xe2\x8b\x85",
'sect' => "\xc2\xa7",
'sect;' => "\xc2\xa7",
'shy' => "\xc2\xad",
'shy;' => "\xc2\xad",
'sigma;' => "\xcf\x83",
'sigmaf;' => "\xcf\x82",
'sim;' => "\xe2\x88\xbc",
'spades;' => "\xe2\x99\xa0",
'sub;' => "\xe2\x8a\x82",
'sube;' => "\xe2\x8a\x86",
'sum;' => "\xe2\x88\x91",
'sup1' => "\xc2\xb9",
'sup1;' => "\xc2\xb9",
'sup2' => "\xc2\xb2",
'sup2;' => "\xc2\xb2",
'sup3' => "\xc2\xb3",
'sup3;' => "\xc2\xb3",
'sup;' => "\xe2\x8a\x83",
'supe;' => "\xe2\x8a\x87",
'szlig' => "\xc3\x9f",
'szlig;' => "\xc3\x9f",
'tau;' => "\xcf\x84",
'there4;' => "\xe2\x88\xb4",
'theta;' => "\xce\xb8",
'AElig' => "\xc3\x86",
'AElig;' => "\xc3\x86",
'AMP' => '&',
'AMP;' => '&',
'Aacute' => "\xc3\x81",
'Aacute;' => "\xc3\x81",
'Acirc' => "\xc3\x82",
'Acirc;' => "\xc3\x82",
'Agrave' => "\xc3\x80",
'Agrave;' => "\xc3\x80",
'Alpha;' => "\xce\x91",
'Aring' => "\xc3\x85",
'Aring;' => "\xc3\x85",
'Atilde' => "\xc3\x83",
'Atilde;' => "\xc3\x83",
'Auml' => "\xc3\x84",
'Auml;' => "\xc3\x84",
'Beta;' => "\xce\x92",
'COPY' => "\xc2\xa9",
'COPY;' => "\xc2\xa9",
'Ccedil' => "\xc3\x87",
'Ccedil;' => "\xc3\x87",
'Chi;' => "\xce\xa7",
'Dagger;' => "\xe2\x80\xa1",
'Delta;' => "\xce\x94",
'ETH' => "\xc3\x90",
'ETH;' => "\xc3\x90",
'Eacute' => "\xc3\x89",
'Eacute;' => "\xc3\x89",
'Ecirc' => "\xc3\x8a",
'Ecirc;' => "\xc3\x8a",
'Egrave' => "\xc3\x88",
'Egrave;' => "\xc3\x88",
'Epsilon;' => "\xce\x95",
'Eta;' => "\xce\x97",
'Euml' => "\xc3\x8b",
'Euml;' => "\xc3\x8b",
'GT' => '>',
'GT;' => '>',
'Gamma;' => "\xce\x93",
'Iacute' => "\xc3\x8d",
'Iacute;' => "\xc3\x8d",
'Icirc' => "\xc3\x8e",
'Icirc;' => "\xc3\x8e",
'Igrave' => "\xc3\x8c",
'Igrave;' => "\xc3\x8c",
'Iota;' => "\xce\x99",
'Iuml' => "\xc3\x8f",
'Iuml;' => "\xc3\x8f",
'Kappa;' => "\xce\x9a",
'LT' => '<',
'LT;' => '<',
'Lambda;' => "\xce\x9b",
'Mu;' => "\xce\x9c",
'Ntilde' => "\xc3\x91",
'Ntilde;' => "\xc3\x91",
'Nu;' => "\xce\x9d",
'OElig;' => "\xc5\x92",
'Oacute' => "\xc3\x93",
'Oacute;' => "\xc3\x93",
'Ocirc' => "\xc3\x94",
'Ocirc;' => "\xc3\x94",
'Ograve' => "\xc3\x92",
'Ograve;' => "\xc3\x92",
'Omega;' => "\xce\xa9",
'Omicron;' => "\xce\x9f",
'Oslash' => "\xc3\x98",
'Oslash;' => "\xc3\x98",
'Otilde' => "\xc3\x95",
'Otilde;' => "\xc3\x95",
'Ouml' => "\xc3\x96",
'Ouml;' => "\xc3\x96",
'Phi;' => "\xce\xa6",
'Pi;' => "\xce\xa0",
'Prime;' => "\xe2\x80\xb3",
'Psi;' => "\xce\xa8",
'QUOT' => '"',
'QUOT;' => '"',
'REG' => "\xc2\xae",
'REG;' => "\xc2\xae",
'Rho;' => "\xce\xa1",
'Scaron;' => "\xc5\xa0",
'Sigma;' => "\xce\xa3",
'THORN' => "\xc3\x9e",
'THORN;' => "\xc3\x9e",
'TRADE;' => "\xe2\x84\xa2",
'Tau;' => "\xce\xa4",
'Theta;' => "\xce\x98",
'Uacute' => "\xc3\x9a",
'Uacute;' => "\xc3\x9a",
'Ucirc' => "\xc3\x9b",
'Ucirc;' => "\xc3\x9b",
'Ugrave' => "\xc3\x99",
'Ugrave;' => "\xc3\x99",
'Upsilon;' => "\xce\xa5",
'Uuml' => "\xc3\x9c",
'Uuml;' => "\xc3\x9c",
'Xi;' => "\xce\x9e",
'Yacute' => "\xc3\x9d",
'Yacute;' => "\xc3\x9d",
'Yuml;' => "\xc5\xb8",
'Zeta;' => "\xce\x96",
'aacute' => "\xc3\xa1",
'aacute;' => "\xc3\xa1",
'acirc' => "\xc3\xa2",
'acirc;' => "\xc3\xa2",
'acute' => "\xc2\xb4",
'acute;' => "\xc2\xb4",
'aelig' => "\xc3\xa6",
'aelig;' => "\xc3\xa6",
'agrave' => "\xc3\xa0",
'agrave;' => "\xc3\xa0",
'alefsym;' => "\xe2\x84\xb5",
'alpha;' => "\xce\xb1",
'amp' => '&',
'amp;' => '&',
'and;' => "\xe2\x88\xa7",
'ang;' => "\xe2\x88\xa0",
'apos;' => "'",
'aring' => "\xc3\xa5",
'aring;' => "\xc3\xa5",
'asymp;' => "\xe2\x89\x88",
'atilde' => "\xc3\xa3",
'atilde;' => "\xc3\xa3",
'auml' => "\xc3\xa4",
'auml;' => "\xc3\xa4",
'bdquo;' => "\xe2\x80\x9e",
'beta;' => "\xce\xb2",
'brvbar' => "\xc2\xa6",
'brvbar;' => "\xc2\xa6",
'bull;' => "\xe2\x80\xa2",
'cap;' => "\xe2\x88\xa9",
'ccedil' => "\xc3\xa7",
'ccedil;' => "\xc3\xa7",
'cedil' => "\xc2\xb8",
'cedil;' => "\xc2\xb8",
'cent' => "\xc2\xa2",
'cent;' => "\xc2\xa2",
'chi;' => "\xcf\x87",
'circ;' => "\xcb\x86",
'clubs;' => "\xe2\x99\xa3",
'cong;' => "\xe2\x89\x85",
'copy' => "\xc2\xa9",
'copy;' => "\xc2\xa9",
'crarr;' => "\xe2\x86\xb5",
'cup;' => "\xe2\x88\xaa",
'curren' => "\xc2\xa4",
'curren;' => "\xc2\xa4",
'dArr;' => "\xe2\x87\x93",
'dagger;' => "\xe2\x80\xa0",
'darr;' => "\xe2\x86\x93",
'deg' => "\xc2\xb0",
'deg;' => "\xc2\xb0",
'delta;' => "\xce\xb4",
'diams;' => "\xe2\x99\xa6",
'divide' => "\xc3\xb7",
'divide;' => "\xc3\xb7",
'eacute' => "\xc3\xa9",
'eacute;' => "\xc3\xa9",
'ecirc' => "\xc3\xaa",
'ecirc;' => "\xc3\xaa",
'egrave' => "\xc3\xa8",
'egrave;' => "\xc3\xa8",
'empty;' => "\xe2\x88\x85",
'emsp;' => "\xe2\x80\x83",
'ensp;' => "\xe2\x80\x82",
'epsilon;' => "\xce\xb5",
'equiv;' => "\xe2\x89\xa1",
'eta;' => "\xce\xb7",
'eth' => "\xc3\xb0",
'eth;' => "\xc3\xb0",
'euml' => "\xc3\xab",
'euml;' => "\xc3\xab",
'euro;' => "\xe2\x82\xac",
'exist;' => "\xe2\x88\x83",
'fnof;' => "\xc6\x92",
'forall;' => "\xe2\x88\x80",
'frac12' => "\xc2\xbd",
'frac12;' => "\xc2\xbd",
'frac14' => "\xc2\xbc",
'frac14;' => "\xc2\xbc",
'frac34' => "\xc2\xbe",
'frac34;' => "\xc2\xbe",
'frasl;' => "\xe2\x81\x84",
'gamma;' => "\xce\xb3",
'ge;' => "\xe2\x89\xa5",
'gt' => '>',
'gt;' => '>',
'hArr;' => "\xe2\x87\x94",
'harr;' => "\xe2\x86\x94",
'hearts;' => "\xe2\x99\xa5",
'hellip;' => "\xe2\x80\xa6",
'iacute' => "\xc3\xad",
'iacute;' => "\xc3\xad",
'icirc' => "\xc3\xae",
'icirc;' => "\xc3\xae",
'iexcl' => "\xc2\xa1",
'iexcl;' => "\xc2\xa1",
'igrave' => "\xc3\xac",
'igrave;' => "\xc3\xac",
'image;' => "\xe2\x84\x91",
'infin;' => "\xe2\x88\x9e",
'int;' => "\xe2\x88\xab",
'iota;' => "\xce\xb9",
'iquest' => "\xc2\xbf",
'iquest;' => "\xc2\xbf",
'isin;' => "\xe2\x88\x88",
'iuml' => "\xc3\xaf",
'iuml;' => "\xc3\xaf",
'kappa;' => "\xce\xba",
'lArr;' => "\xe2\x87\x90",
'lambda;' => "\xce\xbb",
'lang;' => "\xe3\x80\x88",
'laquo' => "\xc2\xab",
'laquo;' => "\xc2\xab",
'larr;' => "\xe2\x86\x90",
'lceil;' => "\xe2\x8c\x88",
'ldquo;' => "\xe2\x80\x9c",
'le;' => "\xe2\x89\xa4",
'lfloor;' => "\xe2\x8c\x8a",
'lowast;' => "\xe2\x88\x97",
'loz;' => "\xe2\x97\x8a",
'lrm;' => "\xe2\x80\x8e",
'lsaquo;' => "\xe2\x80\xb9",
'lsquo;' => "\xe2\x80\x98",
'lt' => '<',
'lt;' => '<',
'macr' => "\xc2\xaf",
'macr;' => "\xc2\xaf",
'mdash;' => "\xe2\x80\x94",
'micro' => "\xc2\xb5",
'micro;' => "\xc2\xb5",
'middot' => "\xc2\xb7",
'middot;' => "\xc2\xb7",
'minus;' => "\xe2\x88\x92",
'mu;' => "\xce\xbc",
'nabla;' => "\xe2\x88\x87",
'nbsp' => "\xc2\xa0",
'nbsp;' => "\xc2\xa0",
'ndash;' => "\xe2\x80\x93",
'ne;' => "\xe2\x89\xa0",
'ni;' => "\xe2\x88\x8b",
'not' => "\xc2\xac",
'not;' => "\xc2\xac",
'notin;' => "\xe2\x88\x89",
'nsub;' => "\xe2\x8a\x84",
'ntilde' => "\xc3\xb1",
'ntilde;' => "\xc3\xb1",
'nu;' => "\xce\xbd",
'oacute' => "\xc3\xb3",
'oacute;' => "\xc3\xb3",
'ocirc' => "\xc3\xb4",
'ocirc;' => "\xc3\xb4",
'oelig;' => "\xc5\x93",
'ograve' => "\xc3\xb2",
'ograve;' => "\xc3\xb2",
'oline;' => "\xe2\x80\xbe",
'omega;' => "\xcf\x89",
'omicron;' => "\xce\xbf",
'oplus;' => "\xe2\x8a\x95",
'or;' => "\xe2\x88\xa8",
'ordf' => "\xc2\xaa",
'ordf;' => "\xc2\xaa",
'ordm' => "\xc2\xba",
'ordm;' => "\xc2\xba",
'oslash' => "\xc3\xb8",
'oslash;' => "\xc3\xb8",
'otilde' => "\xc3\xb5",
'otilde;' => "\xc3\xb5",
'otimes;' => "\xe2\x8a\x97",
'ouml' => "\xc3\xb6",
'ouml;' => "\xc3\xb6",
'para' => "\xc2\xb6",
'para;' => "\xc2\xb6",
'part;' => "\xe2\x88\x82",
'permil;' => "\xe2\x80\xb0",
'perp;' => "\xe2\x8a\xa5",
'phi;' => "\xcf\x86",
'pi;' => "\xcf\x80",
'piv;' => "\xcf\x96",
'plusmn' => "\xc2\xb1",
'plusmn;' => "\xc2\xb1",
'pound' => "\xc2\xa3",
'pound;' => "\xc2\xa3",
'prime;' => "\xe2\x80\xb2",
'prod;' => "\xe2\x88\x8f",
'prop;' => "\xe2\x88\x9d",
'psi;' => "\xcf\x88",
'quot' => '"',
'quot;' => '"',
'rArr;' => "\xe2\x87\x92",
'radic;' => "\xe2\x88\x9a",
'rang;' => "\xe3\x80\x89",
'raquo' => "\xc2\xbb",
'raquo;' => "\xc2\xbb",
'rarr;' => "\xe2\x86\x92",
'rceil;' => "\xe2\x8c\x89",
'rdquo;' => "\xe2\x80\x9d",
'real;' => "\xe2\x84\x9c",
'reg' => "\xc2\xae",
'reg;' => "\xc2\xae",
'rfloor;' => "\xe2\x8c\x8b",
'rho;' => "\xcf\x81",
'rlm;' => "\xe2\x80\x8f",
'rsaquo;' => "\xe2\x80\xba",
'rsquo;' => "\xe2\x80\x99",
'sbquo;' => "\xe2\x80\x9a",
'scaron;' => "\xc5\xa1",
'sdot;' => "\xe2\x8b\x85",
'sect' => "\xc2\xa7",
'sect;' => "\xc2\xa7",
'shy' => "\xc2\xad",
'shy;' => "\xc2\xad",
'sigma;' => "\xcf\x83",
'sigmaf;' => "\xcf\x82",
'sim;' => "\xe2\x88\xbc",
'spades;' => "\xe2\x99\xa0",
'sub;' => "\xe2\x8a\x82",
'sube;' => "\xe2\x8a\x86",
'sum;' => "\xe2\x88\x91",
'sup1' => "\xc2\xb9",
'sup1;' => "\xc2\xb9",
'sup2' => "\xc2\xb2",
'sup2;' => "\xc2\xb2",
'sup3' => "\xc2\xb3",
'sup3;' => "\xc2\xb3",
'sup;' => "\xe2\x8a\x83",
'supe;' => "\xe2\x8a\x87",
'szlig' => "\xc3\x9f",
'szlig;' => "\xc3\x9f",
'tau;' => "\xcf\x84",
'there4;' => "\xe2\x88\xb4",
'theta;' => "\xce\xb8",
'thetasym;' => "\xcf\x91",
'thinsp;' => "\xe2\x80\x89",
'thorn' => "\xc3\xbe",
'thorn;' => "\xc3\xbe",
'tilde;' => "\xcb\x9c",
'times' => "\xc3\x97",
'times;' => "\xc3\x97",
'trade;' => "\xe2\x84\xa2",
'uArr;' => "\xe2\x87\x91",
'uacute' => "\xc3\xba",
'uacute;' => "\xc3\xba",
'uarr;' => "\xe2\x86\x91",
'ucirc' => "\xc3\xbb",
'ucirc;' => "\xc3\xbb",
'ugrave' => "\xc3\xb9",
'ugrave;' => "\xc3\xb9",
'uml' => "\xc2\xa8",
'uml;' => "\xc2\xa8",
'upsih;' => "\xcf\x92",
'upsilon;' => "\xcf\x85",
'uuml' => "\xc3\xbc",
'uuml;' => "\xc3\xbc",
'weierp;' => "\xe2\x84\x98",
'xi;' => "\xce\xbe",
'yacute' => "\xc3\xbd",
'yacute;' => "\xc3\xbd",
'yen' => "\xc2\xa5",
'yen;' => "\xc2\xa5",
'yuml' => "\xc3\xbf",
'yuml;' => "\xc3\xbf",
'zeta;' => "\xce\xb6",
'zwj;' => "\xe2\x80\x8d",
'zwnj;' => "\xe2\x80\x8c"
'thinsp;' => "\xe2\x80\x89",
'thorn' => "\xc3\xbe",
'thorn;' => "\xc3\xbe",
'tilde;' => "\xcb\x9c",
'times' => "\xc3\x97",
'times;' => "\xc3\x97",
'trade;' => "\xe2\x84\xa2",
'uArr;' => "\xe2\x87\x91",
'uacute' => "\xc3\xba",
'uacute;' => "\xc3\xba",
'uarr;' => "\xe2\x86\x91",
'ucirc' => "\xc3\xbb",
'ucirc;' => "\xc3\xbb",
'ugrave' => "\xc3\xb9",
'ugrave;' => "\xc3\xb9",
'uml' => "\xc2\xa8",
'uml;' => "\xc2\xa8",
'upsih;' => "\xcf\x92",
'upsilon;' => "\xcf\x85",
'uuml' => "\xc3\xbc",
'uuml;' => "\xc3\xbc",
'weierp;' => "\xe2\x84\x98",
'xi;' => "\xce\xbe",
'yacute' => "\xc3\xbd",
'yacute;' => "\xc3\xbd",
'yen' => "\xc2\xa5",
'yen;' => "\xc2\xa5",
'yuml' => "\xc3\xbf",
'yuml;' => "\xc3\xbf",
'zeta;' => "\xce\xb6",
'zwj;' => "\xe2\x80\x8d",
'zwnj;' => "\xe2\x80\x8c"
}
ENCODINGS = %w[

View file

@ -21,9 +21,9 @@ module HTML5
when :EmptyTag
if token[:name].downcase == "meta"
# replace charset with actual encoding
token[:data].each_with_index do |(name,value),index|
token[:data].each_with_index do |(name, value), index|
if name == 'charset'
token[:data][index][1]=@encoding
token[:data][index][1] = @encoding
meta_found = true
end
end
@ -31,7 +31,7 @@ module HTML5
# replace charset with actual encoding
has_http_equiv_content_type = false
content_index = -1
token[:data].each_with_index do |(name,value),i|
token[:data].each_with_index do |(name, value), i|
if name.downcase == 'charset'
token[:data][i] = ['charset', @encoding]
meta_found = true
@ -43,30 +43,27 @@ module HTML5
end
end
if not meta_found
if has_http_equiv_content_type and content_index >= 0
token[:data][content_index][1] =
'text/html; charset=%s' % @encoding
if !meta_found
if has_http_equiv_content_type && content_index >= 0
token[:data][content_index][1] = 'text/html; charset=%s' % @encoding
meta_found = true
end
end
elsif token[:name].downcase == "head" and not meta_found
elsif token[:name].downcase == "head" && !meta_found
# insert meta into empty head
yield(:type => :StartTag, :name => "head", :data => token[:data])
yield(:type => :EmptyTag, :name => "meta",
:data => [["charset", @encoding]])
yield(:type => :EndTag, :name => "head")
yield :type => :StartTag, :name => "head", :data => token[:data]
yield :type => :EmptyTag, :name => "meta", :data => [["charset", @encoding]]
yield :type => :EndTag, :name => "head"
meta_found = true
next
end
when :EndTag
if token[:name].downcase == "head" and pending.any?
if token[:name].downcase == "head" && pending.any?
# insert meta into head (if necessary) and flush pending queue
yield pending.shift
yield(:type => :EmptyTag, :name => "meta",
:data => [["charset", @encoding]]) if not meta_found
yield :type => :EmptyTag, :name => "meta", :data => [["charset", @encoding]] if !meta_found
yield pending.shift while pending.any?
meta_found = true
state = :post_head

View file

@ -75,8 +75,7 @@ module HTML5
if type == :StartTag
# omit the thead and tfoot elements' end tag when they are
# immediately followed by a tbody element. See is_optional_end.
if previous and previous[:type] == :EndTag and \
%w(tbody thead tfoot).include?(previous[:name])
if previous and previous[:type] == :EndTag && %w(tbody thead tfoot).include?(previous[:name])
return false
end
@ -85,7 +84,7 @@ module HTML5
return false
end
end
return false
return false
end
def is_optional_end(tagname, nexttok)

View file

@ -21,7 +21,7 @@ module HTML5
preserve -= 1 if preserve > 0
when :SpaceCharacters
next if preserve == 0
token[:data] = " " if preserve == 0 && token[:data]
when :Characters
token[:data] = token[:data].sub(SPACES,' ') if preserve == 0

View file

@ -16,7 +16,7 @@ module HTML5
#
class HTMLParser
attr_accessor :phase, :firstStartTag, :innerHTML, :lastPhase, :insertFromTable
attr_accessor :phase, :first_start_tag, :inner_html, :last_phase, :insert_from_table
attr_reader :phases, :tokenizer, :tree, :errors
@ -25,10 +25,10 @@ module HTML5
new(options).parse(stream,encoding)
end
def self.parseFragment(stream, options = {})
def self.parse_fragment(stream, options = {})
container = options.delete(:container) || 'div'
encoding = options.delete(:encoding)
new(options).parseFragment(stream,container,encoding)
new(options).parse_fragment(stream, container, encoding)
end
@@phases = %w( initial rootElement beforeHead inHead afterHead inBody inTable inCaption
@ -44,56 +44,58 @@ module HTML5
@tokenizer = HTMLTokenizer
@tree = TreeBuilders::REXML::TreeBuilder
options.each { |name, value| instance_variable_set("@#{name}", value) }
options.each {|name, value| instance_variable_set("@#{name}", value) }
@lowercase_attr_name = nil unless instance_variables.include?("@lowercase_attr_name")
@lowercase_element_name = nil unless instance_variables.include?("@lowercase_element_name")
@tree = @tree.new
@phases = @@phases.inject({}) do |phases, phase_name|
phase_class_name = phase_name.sub(/(.)/) { $1.upcase } + 'Phase'
phases[phase_name.to_sym] = HTML5.const_get(phase_class_name).new(self, @tree)
phases
phases
end
end
def _parse(stream, innerHTML, encoding, container = 'div')
def _parse(stream, inner_html, encoding, container = 'div')
@tree.reset
@firstStartTag = false
@first_start_tag = false
@errors = []
@tokenizer = @tokenizer.class unless Class === @tokenizer
@tokenizer = @tokenizer.new(stream, :encoding => encoding,
:parseMeta => !innerHTML)
:parseMeta => !inner_html, :lowercase_attr_name => @lowercase_attr_name, :lowercase_element_name => @lowercase_element_name)
if innerHTML
case @innerHTML = container.downcase
if inner_html
case @inner_html = container.downcase
when 'title', 'textarea'
@tokenizer.contentModelFlag = :RCDATA
@tokenizer.content_model_flag = :RCDATA
when 'style', 'script', 'xmp', 'iframe', 'noembed', 'noframes', 'noscript'
@tokenizer.contentModelFlag = :CDATA
@tokenizer.content_model_flag = :CDATA
when 'plaintext'
@tokenizer.contentModelFlag = :PLAINTEXT
@tokenizer.content_model_flag = :PLAINTEXT
else
# contentModelFlag already is PCDATA
#@tokenizer.contentModelFlag = :PCDATA
# content_model_flag already is PCDATA
#@tokenizer.content_model_flag = :PCDATA
end
@phase = @phases[:rootElement]
@phase.insertHtmlElement
resetInsertionMode
@phase.insert_html_element
reset_insertion_mode
else
@innerHTML = false
@inner_html = false
@phase = @phases[:initial]
end
# We only seem to have InBodyPhase testcases where the following is
# relevant ... need others too
@lastPhase = nil
@last_phase = nil
# XXX This is temporary for the moment so there isn't any other
# changes needed for the parser to work with the iterable tokenizer
@tokenizer.each do |token|
token = normalizeToken(token)
token = normalize_token(token)
method = 'process%s' % token[:type]
@ -108,12 +110,12 @@ module HTML5
@phase.send method, token[:name], token[:publicId],
token[:systemId], token[:correct]
else
parseError(token[:data])
parse_error(token[:data])
end
end
# When the loop finishes it's EOF
@phase.processEOF
@phase.process_eof
end
# Parse a HTML document into a well-formed tree
@ -126,12 +128,12 @@ module HTML5
# element)
def parse(stream, encoding=nil)
_parse(stream, false, encoding)
return @tree.getDocument
@tree.get_document
end
# Parse a HTML fragment into a well-formed tree fragment
# container - name of the element we're setting the innerHTML property
# container - name of the element we're setting the inner_html property
# if set to nil, default to 'div'
#
# stream - a filelike object or string containing the HTML to be parsed
@ -140,19 +142,19 @@ module HTML5
# the encoding. If specified, that encoding will be used,
# regardless of any BOM or later declaration (such as in a meta
# element)
def parseFragment(stream, container='div', encoding=nil)
def parse_fragment(stream, container='div', encoding=nil)
_parse(stream, true, encoding, container)
return @tree.getFragment
@tree.get_fragment
end
def parseError(data = 'XXX ERROR MESSAGE NEEDED')
def parse_error(data = 'XXX ERROR MESSAGE NEEDED')
# XXX The idea is to make data mandatory.
@errors.push([@tokenizer.stream.position, data])
raise ParseError if @strict
end
# HTML5 specific normalizations to the token stream
def normalizeToken(token)
def normalize_token(token)
if token[:type] == :EmptyTag
# When a solidus (/) is encountered within a tag name what happens
@ -161,75 +163,75 @@ module HTML5
# thing and if it doesn't it's wrong for everyone.
unless VOID_ELEMENTS.include?(token[:name])
parseError(_('Solidus (/) incorrectly placed in tag.'))
parse_error(_('Solidus (/) incorrectly placed in tag.'))
end
token[:type] = :StartTag
end
if token[:type] == :StartTag
token[:name] = token[:name].tr(ASCII_UPPERCASE,ASCII_LOWERCASE)
token[:name] = token[:name].downcase
# We need to remove the duplicate attributes and convert attributes
# to a dict so that [["x", "y"], ["x", "z"]] becomes {"x": "y"}
unless token[:data].empty?
data = token[:data].reverse.map { |attr, value| [attr.tr(ASCII_UPPERCASE, ASCII_LOWERCASE), value] }
data = token[:data].reverse.map {|attr, value| [attr.downcase, value] }
token[:data] = Hash[*data.flatten]
end
elsif token[:type] == :EndTag
parseError(_('End tag contains unexpected attributes.')) unless token[:data].empty?
parse_error(_('End tag contains unexpected attributes.')) unless token[:data].empty?
token[:name] = token[:name].downcase
end
return token
token
end
@@new_modes = {
'select' => :inSelect,
'td' => :inCell,
'th' => :inCell,
'tr' => :inRow,
'tbody' => :inTableBody,
'thead' => :inTableBody,
'tfoot' => :inTableBody,
'caption' => :inCaption,
'select' => :inSelect,
'td' => :inCell,
'th' => :inCell,
'tr' => :inRow,
'tbody' => :inTableBody,
'thead' => :inTableBody,
'tfoot' => :inTableBody,
'caption' => :inCaption,
'colgroup' => :inColumnGroup,
'table' => :inTable,
'head' => :inBody,
'body' => :inBody,
'table' => :inTable,
'head' => :inBody,
'body' => :inBody,
'frameset' => :inFrameset
}
def resetInsertionMode
def reset_insertion_mode
# The name of this method is mostly historical. (It's also used in the
# specification.)
last = false
@tree.openElements.reverse.each do |node|
nodeName = node.name
@tree.open_elements.reverse.each do |node|
node_name = node.name
if node == @tree.openElements[0]
if node == @tree.open_elements.first
last = true
unless ['td', 'th'].include?(nodeName)
unless ['td', 'th'].include?(node_name)
# XXX
# assert @innerHTML
nodeName = @innerHTML
# assert @inner_html
node_name = @inner_html
end
end
# Check for conditions that should only happen in the innerHTML
# Check for conditions that should only happen in the inner_html
# case
if ['select', 'colgroup', 'head', 'frameset'].include?(nodeName)
if ['select', 'colgroup', 'head', 'frameset'].include?(node_name)
# XXX
# assert @innerHTML
# assert @inner_html
end
if @@new_modes.has_key?(nodeName)
@phase = @phases[@@new_modes[nodeName]]
elsif nodeName == 'html'
@phase = @phases[@tree.headPointer.nil?? :beforeHead : :afterHead]
if @@new_modes.has_key?(node_name)
@phase = @phases[@@new_modes[node_name]]
elsif node_name == 'html'
@phase = @phases[@tree.head_pointer.nil?? :beforeHead : :afterHead]
elsif last
@phase = @phases[:inBody]
else

View file

@ -8,36 +8,36 @@ module HTML5
def processComment(data)
# This is needed because data is to be appended to the <html> element
# here and not to whatever is currently open.
@tree.insertComment(data, @tree.openElements[0])
@tree.insert_comment(data, @tree.open_elements.first)
end
def processCharacters(data)
@parser.parseError(_('Unexpected non-space characters in the after body phase.'))
parse_error(_('Unexpected non-space characters in the after body phase.'))
@parser.phase = @parser.phases[:inBody]
@parser.phase.processCharacters(data)
end
def processStartTag(name, attributes)
@parser.parseError(_("Unexpected start tag token (#{name}) in the after body phase."))
parse_error(_("Unexpected start tag token (#{name}) in the after body phase."))
@parser.phase = @parser.phases[:inBody]
@parser.phase.processStartTag(name, attributes)
end
def endTagHtml(name)
if @parser.innerHTML
@parser.parseError
if @parser.inner_html
parse_error
else
# XXX: This may need to be done, not sure
# Don't set lastPhase to the current phase but to the inBody phase
# Don't set last_phase to the current phase but to the inBody phase
# instead. No need for extra parse errors if there's something after </html>.
# Try "<!doctype html>X</html>X" for instance.
@parser.lastPhase = @parser.phase
@parser.phase = @parser.phases[:trailingEnd]
@parser.last_phase = @parser.phase
@parser.phase = @parser.phases[:trailingEnd]
end
end
def endTagOther(name)
@parser.parseError(_("Unexpected end tag token (#{name}) in the after body phase."))
parse_error(_("Unexpected end tag token (#{name}) in the after body phase."))
@parser.phase = @parser.phases[:inBody]
@parser.phase.processEndTag(name)
end

View file

@ -10,7 +10,7 @@ module HTML5
handle_end 'html'
def processCharacters(data)
@parser.parseError(_('Unexpected non-space characters in the after frameset phase. Ignored.'))
parse_error(_('Unexpected non-space characters in the after frameset phase. Ignored.'))
end
def startTagNoframes(name, attributes)
@ -18,16 +18,16 @@ module HTML5
end
def startTagOther(name, attributes)
@parser.parseError(_("Unexpected start tag (#{name}) in the after frameset phase. Ignored."))
parse_error(_("Unexpected start tag (#{name}) in the after frameset phase. Ignored."))
end
def endTagHtml(name)
@parser.lastPhase = @parser.phase
@parser.phase = @parser.phases[:trailingEnd]
@parser.last_phase = @parser.phase
@parser.phase = @parser.phases[:trailingEnd]
end
def endTagOther(name)
@parser.parseError(_("Unexpected end tag (#{name}) in the after frameset phase. Ignored."))
parse_error(_("Unexpected end tag (#{name}) in the after frameset phase. Ignored."))
end
end

View file

@ -2,47 +2,47 @@ require 'html5/html5parser/phase'
module HTML5
class AfterHeadPhase < Phase
handle_start 'html', 'body', 'frameset', %w( base link meta script style title ) => 'FromHead'
def processEOF
anythingElse
@parser.phase.processEOF
def process_eof
anything_else
@parser.phase.process_eof
end
def processCharacters(data)
anythingElse
anything_else
@parser.phase.processCharacters(data)
end
def startTagBody(name, attributes)
@tree.insertElement(name, attributes)
@tree.insert_element(name, attributes)
@parser.phase = @parser.phases[:inBody]
end
def startTagFrameset(name, attributes)
@tree.insertElement(name, attributes)
@tree.insert_element(name, attributes)
@parser.phase = @parser.phases[:inFrameset]
end
def startTagFromHead(name, attributes)
@parser.parseError(_("Unexpected start tag (#{name}) that can be in head. Moved."))
parse_error(_("Unexpected start tag (#{name}) that can be in head. Moved."))
@parser.phase = @parser.phases[:inHead]
@parser.phase.processStartTag(name, attributes)
end
def startTagOther(name, attributes)
anythingElse
anything_else
@parser.phase.processStartTag(name, attributes)
end
def processEndTag(name)
anythingElse
anything_else
@parser.phase.processEndTag(name)
end
def anythingElse
@tree.insertElement('body', {})
def anything_else
@tree.insert_element('body', {})
@parser.phase = @parser.phases[:inBody]
end

View file

@ -7,9 +7,9 @@ module HTML5
handle_end %w( html head body br p ) => 'ImplyHead'
def processEOF
def process_eof
startTagHead('head', {})
@parser.phase.processEOF
@parser.phase.process_eof
end
def processCharacters(data)
@ -18,8 +18,8 @@ module HTML5
end
def startTagHead(name, attributes)
@tree.insertElement(name, attributes)
@tree.headPointer = @tree.openElements[-1]
@tree.insert_element(name, attributes)
@tree.head_pointer = @tree.open_elements[-1]
@parser.phase = @parser.phases[:inHead]
end
@ -34,7 +34,7 @@ module HTML5
end
def endTagOther(name)
@parser.parseError(_("Unexpected end tag (#{name}) after the (implied) root element."))
parse_error(_("Unexpected end tag (#{name}) after the (implied) root element."))
end
end

View file

@ -51,25 +51,40 @@ module HTML5
# for special handling of whitespace in <pre>
@processSpaceCharactersDropNewline = false
if $-w
$-w = false
alias processSpaceCharactersNonPre processSpaceCharacters
$-w = true
else
alias processSpaceCharactersNonPre processSpaceCharacters
end
end
def processSpaceCharactersDropNewline(data)
#Sometimes (start of <pre> blocks) we want to drop leading newlines
@processSpaceCharactersDropNewline = false
if (data.length > 0 and data[0] == ?\n and
%w[pre textarea].include?(@tree.openElements[-1].name) and
not @tree.openElements[-1].hasContent)
# #Sometimes (start of <pre> blocks) we want to drop leading newlines
if $-w
$-w = false
alias processSpaceCharacters processSpaceCharactersNonPre
$-w = true
else
alias processSpaceCharacters processSpaceCharactersNonPre
end
if (data.length > 0 and data[0] == ?\n &&
%w[pre textarea].include?(@tree.open_elements.last.name) && !@tree.open_elements.last.hasContent)
data = data[1..-1]
end
@tree.insertText(data) if data.length > 0
if data.length > 0
@tree.reconstructActiveFormattingElements
@tree.insertText(data)
end
end
def processSpaceCharacters(data)
if @processSpaceCharactersDropNewline
processSpaceCharactersDropNewline(data)
else
super(data)
end
@tree.reconstructActiveFormattingElements()
@tree.insertText(data)
end
def processCharacters(data)
@ -85,20 +100,19 @@ module HTML5
end
def startTagTitle(name, attributes)
@parser.parseError(_("Unexpected start tag (#{name}) that belongs in the head. Moved."))
parse_error(_("Unexpected start tag (#{name}) that belongs in the head. Moved."))
@parser.phases[:inHead].processStartTag(name, attributes)
end
def startTagBody(name, attributes)
@parser.parseError(_('Unexpected start tag (body).'))
parse_error(_('Unexpected start tag (body).'))
if (@tree.openElements.length == 1 or
@tree.openElements[1].name != 'body')
assert @parser.innerHTML
if (@tree.open_elements.length == 1 || @tree.open_elements[1].name != 'body')
assert @parser.inner_html
else
attributes.each do |attr, value|
unless @tree.openElements[1].attributes.has_key?(attr)
@tree.openElements[1].attributes[attr] = value
unless @tree.open_elements[1].attributes.has_key?(attr)
@tree.open_elements[1].attributes[attr] = value
end
end
end
@ -106,17 +120,17 @@ module HTML5
def startTagCloseP(name, attributes)
endTagP('p') if in_scope?('p')
@tree.insertElement(name, attributes)
@tree.insert_element(name, attributes)
@processSpaceCharactersDropNewline = true if name == 'pre'
end
def startTagForm(name, attributes)
if @tree.formPointer
@parser.parseError(_('Unexpected start tag (form). Ignored.'))
parse_error(_('Unexpected start tag (form). Ignored.'))
else
endTagP('p') if in_scope?('p')
@tree.insertElement(name, attributes)
@tree.formPointer = @tree.openElements[-1]
@tree.insert_element(name, attributes)
@tree.formPointer = @tree.open_elements[-1]
end
end
@ -125,31 +139,28 @@ module HTML5
stopNames = {'li' => ['li'], 'dd' => ['dd', 'dt'], 'dt' => ['dd', 'dt']}
stopName = stopNames[name]
@tree.openElements.reverse.each_with_index do |node, i|
@tree.open_elements.reverse.each_with_index do |node, i|
if stopName.include?(node.name)
poppedNodes = (0..i).collect { @tree.openElements.pop }
poppedNodes = (0..i).collect { @tree.open_elements.pop }
if i >= 1
@parser.parseError(_("Missing end tag%s (%s)" % [
(i>1 ? 's' : ''),
poppedNodes.reverse.map {|item| item.name}.join(', ')]))
parse_error(_("Missing end tag%s (%s)" % [(i>1 ? 's' : ''), poppedNodes.reverse.map{|item| item.name}.join(', ')]))
end
break
end
# Phrasing elements are all non special, non scoping, non
# formatting elements
break if ((SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name) and
not ['address', 'div'].include?(node.name))
break if ((SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name) && !%w[address div].include?(node.name))
end
# Always insert an <li> element.
@tree.insertElement(name, attributes)
@tree.insert_element(name, attributes)
end
def startTagPlaintext(name, attributes)
endTagP('p') if in_scope?('p')
@tree.insertElement(name, attributes)
@parser.tokenizer.contentModelFlag = :PLAINTEXT
@tree.insert_element(name, attributes)
@parser.tokenizer.content_model_flag = :PLAINTEXT
end
def startTagHeading(name, attributes)
@ -158,7 +169,7 @@ module HTML5
# Uncomment the following for IE7 behavior:
# HEADING_ELEMENTS.each do |element|
# if in_scope?(element)
# @parser.parseError(_("Unexpected start tag (#{name})."))
# parse_error(_("Unexpected start tag (#{name})."))
#
# remove_open_elements_until do |element|
# HEADING_ELEMENTS.include?(element.name)
@ -167,14 +178,14 @@ module HTML5
# break
# end
# end
@tree.insertElement(name, attributes)
@tree.insert_element(name, attributes)
end
def startTagA(name, attributes)
if afeAElement = @tree.elementInActiveFormattingElements('a')
@parser.parseError(_('Unexpected start tag (a) implies end tag (a).'))
parse_error(_('Unexpected start tag (a) implies end tag (a).'))
endTagFormatting('a')
@tree.openElements.delete(afeAElement) if @tree.openElements.include?(afeAElement)
@tree.open_elements.delete(afeAElement) if @tree.open_elements.include?(afeAElement)
@tree.activeFormattingElements.delete(afeAElement) if @tree.activeFormattingElements.include?(afeAElement)
end
@tree.reconstructActiveFormattingElements
@ -188,77 +199,82 @@ module HTML5
def startTagNobr(name, attributes)
@tree.reconstructActiveFormattingElements
processEndTag('nobr') if in_scope?('nobr')
if in_scope?('nobr')
parse_error(_('Unexpected start tag (nobr) implies end tag (nobr).'))
processEndTag('nobr')
# XXX Need tests that trigger the following
@tree.reconstructActiveFormattingElements
end
addFormattingElement(name, attributes)
end
def startTagButton(name, attributes)
if in_scope?('button')
@parser.parseError(_('Unexpected start tag (button) implied end tag (button).'))
parse_error(_('Unexpected start tag (button) implied end tag (button).'))
processEndTag('button')
@parser.phase.processStartTag(name, attributes)
else
@tree.reconstructActiveFormattingElements
@tree.insertElement(name, attributes)
@tree.insert_element(name, attributes)
@tree.activeFormattingElements.push(Marker)
end
end
def startTagMarqueeObject(name, attributes)
@tree.reconstructActiveFormattingElements
@tree.insertElement(name, attributes)
@tree.insert_element(name, attributes)
@tree.activeFormattingElements.push(Marker)
end
def startTagXmp(name, attributes)
@tree.reconstructActiveFormattingElements
@tree.insertElement(name, attributes)
@parser.tokenizer.contentModelFlag = :CDATA
@tree.insert_element(name, attributes)
@parser.tokenizer.content_model_flag = :CDATA
end
def startTagTable(name, attributes)
processEndTag('p') if in_scope?('p')
@tree.insertElement(name, attributes)
@tree.insert_element(name, attributes)
@parser.phase = @parser.phases[:inTable]
end
def startTagVoidFormatting(name, attributes)
@tree.reconstructActiveFormattingElements
@tree.insertElement(name, attributes)
@tree.openElements.pop
@tree.insert_element(name, attributes)
@tree.open_elements.pop
end
def startTagHr(name, attributes)
endTagP('p') if in_scope?('p')
@tree.insertElement(name, attributes)
@tree.openElements.pop
@tree.insert_element(name, attributes)
@tree.open_elements.pop
end
def startTagImage(name, attributes)
# No really...
@parser.parseError(_('Unexpected start tag (image). Treated as img.'))
parse_error(_('Unexpected start tag (image). Treated as img.'))
processStartTag('img', attributes)
end
def startTagInput(name, attributes)
@tree.reconstructActiveFormattingElements
@tree.insertElement(name, attributes)
@tree.insert_element(name, attributes)
if @tree.formPointer
# XXX Not exactly sure what to do here
# @tree.openElements[-1].form = @tree.formPointer
# @tree.open_elements[-1].form = @tree.formPointer
end
@tree.openElements.pop
@tree.open_elements.pop
end
def startTagIsindex(name, attributes)
@parser.parseError(_("Unexpected start tag isindex. Don't use it!"))
parse_error(_("Unexpected start tag isindex. Don't use it!"))
return if @tree.formPointer
processStartTag('form', {})
processStartTag('hr', {})
processStartTag('p', {})
processStartTag('label', {})
# XXX Localization ...
processCharacters('This is a searchable index. Insert your search keywords here:')
processCharacters('This is a searchable index. Insert your search keywords here: ')
attributes['name'] = 'isindex'
attrs = attributes.to_a
processStartTag('input', attributes)
@ -270,20 +286,21 @@ module HTML5
def startTagTextarea(name, attributes)
# XXX Form element pointer checking here as well...
@tree.insertElement(name, attributes)
@parser.tokenizer.contentModelFlag = :RCDATA
@tree.insert_element(name, attributes)
@parser.tokenizer.content_model_flag = :RCDATA
@processSpaceCharactersDropNewline = true
alias processSpaceCharacters processSpaceCharactersDropNewline
end
# iframe, noembed noframes, noscript(if scripting enabled)
def startTagCdata(name, attributes)
@tree.insertElement(name, attributes)
@parser.tokenizer.contentModelFlag = :CDATA
@tree.insert_element(name, attributes)
@parser.tokenizer.content_model_flag = :CDATA
end
def startTagSelect(name, attributes)
@tree.reconstructActiveFormattingElements
@tree.insertElement(name, attributes)
@tree.insert_element(name, attributes)
@parser.phase = @parser.phases[:inSelect]
end
@ -293,7 +310,7 @@ module HTML5
# "caption", "col", "colgroup", "frame", "frameset", "head",
# "option", "optgroup", "tbody", "td", "tfoot", "th", "thead",
# "tr", "noscript"
@parser.parseError(_("Unexpected start tag (#{name}). Ignored."))
parse_error(_("Unexpected start tag (#{name}). Ignored."))
end
def startTagNew(name, attributes)
@ -306,14 +323,14 @@ module HTML5
def startTagOther(name, attributes)
@tree.reconstructActiveFormattingElements
@tree.insertElement(name, attributes)
@tree.insert_element(name, attributes)
end
def endTagP(name)
@tree.generateImpliedEndTags('p') if in_scope?('p')
@parser.parseError(_('Unexpected end tag (p).')) unless @tree.openElements[-1].name == 'p'
parse_error(_('Unexpected end tag (p).')) unless @tree.open_elements.last.name == 'p'
if in_scope?('p')
@tree.openElements.pop while in_scope?('p')
@tree.open_elements.pop while in_scope?('p')
else
startTagCloseP('p', {})
endTagP('p')
@ -324,20 +341,20 @@ module HTML5
# XXX Need to take open <p> tags into account here. We shouldn't imply
# </p> but we should not throw a parse error either. Specification is
# likely to be updated.
unless @tree.openElements[1].name == 'body'
# innerHTML case
@parser.parseError
unless @tree.open_elements[1].name == 'body'
# inner_html case
parse_error
return
end
unless @tree.openElements[-1].name == 'body'
@parser.parseError(_("Unexpected end tag (body). Missing end tag (#{@tree.openElements[-1].name})."))
unless @tree.open_elements.last.name == 'body'
parse_error(_("Unexpected end tag (body). Missing end tag (#{@tree.open_elements[-1].name})."))
end
@parser.phase = @parser.phases[:afterBody]
end
def endTagHtml(name)
endTagBody(name)
@parser.phase.processEndTag(name) unless @parser.innerHTML
@parser.phase.processEndTag(name) unless @parser.inner_html
end
def endTagBlock(name)
@ -346,8 +363,8 @@ module HTML5
@tree.generateImpliedEndTags if in_scope?(name)
unless @tree.openElements[-1].name == name
@parser.parseError(_("End tag (#{name}) seen too early. Expected other end tag."))
unless @tree.open_elements.last.name == name
parse_error(_("End tag (#{name}) seen too early. Expected other end tag."))
end
if in_scope?(name)
@ -359,22 +376,20 @@ module HTML5
if in_scope?(name)
@tree.generateImpliedEndTags
end
if @tree.openElements[-1].name != name
@parser.parseError(_("End tag (form) seen too early. Ignored."))
if @tree.open_elements.last.name != name
parse_error(_("End tag (form) seen too early. Ignored."))
else
@tree.openElements.pop
@tree.open_elements.pop
end
@tree.formPointer = nil
end
def endTagListItem(name)
# AT Could merge this with the Block case
if in_scope?(name)
@tree.generateImpliedEndTags(name)
@tree.generateImpliedEndTags(name) if in_scope?(name)
unless @tree.openElements[-1].name == name
@parser.parseError(_("End tag (#{name}) seen too early. Expected other end tag."))
end
unless @tree.open_elements.last.name == name
parse_error(_("End tag (#{name}) seen too early. " + 'Expected other end tag.'))
end
remove_open_elements_until(name) if in_scope?(name)
@ -388,13 +403,13 @@ module HTML5
end
end
unless @tree.openElements[-1].name == name
@parser.parseError(_("Unexpected end tag (#{name}). Expected other end tag."))
unless @tree.open_elements.last.name == name
parse_error(_("Unexpected end tag (#{name}). Expected other end tag."))
end
HEADING_ELEMENTS.each do |element|
if in_scope?(element)
remove_open_elements_until { |element| HEADING_ELEMENTS.include?(element.name) }
remove_open_elements_until {|element| HEADING_ELEMENTS.include?(element.name)}
break
end
end
@ -403,30 +418,30 @@ module HTML5
# The much-feared adoption agency algorithm
def endTagFormatting(name)
# http://www.whatwg.org/specs/web-apps/current-work/#adoptionAgency
# XXX Better parseError messages appreciated.
# XXX Better parse_error messages appreciated.
while true
# Step 1 paragraph 1
afeElement = @tree.elementInActiveFormattingElements(name)
if not afeElement or (@tree.openElements.include?(afeElement) and not in_scope?(afeElement.name))
@parser.parseError(_("End tag (#{name}) violates step 1, paragraph 1 of the adoption agency algorithm."))
if !afeElement or (@tree.open_elements.include?(afeElement) && !in_scope?(afeElement.name))
parse_error(_("End tag (#{name}) violates step 1, paragraph 1 of the adoption agency algorithm."))
return
# Step 1 paragraph 2
elsif not @tree.openElements.include?(afeElement)
@parser.parseError(_("End tag (#{name}) violates step 1, paragraph 2 of the adoption agency algorithm."))
elsif not @tree.open_elements.include?(afeElement)
parse_error(_("End tag (#{name}) violates step 1, paragraph 2 of the adoption agency algorithm."))
@tree.activeFormattingElements.delete(afeElement)
return
end
# Step 1 paragraph 3
if afeElement != @tree.openElements[-1]
@parser.parseError(_("End tag (#{name}) violates step 1, paragraph 3 of the adoption agency algorithm."))
if afeElement != @tree.open_elements.last
parse_error(_("End tag (#{name}) violates step 1, paragraph 3 of the adoption agency algorithm."))
end
# Step 2
# Start of the adoption agency algorithm proper
afeIndex = @tree.openElements.index(afeElement)
afeIndex = @tree.open_elements.index(afeElement)
furthestBlock = nil
@tree.openElements[afeIndex..-1].each do |element|
@tree.open_elements[afeIndex..-1].each do |element|
if (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(element.name)
furthestBlock = element
break
@ -435,11 +450,11 @@ module HTML5
# Step 3
if furthestBlock.nil?
element = remove_open_elements_until { |element| element == afeElement }
element = remove_open_elements_until {|element| element == afeElement }
@tree.activeFormattingElements.delete(element)
return
end
commonAncestor = @tree.openElements[afeIndex - 1]
commonAncestor = @tree.open_elements[afeIndex - 1]
# Step 5
furthestBlock.parent.removeChild(furthestBlock) if furthestBlock.parent
@ -456,11 +471,11 @@ module HTML5
while true
# AT replace this with a function and recursion?
# Node is element before node in open elements
node = @tree.openElements[@tree.openElements.index(node) - 1]
node = @tree.open_elements[@tree.open_elements.index(node) - 1]
until @tree.activeFormattingElements.include?(node)
tmpNode = node
node = @tree.openElements[@tree.openElements.index(node) - 1]
@tree.openElements.delete(tmpNode)
node = @tree.open_elements[@tree.open_elements.index(node) - 1]
@tree.open_elements.delete(tmpNode)
end
# Step 7.3
break if node == afeElement
@ -477,7 +492,7 @@ module HTML5
clone = node.cloneNode
# Replace node with clone
@tree.activeFormattingElements[@tree.activeFormattingElements.index(node)] = clone
@tree.openElements[@tree.openElements.index(node)] = clone
@tree.open_elements[@tree.open_elements.index(node)] = clone
node = clone
end
# Step 7.6
@ -507,47 +522,47 @@ module HTML5
@tree.activeFormattingElements.insert([bookmark,@tree.activeFormattingElements.length].min, clone)
# Step 13
@tree.openElements.delete(afeElement)
@tree.openElements.insert(@tree.openElements.index(furthestBlock) + 1, clone)
@tree.open_elements.delete(afeElement)
@tree.open_elements.insert(@tree.open_elements.index(furthestBlock) + 1, clone)
end
end
def endTagButtonMarqueeObject(name)
@tree.generateImpliedEndTags if in_scope?(name)
unless @tree.openElements[-1].name == name
@parser.parseError(_("Unexpected end tag (#{name}). Expected other end tag first."))
unless @tree.open_elements.last.name == name
parse_error(_("Unexpected end tag (#{name}). Expected other end tag first."))
end
if in_scope?(name)
remove_open_elements_until(name)
@tree.clearActiveFormattingElements
end
end
def endTagMisplaced(name)
# This handles elements with end tags in other insertion modes.
@parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
parse_error(_("Unexpected end tag (#{name}). Ignored."))
end
def endTagBr(name)
@parser.parseError(_("Unexpected end tag (br). Treated as br element."))
parse_error(_("Unexpected end tag (br). Treated as br element."))
@tree.reconstructActiveFormattingElements
@tree.insertElement(name, {})
@tree.openElements.pop()
@tree.insert_element(name, {})
@tree.open_elements.pop()
end
def endTagNone(name)
# This handles elements with no end tag.
@parser.parseError(_("This tag (#{name}) has no end tag"))
parse_error(_("This tag (#{name}) has no end tag"))
end
def endTagCdataTextAreaXmp(name)
if @tree.openElements[-1].name == name
@tree.openElements.pop
if @tree.open_elements.last.name == name
@tree.open_elements.pop
else
@parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
parse_error(_("Unexpected end tag (#{name}). Ignored."))
end
end
@ -561,20 +576,20 @@ module HTML5
def endTagOther(name)
# XXX This logic should be moved into the treebuilder
@tree.openElements.reverse.each do |node|
@tree.open_elements.reverse.each do |node|
if node.name == name
@tree.generateImpliedEndTags
unless @tree.openElements[-1].name == name
@parser.parseError(_("Unexpected end tag (#{name})."))
unless @tree.open_elements.last.name == name
parse_error(_("Unexpected end tag (#{name})."))
end
remove_open_elements_until { |element| element == node }
remove_open_elements_until {|element| element == node }
break
else
if (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name)
@parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
parse_error(_("Unexpected end tag (#{name}). Ignored."))
break
end
end
@ -584,8 +599,8 @@ module HTML5
protected
def addFormattingElement(name, attributes)
@tree.insertElement(name, attributes)
@tree.activeFormattingElements.push(@tree.openElements[-1])
@tree.insert_element(name, attributes)
@tree.activeFormattingElements.push(@tree.open_elements.last)
end
end

View file

@ -10,7 +10,7 @@ module HTML5
handle_end 'caption', 'table', %w( body col colgroup html tbody td tfoot th thead tr ) => 'Ignore'
def ignoreEndTagCaption
not in_scope?('caption', true)
!in_scope?('caption', true)
end
def processCharacters(data)
@ -18,7 +18,7 @@ module HTML5
end
def startTagTableElement(name, attributes)
@parser.parseError
parse_error
#XXX Have to duplicate logic here to find out if the tag is ignored
ignoreEndTag = ignoreEndTagCaption
@parser.phase.processEndTag('caption')
@ -31,15 +31,15 @@ module HTML5
def endTagCaption(name)
if ignoreEndTagCaption
# innerHTML case
assert @parser.innerHTML
@parser.parseError
# inner_html case
assert @parser.inner_html
parse_error
else
# AT this code is quite similar to endTagTable in "InTable"
@tree.generateImpliedEndTags
unless @tree.openElements[-1].name == 'caption'
@parser.parseError(_("Unexpected end tag (caption). Missing end tags."))
unless @tree.open_elements[-1].name == 'caption'
parse_error(_("Unexpected end tag (caption). Missing end tags."))
end
remove_open_elements_until('caption')
@ -50,14 +50,14 @@ module HTML5
end
def endTagTable(name)
@parser.parseError
parse_error
ignoreEndTag = ignoreEndTagCaption
@parser.phase.processEndTag('caption')
@parser.phase.processEndTag(name) unless ignoreEndTag
end
def endTagIgnore(name)
@parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
parse_error(_("Unexpected end tag (#{name}). Ignored."))
end
def endTagOther(name)

View file

@ -20,8 +20,8 @@ module HTML5
closeCell
@parser.phase.processStartTag(name, attributes)
else
# innerHTML case
@parser.parseError
# inner_html case
parse_error
end
end
@ -32,22 +32,22 @@ module HTML5
def endTagTableCell(name)
if in_scope?(name, true)
@tree.generateImpliedEndTags(name)
if @tree.openElements[-1].name != name
@parser.parseError("Got table cell end tag (#{name}) while required end tags are missing.")
if @tree.open_elements.last.name != name
parse_error("Got table cell end tag (#{name}) while required end tags are missing.")
remove_open_elements_until(name)
else
@tree.openElements.pop
@tree.open_elements.pop
end
@tree.clearActiveFormattingElements
@parser.phase = @parser.phases[:inRow]
else
@parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
parse_error(_("Unexpected end tag (#{name}). Ignored."))
end
end
def endTagIgnore(name)
@parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
parse_error(_("Unexpected end tag (#{name}). Ignored."))
end
def endTagImply(name)
@ -55,8 +55,8 @@ module HTML5
closeCell
@parser.phase.processEndTag(name)
else
# sometimes innerHTML case
@parser.parseError
# sometimes inner_html case
parse_error
end
end

View file

@ -10,7 +10,7 @@ module HTML5
handle_end 'colgroup', 'col'
def ignoreEndTagColgroup
@tree.openElements[-1].name == 'html'
@tree.open_elements[-1].name == 'html'
end
def processCharacters(data)
@ -20,8 +20,8 @@ module HTML5
end
def startTagCol(name, attributes)
@tree.insertElement(name, attributes)
@tree.openElements.pop
@tree.insert_element(name, attributes)
@tree.open_elements.pop
end
def startTagOther(name, attributes)
@ -32,17 +32,17 @@ module HTML5
def endTagColgroup(name)
if ignoreEndTagColgroup
# innerHTML case
assert @parser.innerHTML
@parser.parseError
# inner_html case
assert @parser.inner_html
parse_error
else
@tree.openElements.pop
@tree.open_elements.pop
@parser.phase = @parser.phases[:inTable]
end
end
def endTagCol(name)
@parser.parseError(_('Unexpected end tag (col). col has no end tag.'))
parse_error(_('Unexpected end tag (col). col has no end tag.'))
end
def endTagOther(name)

View file

@ -10,16 +10,16 @@ module HTML5
handle_end 'frameset', 'noframes'
def processCharacters(data)
@parser.parseError(_('Unexpected characters in the frameset phase. Characters ignored.'))
parse_error(_('Unexpected characters in the frameset phase. Characters ignored.'))
end
def startTagFrameset(name, attributes)
@tree.insertElement(name, attributes)
@tree.insert_element(name, attributes)
end
def startTagFrame(name, attributes)
@tree.insertElement(name, attributes)
@tree.openElements.pop
@tree.insert_element(name, attributes)
@tree.open_elements.pop
end
def startTagNoframes(name, attributes)
@ -27,19 +27,19 @@ module HTML5
end
def startTagOther(name, attributes)
@parser.parseError(_("Unexpected start tag token (#{name}) in the frameset phase. Ignored"))
parse_error(_("Unexpected start tag token (#{name}) in the frameset phase. Ignored"))
end
def endTagFrameset(name)
if @tree.openElements[-1].name == 'html'
# innerHTML case
@parser.parseError(_("Unexpected end tag token (frameset) in the frameset phase (innerHTML)."))
if @tree.open_elements.last.name == 'html'
# inner_html case
parse_error(_("Unexpected end tag token (frameset) in the frameset phase (inner_html)."))
else
@tree.openElements.pop
@tree.open_elements.pop
end
if (not @parser.innerHTML and
@tree.openElements[-1].name != 'frameset')
# If we're not in innerHTML mode and the current node is not a
if (not @parser.inner_html and
@tree.open_elements.last.name != 'frameset')
# If we're not in inner_html mode and the current node is not a
# "frameset" element (anymore) then switch.
@parser.phase = @parser.phases[:afterFrameset]
end
@ -50,7 +50,7 @@ module HTML5
end
def endTagOther(name)
@parser.parseError(_("Unexpected end tag token (#{name}) in the frameset phase. Ignored."))
parse_error(_("Unexpected end tag token (#{name}) in the frameset phase. Ignored."))
end
end

View file

@ -3,108 +3,120 @@ require 'html5/html5parser/phase'
module HTML5
class InHeadPhase < Phase
handle_start 'html', 'head', 'title', 'style', 'script', %w( base link meta )
handle_start 'html', 'head', 'title', 'style', 'script', 'noscript'
handle_start %w( base link meta )
handle_end 'head'
handle_end %w( html body br p ) => 'ImplyAfterHead'
handle_end %w( title style script )
handle_end %w( title style script noscript )
def processEOF
if ['title', 'style', 'script'].include?(name = @tree.openElements[-1].name)
@parser.parseError(_("Unexpected end of file. Expected end tag (#{name})."))
@tree.openElements.pop
def process_eof
if ['title', 'style', 'script'].include?(name = @tree.open_elements.last.name)
parse_error(_("Unexpected end of file. Expected end tag (#{name})."))
@tree.open_elements.pop
end
anythingElse
@parser.phase.processEOF
anything_else
@parser.phase.process_eof
end
def processCharacters(data)
if ['title', 'style', 'script'].include?(@tree.openElements[-1].name)
if %w[title style script noscript].include?(@tree.open_elements.last.name)
@tree.insertText(data)
else
anythingElse
anything_else
@parser.phase.processCharacters(data)
end
end
def startTagHead(name, attributes)
@parser.parseError(_('Unexpected start tag head in existing head. Ignored'))
parse_error(_('Unexpected start tag head in existing head. Ignored'))
end
def startTagTitle(name, attributes)
element = @tree.createElement(name, attributes)
appendToHead(element)
@tree.openElements.push(element)
@parser.tokenizer.contentModelFlag = :RCDATA
@tree.open_elements.push(element)
@parser.tokenizer.content_model_flag = :RCDATA
end
def startTagStyle(name, attributes)
element = @tree.createElement(name, attributes)
if @tree.headPointer != nil and @parser.phase == @parser.phases[:inHead]
if @tree.head_pointer != nil and @parser.phase == @parser.phases[:inHead]
appendToHead(element)
else
@tree.openElements[-1].appendChild(element)
@tree.open_elements.last.appendChild(element)
end
@tree.openElements.push(element)
@parser.tokenizer.contentModelFlag = :CDATA
@tree.open_elements.push(element)
@parser.tokenizer.content_model_flag = :CDATA
end
def startTagNoscript(name, attributes)
# XXX Need to decide whether to implement the scripting disabled case.
element = @tree.createElement(name, attributes)
if @tree.head_pointer !=nil and @parser.phase == @parser.phases[:inHead]
appendToHead(element)
else
@tree.open_elements.last.appendChild(element)
end
@tree.open_elements.push(element)
@parser.tokenizer.content_model_flag = :CDATA
end
def startTagScript(name, attributes)
#XXX Inner HTML case may be wrong
element = @tree.createElement(name, attributes)
element._flags.push("parser-inserted")
if (@tree.headPointer != nil and
@parser.phase == @parser.phases[:inHead])
if @tree.head_pointer != nil and @parser.phase == @parser.phases[:inHead]
appendToHead(element)
else
@tree.openElements[-1].appendChild(element)
@tree.open_elements.last.appendChild(element)
end
@tree.openElements.push(element)
@parser.tokenizer.contentModelFlag = :CDATA
@tree.open_elements.push(element)
@parser.tokenizer.content_model_flag = :CDATA
end
def startTagBaseLinkMeta(name, attributes)
element = @tree.createElement(name, attributes)
if @tree.headPointer != nil and @parser.phase == @parser.phases[:inHead]
if @tree.head_pointer != nil and @parser.phase == @parser.phases[:inHead]
appendToHead(element)
else
@tree.openElements[-1].appendChild(element)
@tree.open_elements.last.appendChild(element)
end
end
def startTagOther(name, attributes)
anythingElse
anything_else
@parser.phase.processStartTag(name, attributes)
end
def endTagHead(name)
if @tree.openElements[-1].name == 'head'
@tree.openElements.pop
if @tree.open_elements.last.name == 'head'
@tree.open_elements.pop
else
@parser.parseError(_("Unexpected end tag (head). Ignored."))
parse_error(_("Unexpected end tag (head). Ignored."))
end
@parser.phase = @parser.phases[:afterHead]
end
def endTagImplyAfterHead(name)
anythingElse
anything_else
@parser.phase.processEndTag(name)
end
def endTagTitleStyleScript(name)
if @tree.openElements[-1].name == name
@tree.openElements.pop
def endTagTitleStyleScriptNoscript(name)
if @tree.open_elements.last.name == name
@tree.open_elements.pop
else
@parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
parse_error(_("Unexpected end tag (#{name}). Ignored."))
end
end
def endTagOther(name)
@parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
parse_error(_("Unexpected end tag (#{name}). Ignored."))
end
def anythingElse
if @tree.openElements[-1].name == 'head'
def anything_else
if @tree.open_elements.last.name == 'head'
endTagHead('head')
else
@parser.phase = @parser.phases[:afterHead]
@ -114,11 +126,11 @@ module HTML5
protected
def appendToHead(element)
if @tree.headPointer.nil?
assert @parser.innerHTML
@tree.openElements[-1].appendChild(element)
if @tree.head_pointer.nil?
assert @parser.inner_html
@tree.open_elements.last.appendChild(element)
else
@tree.headPointer.appendChild(element)
@tree.head_pointer.appendChild(element)
end
end

View file

@ -15,7 +15,7 @@ module HTML5
def startTagTableCell(name, attributes)
clearStackToTableRowContext
@tree.insertElement(name, attributes)
@tree.insert_element(name, attributes)
@parser.phase = @parser.phases[:inCell]
@tree.activeFormattingElements.push(Marker)
end
@ -23,7 +23,7 @@ module HTML5
def startTagTableOther(name, attributes)
ignoreEndTag = ignoreEndTagTr
endTagTr('tr')
# XXX how are we sure it's always ignored in the innerHTML case?
# XXX how are we sure it's always ignored in the inner_html case?
@parser.phase.processStartTag(name, attributes) unless ignoreEndTag
end
@ -33,12 +33,12 @@ module HTML5
def endTagTr(name)
if ignoreEndTagTr
# innerHTML case
assert @parser.innerHTML
@parser.parseError
# inner_html case
assert @parser.inner_html
parse_error
else
clearStackToTableRowContext
@tree.openElements.pop
@tree.open_elements.pop
@parser.phase = @parser.phases[:inTableBody]
end
end
@ -47,7 +47,7 @@ module HTML5
ignoreEndTag = ignoreEndTagTr
endTagTr('tr')
# Reprocess the current tag if the tr end tag was not ignored
# XXX how are we sure it's always ignored in the innerHTML case?
# XXX how are we sure it's always ignored in the inner_html case?
@parser.phase.processEndTag(name) unless ignoreEndTag
end
@ -56,13 +56,13 @@ module HTML5
endTagTr('tr')
@parser.phase.processEndTag(name)
else
# innerHTML case
@parser.parseError
# inner_html case
parse_error
end
end
def endTagIgnore(name)
@parser.parseError(_("Unexpected end tag (#{name}) in the row phase. Ignored."))
parse_error(_("Unexpected end tag (#{name}) in the row phase. Ignored."))
end
def endTagOther(name)
@ -73,9 +73,9 @@ module HTML5
# XXX unify this with other table helper methods
def clearStackToTableRowContext
until ['tr', 'html'].include?(name = @tree.openElements[-1].name)
@parser.parseError(_("Unexpected implied end tag (#{name}) in the row phase."))
@tree.openElements.pop
until %w[tr html].include?(name = @tree.open_elements.last.name)
parse_error(_("Unexpected implied end tag (#{name}) in the row phase."))
@tree.open_elements.pop
end
end

View file

@ -15,44 +15,44 @@ module HTML5
def startTagOption(name, attributes)
# We need to imply </option> if <option> is the current node.
@tree.openElements.pop if @tree.openElements[-1].name == 'option'
@tree.insertElement(name, attributes)
@tree.open_elements.pop if @tree.open_elements.last.name == 'option'
@tree.insert_element(name, attributes)
end
def startTagOptgroup(name, attributes)
@tree.openElements.pop if @tree.openElements[-1].name == 'option'
@tree.openElements.pop if @tree.openElements[-1].name == 'optgroup'
@tree.insertElement(name, attributes)
@tree.open_elements.pop if @tree.open_elements.last.name == 'option'
@tree.open_elements.pop if @tree.open_elements.last.name == 'optgroup'
@tree.insert_element(name, attributes)
end
def startTagSelect(name, attributes)
@parser.parseError(_('Unexpected start tag (select) in the select phase implies select start tag.'))
parse_error(_('Unexpected start tag (select) in the select phase implies select start tag.'))
endTagSelect('select')
end
# Handles a start tag that is not allowed in the select phase: the token is
# reported as a parse error and otherwise ignored.
#
# name       - tag name, interpolated into the error message.
# attributes - attributes of the start tag; not used here.
def startTagOther(name, attributes)
  # The messages must be double-quoted: inside a single-quoted literal the
  # "#{...}" sequence is emitted verbatim instead of being replaced by the
  # tag name.
  # NOTE(review): both the camelCase and the snake_case reporting calls are
  # present in this listing; presumably they are the before/after pair of a
  # rename -- confirm which one should remain.
  @parser.parseError(_("Unexpected start tag token (#{name}) in the select phase. Ignored."))
  parse_error(_("Unexpected start tag token (#{name}) in the select phase. Ignored."))
end
def endTagOption(name)
if @tree.openElements[-1].name == 'option'
@tree.openElements.pop
if @tree.open_elements.last.name == 'option'
@tree.open_elements.pop
else
@parser.parseError(_('Unexpected end tag (option) in the select phase. Ignored.'))
parse_error(_('Unexpected end tag (option) in the select phase. Ignored.'))
end
end
def endTagOptgroup(name)
# </optgroup> implicitly closes <option>
if @tree.openElements[-1].name == 'option' and @tree.openElements[-2].name == 'optgroup'
@tree.openElements.pop
if @tree.open_elements.last.name == 'option' and @tree.open_elements[-2].name == 'optgroup'
@tree.open_elements.pop
end
# It also closes </optgroup>
if @tree.openElements[-1].name == 'optgroup'
@tree.openElements.pop
if @tree.open_elements.last.name == 'optgroup'
@tree.open_elements.pop
# But nothing else
else
@parser.parseError(_('Unexpected end tag (optgroup) in the select phase. Ignored.'))
parse_error(_('Unexpected end tag (optgroup) in the select phase. Ignored.'))
end
end
@ -60,15 +60,15 @@ module HTML5
if in_scope?('select', true)
remove_open_elements_until('select')
@parser.resetInsertionMode
@parser.reset_insertion_mode
else
# innerHTML case
@parser.parseError
# inner_html case
parse_error
end
end
def endTagTableElements(name)
@parser.parseError(_("Unexpected table end tag (#{name}) in the select phase."))
parse_error(_("Unexpected table end tag (#{name}) in the select phase."))
if in_scope?(name, true)
endTagSelect('select')
@ -77,7 +77,7 @@ module HTML5
end
def endTagOther(name)
@parser.parseError(_("Unexpected end tag token (#{name}) in the select phase. Ignored."))
parse_error(_("Unexpected end tag token (#{name}) in the select phase. Ignored."))
end
end

View file

@ -15,12 +15,12 @@ module HTML5
def startTagTr(name, attributes)
clearStackToTableBodyContext
@tree.insertElement(name, attributes)
@tree.insert_element(name, attributes)
@parser.phase = @parser.phases[:inRow]
end
def startTagTableCell(name, attributes)
@parser.parseError(_("Unexpected table cell start tag (#{name}) in the table body phase."))
parse_error(_("Unexpected table cell start tag (#{name}) in the table body phase."))
startTagTr('tr', {})
@parser.phase.processStartTag(name, attributes)
end
@ -29,11 +29,11 @@ module HTML5
# XXX AT Any ideas on how to share this with endTagTable?
if in_scope?('tbody', true) or in_scope?('thead', true) or in_scope?('tfoot', true)
clearStackToTableBodyContext
endTagTableRowGroup(@tree.openElements[-1].name)
endTagTableRowGroup(@tree.open_elements.last.name)
@parser.phase.processStartTag(name, attributes)
else
# innerHTML case
@parser.parseError
# inner_html case
parse_error
end
end
@ -44,26 +44,26 @@ module HTML5
def endTagTableRowGroup(name)
if in_scope?(name, true)
clearStackToTableBodyContext
@tree.openElements.pop
@tree.open_elements.pop
@parser.phase = @parser.phases[:inTable]
else
@parser.parseError(_("Unexpected end tag (#{name}) in the table body phase. Ignored."))
parse_error(_("Unexpected end tag (#{name}) in the table body phase. Ignored."))
end
end
def endTagTable(name)
if in_scope?('tbody', true) or in_scope?('thead', true) or in_scope?('tfoot', true)
clearStackToTableBodyContext
endTagTableRowGroup(@tree.openElements[-1].name)
endTagTableRowGroup(@tree.open_elements.last.name)
@parser.phase.processEndTag(name)
else
# innerHTML case
@parser.parseError
# inner_html case
parse_error
end
end
def endTagIgnore(name)
@parser.parseError(_("Unexpected end tag (#{name}) in the table body phase. Ignored."))
parse_error(_("Unexpected end tag (#{name}) in the table body phase. Ignored."))
end
def endTagOther(name)
@ -73,9 +73,9 @@ module HTML5
protected
def clearStackToTableBodyContext
until ['tbody', 'tfoot', 'thead', 'html'].include?(name = @tree.openElements[-1].name)
@parser.parseError(_("Unexpected implied end tag (#{name}) in the table body phase."))
@tree.openElements.pop
until %w[tbody tfoot thead html].include?(name = @tree.open_elements.last.name)
parse_error(_("Unexpected implied end tag (#{name}) in the table body phase."))
@tree.open_elements.pop
end
end

View file

@ -12,24 +12,24 @@ module HTML5
handle_end 'table', %w( body caption col colgroup html tbody td tfoot th thead tr ) => 'Ignore'
def processCharacters(data)
@parser.parseError(_("Unexpected non-space characters in table context caused voodoo mode."))
parse_error(_("Unexpected non-space characters in table context caused voodoo mode."))
# Make all the special element rearranging voodoo kick in
@tree.insertFromTable = true
@tree.insert_from_table = true
# Process the character in the "in body" mode
@parser.phases[:inBody].processCharacters(data)
@tree.insertFromTable = false
@tree.insert_from_table = false
end
def startTagCaption(name, attributes)
clearStackToTableContext
@tree.activeFormattingElements.push(Marker)
@tree.insertElement(name, attributes)
@tree.insert_element(name, attributes)
@parser.phase = @parser.phases[:inCaption]
end
def startTagColgroup(name, attributes)
clearStackToTableContext
@tree.insertElement(name, attributes)
@tree.insert_element(name, attributes)
@parser.phase = @parser.phases[:inColumnGroup]
end
@ -40,7 +40,7 @@ module HTML5
def startTagRowGroup(name, attributes)
clearStackToTableContext
@tree.insertElement(name, attributes)
@tree.insert_element(name, attributes)
@parser.phase = @parser.phases[:inTableBody]
end
@ -50,60 +50,60 @@ module HTML5
end
def startTagTable(name, attributes)
@parser.parseError(_("Unexpected start tag (table) in table phase. Implies end tag (table)."))
parse_error(_("Unexpected start tag (table) in table phase. Implies end tag (table)."))
@parser.phase.processEndTag('table')
@parser.phase.processStartTag(name, attributes) unless @parser.innerHTML
@parser.phase.processStartTag(name, attributes) unless @parser.inner_html
end
def startTagOther(name, attributes)
@parser.parseError(_("Unexpected start tag (#{name}) in table context caused voodoo mode."))
parse_error(_("Unexpected start tag (#{name}) in table context caused voodoo mode."))
# Make all the special element rearranging voodoo kick in
@tree.insertFromTable = true
@tree.insert_from_table = true
# Process the start tag in the "in body" mode
@parser.phases[:inBody].processStartTag(name, attributes)
@tree.insertFromTable = false
@tree.insert_from_table = false
end
def endTagTable(name)
if in_scope?('table', true)
@tree.generateImpliedEndTags
unless @tree.openElements[-1].name == 'table'
@parser.parseError(_("Unexpected end tag (table). Expected end tag (#{@tree.openElements[-1].name})."))
unless @tree.open_elements.last.name == 'table'
parse_error(_("Unexpected end tag (table). Expected end tag (#{@tree.open_elements.last.name})."))
end
remove_open_elements_until('table')
@parser.resetInsertionMode
@parser.reset_insertion_mode
else
# innerHTML case
assert @parser.innerHTML
@parser.parseError
# inner_html case
assert @parser.inner_html
parse_error
end
end
def endTagIgnore(name)
@parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
parse_error(_("Unexpected end tag (#{name}). Ignored."))
end
def endTagOther(name)
@parser.parseError(_("Unexpected end tag (#{name}) in table context caused voodoo mode."))
parse_error(_("Unexpected end tag (#{name}) in table context caused voodoo mode."))
# Make all the special element rearranging voodoo kick in
@tree.insertFromTable = true
@tree.insert_from_table = true
# Process the end tag in the "in body" mode
@parser.phases[:inBody].processEndTag(name)
@tree.insertFromTable = false
@tree.insert_from_table = false
end
protected
def clearStackToTableContext
# "clear the stack back to a table context"
until ['table', 'html'].include?(name = @tree.openElements[-1].name)
@parser.parseError(_("Unexpected implied end tag (#{name}) in the table phase."))
@tree.openElements.pop
until %w[table html].include?(name = @tree.open_elements.last.name)
parse_error(_("Unexpected implied end tag (#{name}) in the table phase."))
@tree.open_elements.pop
end
# When the current node is <html> it's an innerHTML case
# When the current node is <html> it's an inner_html case
end
end

View file

@ -7,22 +7,22 @@ module HTML5
# covered in the specification. The error handling is typically known as
# "quirks mode". It is expected that a future version of HTML5 will define this.
def processEOF
@parser.parseError(_('Unexpected End of file. Expected DOCTYPE.'))
def process_eof
parse_error(_('Unexpected End of file. Expected DOCTYPE.'))
@parser.phase = @parser.phases[:rootElement]
@parser.phase.processEOF
@parser.phase.process_eof
end
def processComment(data)
@tree.insertComment(data, @tree.document)
@tree.insert_comment(data, @tree.document)
end
def processDoctype(name, publicId, systemId, correct)
if name.downcase != 'html' or publicId or systemId
@parser.parseError(_('Erroneous DOCTYPE.'))
parse_error(_('Erroneous DOCTYPE.'))
end
# XXX need to update DOCTYPE tokens
@tree.insertDoctype(name)
@tree.insertDoctype(name, publicId, systemId)
publicId = publicId.to_s.upcase
@ -110,23 +110,22 @@ module HTML5
end
def processSpaceCharacters(data)
@tree.insertText(data, @tree.document)
end
def processCharacters(data)
@parser.parseError(_('Unexpected non-space characters. Expected DOCTYPE.'))
parse_error(_('Unexpected non-space characters. Expected DOCTYPE.'))
@parser.phase = @parser.phases[:rootElement]
@parser.phase.processCharacters(data)
end
def processStartTag(name, attributes)
@parser.parseError(_("Unexpected start tag (#{name}). Expected DOCTYPE."))
parse_error(_("Unexpected start tag (#{name}). Expected DOCTYPE."))
@parser.phase = @parser.phases[:rootElement]
@parser.phase.processStartTag(name, attributes)
end
def processEndTag(name)
@parser.parseError(_("Unexpected end tag (#{name}). Expected DOCTYPE."))
parse_error(_("Unexpected end tag (#{name}). Expected DOCTYPE."))
@parser.phase = @parser.phases[:rootElement]
@parser.phase.processEndTag(name)
end

View file

@ -15,9 +15,12 @@ module HTML5
#
class Phase
extend Forwardable
def_delegators :@parser, :parse_error
# The following example call:
#
# tag_handlers('startTag', 'html', %( base link meta ), %( li dt dd ) => 'ListItem')
# tag_handlers('startTag', 'html', %w( base link meta ), %w( li dt dd ) => 'ListItem')
#
# ...would return a hash equal to this:
#
@ -34,15 +37,15 @@ module HTML5
if tags.last.is_a?(Hash)
tags.pop.each do |names, handler_method_suffix|
handler_method = prefix + handler_method_suffix
Array(names).each { |name| mapping[name] = handler_method }
Array(names).each {|name| mapping[name] = handler_method }
end
end
tags.each do |names|
names = Array(names)
handler_method = prefix + names.map { |name| name.capitalize }.join
names.each { |name| mapping[name] = handler_method }
handler_method = prefix + names.map {|name| name.capitalize }.join
names.each {|name| mapping[name] = handler_method }
end
return mapping
mapping
end
def self.start_tag_handlers
@ -80,17 +83,17 @@ module HTML5
@parser, @tree = parser, tree
end
def processEOF
def process_eof
@tree.generateImpliedEndTags
if @tree.openElements.length > 2
@parser.parseError(_('Unexpected end of file. Missing closing tags.'))
elsif @tree.openElements.length == 2 and @tree.openElements[1].name != 'body'
if @tree.open_elements.length > 2
parse_error(_('Unexpected end of file. Missing closing tags.'))
elsif @tree.open_elements.length == 2 and @tree.open_elements[1].name != 'body'
# This happens for framesets or something?
@parser.parseError(_("Unexpected end of file. Expected end tag (#{@tree.openElements[1].name}) first."))
elsif @parser.innerHTML and @tree.openElements.length > 1
parse_error(_("Unexpected end of file. Expected end tag (#{@tree.open_elements[1].name}) first."))
elsif @parser.inner_html and @tree.open_elements.length > 1
# XXX This is not what the specification says. Not sure what to do here.
@parser.parseError(_('XXX innerHTML EOF'))
parse_error(_('XXX inner_html EOF'))
end
# Betting ends.
end
@ -98,11 +101,11 @@ module HTML5
def processComment(data)
# For most phases the following is correct. Where it's not it will be
# overridden.
@tree.insertComment(data, @tree.openElements[-1])
@tree.insert_comment(data, @tree.open_elements.last)
end
def processDoctype(name, publicId, systemId, correct)
@parser.parseError(_('Unexpected DOCTYPE. Ignored.'))
parse_error(_('Unexpected DOCTYPE. Ignored.'))
end
def processSpaceCharacters(data)
@ -114,17 +117,17 @@ module HTML5
end
def startTagHtml(name, attributes)
if @parser.firstStartTag == false and name == 'html'
@parser.parseError(_('html needs to be the first start tag.'))
if @parser.first_start_tag == false and name == 'html'
parse_error(_('html needs to be the first start tag.'))
end
# XXX Need a check here to see if the first start tag token emitted is
# this token... If it's not, invoke @parser.parseError.
# this token... If it's not, invoke parse_error.
attributes.each do |attr, value|
unless @tree.openElements[0].attributes.has_key?(attr)
@tree.openElements[0].attributes[attr] = value
unless @tree.open_elements.first.attributes.has_key?(attr)
@tree.open_elements.first.attributes[attr] = value
end
end
@parser.firstStartTag = false
@parser.first_start_tag = false
end
def processEndTag(name)
@ -146,11 +149,10 @@ module HTML5
def remove_open_elements_until(name=nil)
finished = false
until finished
element = @tree.openElements.pop
finished = name.nil?? yield(element) : element.name == name
element = @tree.open_elements.pop
finished = name.nil? ? yield(element) : element.name == name
end
return element
end
end
end

View file

@ -3,38 +3,37 @@ require 'html5/html5parser/phase'
module HTML5
class RootElementPhase < Phase
def processEOF
insertHtmlElement
@parser.phase.processEOF
def process_eof
insert_html_element
@parser.phase.process_eof
end
def processComment(data)
@tree.insertComment(data, @tree.document)
@tree.insert_comment(data, @tree.document)
end
def processSpaceCharacters(data)
@tree.insertText(data, @tree.document)
end
def processCharacters(data)
insertHtmlElement
insert_html_element
@parser.phase.processCharacters(data)
end
def processStartTag(name, attributes)
@parser.firstStartTag = true if name == 'html'
insertHtmlElement
@parser.first_start_tag = true if name == 'html'
insert_html_element
@parser.phase.processStartTag(name, attributes)
end
def processEndTag(name)
insertHtmlElement
insert_html_element
@parser.phase.processEndTag(name)
end
def insertHtmlElement
def insert_html_element
element = @tree.createElement('html', {})
@tree.openElements.push(element)
@tree.open_elements.push(element)
@tree.document.appendChild(element)
@parser.phase = @parser.phases[:beforeHead]
end

View file

@ -3,34 +3,33 @@ require 'html5/html5parser/phase'
module HTML5
class TrailingEndPhase < Phase
def processEOF
def process_eof
end
def processComment(data)
@tree.insertComment(data, @tree.document)
@tree.insert_comment(data, @tree.document)
end
def processSpaceCharacters(data)
@parser.lastPhase.processSpaceCharacters(data)
@parser.last_phase.processSpaceCharacters(data)
end
def processCharacters(data)
@parser.parseError(_('Unexpected non-space characters. Expected end of file.'))
@parser.phase = @parser.lastPhase
parse_error(_('Unexpected non-space characters. Expected end of file.'))
@parser.phase = @parser.last_phase
@parser.phase.processCharacters(data)
end
# Handles a start tag seen while only end of file is expected: reports a
# parse error, switches the parser back to its previous phase and lets that
# phase process the tag.
#
# name       - tag name, interpolated into the error message.
# attributes - attributes of the start tag, passed on unchanged.
def processStartTag(name, attributes)
  # The messages must be double-quoted: inside a single-quoted literal the
  # "#{...}" sequence is emitted verbatim instead of being replaced by the
  # tag name.
  # NOTE(review): the camelCase and snake_case statements below look like the
  # before/after pair of a rename -- confirm which set should remain.
  @parser.parseError(_("Unexpected start tag (#{name}). Expected end of file."))
  @parser.phase = @parser.lastPhase
  parse_error(_("Unexpected start tag (#{name}). Expected end of file."))
  @parser.phase = @parser.last_phase
  # Re-dispatch the same token to the phase that was just restored.
  @parser.phase.processStartTag(name, attributes)
end
# Handles an end tag seen while only end of file is expected: reports a
# parse error, switches the parser back to its previous phase and lets that
# phase process the tag.
#
# name - tag name, interpolated into the error message.
def processEndTag(name)
  # The messages must be double-quoted: inside a single-quoted literal the
  # "#{...}" sequence is emitted verbatim instead of being replaced by the
  # tag name.
  # NOTE(review): the camelCase and snake_case statements below look like the
  # before/after pair of a rename -- confirm which set should remain.
  @parser.parseError(_("Unexpected end tag (#{name}). Expected end of file."))
  @parser.phase = @parser.lastPhase
  parse_error(_("Unexpected end tag (#{name}). Expected end of file."))
  @parser.phase = @parser.last_phase
  # Re-dispatch the same token to the phase that was just restored.
  @parser.phase.processEndTag(name)
end
end
end

View file

@ -27,11 +27,11 @@ module HTML5
# parse_meta - Look for a <meta> element containing encoding information
def initialize(source, options = {})
@encoding = nil
@encoding = nil
@parse_meta = true
@chardet = true
@chardet = true
options.each { |name, value| instance_variable_set("@#{name}", value) }
options.each {|name, value| instance_variable_set("@#{name}", value) }
# Raw Stream
@raw_stream = open_stream(source)
@ -297,7 +297,7 @@ module HTML5
end
when 0xC0 .. 0xFF
if @win1252
if instance_variables.include?("@win1252") && @win1252
"\xC3" + (c-64).chr # convert to utf-8
elsif @buffer[@tell-1 .. @tell+3] =~ /^
( [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte

View file

@ -24,7 +24,7 @@ module HTML5
@phases[:initial] = XmlRootPhase.new(self, @tree)
end
def normalizeToken(token)
def normalize_token(token)
case token[:type]
when :StartTag, :EmptyTag
# We need to remove the duplicate attributes and convert attributes
@ -34,23 +34,23 @@ module HTML5
# For EmptyTags, process both a Start and an End tag
if token[:type] == :EmptyTag
save = @tokenizer.contentModelFlag
save = @tokenizer.content_model_flag
@phase.processStartTag(token[:name], token[:data])
@tokenizer.contentModelFlag = save
@tokenizer.content_model_flag = save
token[:data] = {}
token[:type] = :EndTag
end
when :Characters
# un-escape RCDATA_ELEMENTS (e.g. style, script)
if @tokenizer.contentModelFlag == :CDATA
if @tokenizer.content_model_flag == :CDATA
token[:data] = token[:data].
gsub('&lt;','<').gsub('&gt;','>').gsub('&amp;','&')
end
when :EndTag
if token[:data]
parseError(_("End tag contains unexpected attributes."))
parse_error(_("End tag contains unexpected attributes."))
end
when :Comment
@ -74,22 +74,22 @@ module HTML5
@phases[:rootElement] = XhmlRootPhase.new(self, @tree)
end
def normalizeToken(token)
def normalize_token(token)
super(token)
# ensure that non-void XHTML elements have content so that separate
# open and close tags are emitted
if token[:type] == :EndTag
if VOID_ELEMENTS.include? token[:name]
if @tree.openElements[-1].name != token["name"]:
if @tree.open_elements[-1].name != token["name"]:
token[:type] = :EmptyTag
token["data"] ||= {}
end
else
if token[:name] == @tree.openElements[-1].name and \
not @tree.openElements[-1].hasContent
if token[:name] == @tree.open_elements[-1].name and \
not @tree.open_elements[-1].hasContent
@tree.insertText('') unless
@tree.openElements.any? {|e|
@tree.open_elements.any? {|e|
e.attributes.keys.include? 'xmlns' and
e.attributes['xmlns'] != 'http://www.w3.org/1999/xhtml'
}
@ -102,9 +102,9 @@ module HTML5
end
class XhmlRootPhase < RootElementPhase
def insertHtmlElement
def insert_html_element
element = @tree.createElement("html", {'xmlns' => 'http://www.w3.org/1999/xhtml'})
@tree.openElements.push(element)
@tree.open_elements.push(element)
@tree.document.appendChild(element)
@parser.phase = @parser.phases[:beforeHead]
end
@ -115,15 +115,15 @@ module HTML5
@start_tag_handlers = Hash.new(:startTagOther)
@end_tag_handlers = Hash.new(:endTagOther)
def startTagOther(name, attributes)
@tree.openElements.push(@tree.document)
@tree.open_elements.push(@tree.document)
element = @tree.createElement(name, attributes)
@tree.openElements[-1].appendChild(element)
@tree.openElements.push(element)
@tree.open_elements[-1].appendChild(element)
@tree.open_elements.push(element)
@parser.phase = XmlElementPhase.new(@parser,@tree)
end
def endTagOther(name)
super
@tree.openElements.pop
@tree.open_elements.pop
end
end
@ -135,17 +135,17 @@ module HTML5
def startTagOther(name, attributes)
element = @tree.createElement(name, attributes)
@tree.openElements[-1].appendChild(element)
@tree.openElements.push(element)
@tree.open_elements[-1].appendChild(element)
@tree.open_elements.push(element)
end
def endTagOther(name)
for node in @tree.openElements.reverse
for node in @tree.open_elements.reverse
if node.name == name
{} while @tree.openElements.pop != node
{} while @tree.open_elements.pop != node
break
else
@parser.parseError
parse_error
end
end
end

View file

@ -13,11 +13,11 @@ module HTML5
# or, if you already have a parse tree (in this example, a REXML tree),
# at the Serializer stage:
#
# tokens = TreeWalkers.getTreeWalker('rexml').new(tree)
# tokens = TreeWalkers.get_tree_walker('rexml').new(tree)
# HTMLSerializer.serialize(tokens, {:encoding=>'utf-8',
# :sanitize => true})
module HTMLSanitizeModule
module HTMLSanitizeModule
ACCEPTABLE_ELEMENTS = %w[a abbr acronym address area b big blockquote br
button caption center cite code col colgroup dd del dfn dir div dl dt

View file

@ -13,18 +13,18 @@ module HTML5
end
def initialize(options={})
@quote_attr_values = false
@quote_char = '"'
@use_best_quote_char = true
@quote_attr_values = false
@quote_char = '"'
@use_best_quote_char = true
@minimize_boolean_attributes = true
@use_trailing_solidus = false
@use_trailing_solidus = false
@space_before_trailing_solidus = true
@escape_lt_in_attrs = false
@escape_rcdata = false
@escape_lt_in_attrs = false
@escape_rcdata = false
@omit_optional_tags = true
@sanitize = false
@sanitize = false
@strip_whitespace = false
@ -73,7 +73,7 @@ module HTML5
elsif [:Characters, :SpaceCharacters].include? type
if type == :SpaceCharacters or in_cdata
if in_cdata and token[:data].include?("</")
serializeError(_("Unexpected </ in CDATA"))
serialize_error(_("Unexpected </ in CDATA"))
end
result << token[:data]
else
@ -85,7 +85,7 @@ module HTML5
if RCDATA_ELEMENTS.include?(name) and not @escape_rcdata
in_cdata = true
elsif in_cdata
serializeError(_("Unexpected child element of a CDATA element"))
serialize_error(_("Unexpected child element of a CDATA element"))
end
attributes = []
for k,v in attrs = token[:data].to_a.sort
@ -137,19 +137,19 @@ module HTML5
if RCDATA_ELEMENTS.include?(name)
in_cdata = false
elsif in_cdata
serializeError(_("Unexpected child element of a CDATA element"))
serialize_error(_("Unexpected child element of a CDATA element"))
end
end_tag = "</#{name}>"
result << end_tag
elsif type == :Comment
data = token[:data]
serializeError(_("Comment contains --")) if data.index("--")
serialize_error(_("Comment contains --")) if data.index("--")
comment = "<!--%s-->" % token[:data]
result << comment
else
serializeError(token[:data])
serialize_error(token[:data])
end
end
@ -163,13 +163,15 @@ module HTML5
alias :render :serialize
def serializeError(data="XXX ERROR MESSAGE NEEDED")
def serialize_error(data="XXX ERROR MESSAGE NEEDED")
# XXX The idea is to make data mandatory.
@errors.push(data)
if @strict
raise SerializeError
end
end
def _(string); string; end
end
# Error in serialized tree

View file

@ -4,12 +4,12 @@ module HTML5
class XHTMLSerializer < HTMLSerializer
DEFAULTS = {
:quote_attr_values => true,
:quote_attr_values => true,
:minimize_boolean_attributes => false,
:use_trailing_solidus => true,
:escape_lt_in_attrs => true,
:omit_optional_tags => false,
:escape_rcdata => true
:use_trailing_solidus => true,
:escape_lt_in_attrs => true,
:omit_optional_tags => false,
:escape_rcdata => true
}
def initialize(options={})

View file

@ -0,0 +1,45 @@
module HTML5
  module Sniffer
    # Content-type sniffing: decide whether a document is HTML or a feed
    # (HTML5 draft, section 4.7.4).
    #
    # Only the first 512 units of +str+ are examined. Leading whitespace,
    # comments (<!-- ... -->), markup declarations (<! ... >) and processing
    # instructions (<? ... ?>) are skipped; the first other tag decides.
    #
    # str:: the beginning of the document, as a String
    #
    # Returns "application/rss+xml" when the first real tag starts with
    # "rss", "application/atom+xml" when it starts with "feed", and
    # "text/html" in every other case.
    #
    # Raises NotImplementedError when the first real tag starts with
    # "rdf:RDF" (RDF sniffing is not implemented).
    def html_or_feed str
      s = str[0, 512] # steps 1, 2
      pos = 0
      while pos < s.length
        # Compare one-character substrings instead of integer byte values:
        # String#[] with a single index returns a Fixnum on Ruby 1.8 but a
        # String on 1.9+, so integer comparisons silently never match there.
        case s[pos, 1]
        when "\t", " ", "\n", "\r"
          pos += 1
        when "<"
          pos += 1
          if s[pos, 3] == "!--"
            # Comment: resume after the closing "-->", or stop at the end
            # of the buffer when the comment is unterminated.
            close = s.index("-->", pos + 3)
            pos = close ? close + 3 : s.length
          elsif s[pos, 1] == "!"
            # Markup declaration (e.g. DOCTYPE): resume after the next ">".
            close = s.index(">", pos + 1)
            pos = close ? close + 1 : s.length
          elsif s[pos, 1] == "?"
            # Processing instruction: the search starts on the "?" itself,
            # so "<?>" counts as a complete instruction.
            close = s.index("?>", pos)
            pos = close ? close + 2 : s.length
          elsif s[pos, 3] == "rss"
            return "application/rss+xml"
          elsif s[pos, 4] == "feed"
            return "application/atom+xml"
          elsif s[pos, 7] == "rdf:RDF"
            raise NotImplementedError
          else
            # Any other tag ends the algorithm; do not keep scanning, or
            # input such as "< <rss" would be misreported as a feed.
            return "text/html"
          end
        else
          break
        end
      end
      "text/html"
    end
  end
end

File diff suppressed because it is too large Load diff

View file

@ -18,7 +18,7 @@ module HTML5
end
end
alias :getTreeBuilder :[]
alias :get_tree_builder :[]
end
end
end

View file

@ -24,9 +24,9 @@ module HTML5
attr_accessor :_flags
def initialize(name)
@parent = nil
@parent = nil
@childNodes = []
@_flags = []
@_flags = []
end
# Insert node as a child of the current node
@ -76,13 +76,13 @@ module HTML5
# Base treebuilder implementation
class TreeBuilder
attr_accessor :openElements
attr_accessor :open_elements
attr_accessor :activeFormattingElements
attr_accessor :document
attr_accessor :headPointer
attr_accessor :head_pointer
attr_accessor :formPointer
@ -106,25 +106,25 @@ module HTML5
end
def reset
@openElements = []
@open_elements = []
@activeFormattingElements = []
#XXX - rename these to headElement, formElement
@headPointer = nil
@head_pointer = nil
@formPointer = nil
self.insertFromTable = false
self.insert_from_table = false
@document = @documentClass.new
end
def elementInScope(target, tableVariant=false)
# Exit early when possible.
return true if @openElements[-1].name == target
return true if @open_elements[-1].name == target
# AT How about while true and simply set node to [-1] and set it to
# [-2] at the end...
@openElements.reverse.each do |element|
@open_elements.reverse.each do |element|
if element.name == target
return true
elsif element.name == 'table'
@ -149,10 +149,10 @@ module HTML5
# Step 2 and step 3: we start with the last element. So i is -1.
i = -1
entry = @activeFormattingElements[i]
return if entry == Marker or @openElements.include?(entry)
return if entry == Marker or @open_elements.include?(entry)
# Step 6
until entry == Marker or @openElements.include?(entry)
until entry == Marker or @open_elements.include?(entry)
# Step 5: let entry be one earlier in the list.
i -= 1
begin
@ -171,7 +171,7 @@ module HTML5
clone = @activeFormattingElements[i].cloneNode
# Step 9
element = insertElement(clone.name, clone.attributes)
element = insert_element(clone.name, clone.attributes)
# Step 10
@activeFormattingElements[i] = element
@ -198,12 +198,15 @@ module HTML5
return false
end
def insertDoctype(name)
@document.appendChild(@doctypeClass.new(name))
def insertDoctype(name, public_id, system_id)
doctype = @doctypeClass.new(name)
doctype.public_id = public_id
doctype.system_id = system_id
@document.appendChild(doctype)
end
def insertComment(data, parent=nil)
parent = @openElements[-1] if parent.nil?
def insert_comment(data, parent=nil)
parent = @open_elements[-1] if parent.nil?
parent.appendChild(@commentClass.new(data))
end
@ -216,28 +219,28 @@ module HTML5
# Switch the function used to insert an element from the
# normal one to the misnested table one and back again
def insertFromTable=(value)
@insertFromTable = value
@insertElement = value ? :insertElementTable : :insertElementNormal
def insert_from_table=(value)
@insert_from_table = value
@insert_element = value ? :insert_elementTable : :insert_elementNormal
end
def insertElement(name, attributes)
send(@insertElement, name, attributes)
def insert_element(name, attributes)
send(@insert_element, name, attributes)
end
def insertElementNormal(name, attributes)
def insert_elementNormal(name, attributes)
element = @elementClass.new(name)
element.attributes = attributes
@openElements[-1].appendChild(element)
@openElements.push(element)
@open_elements.last.appendChild(element)
@open_elements.push(element)
return element
end
# Create an element and insert it into the tree
def insertElementTable(name, attributes)
def insert_elementTable(name, attributes)
element = @elementClass.new(name)
element.attributes = attributes
if TABLE_INSERT_MODE_ELEMENTS.include?(@openElements[-1].name)
if TABLE_INSERT_MODE_ELEMENTS.include?(@open_elements.last.name)
#We should be in the InTable mode. This means we want to do
#special magic element rearranging
parent, insertBefore = getTableMisnestedNodePosition
@ -246,17 +249,17 @@ module HTML5
else
parent.insertBefore(element, insertBefore)
end
@openElements.push(element)
@open_elements.push(element)
else
return insertElementNormal(name, attributes)
return insert_elementNormal(name, attributes)
end
return element
end
def insertText(data, parent=nil)
parent = @openElements[-1] if parent.nil?
parent = @open_elements[-1] if parent.nil?
if (not(@insertFromTable) or (@insertFromTable and not TABLE_INSERT_MODE_ELEMENTS.include?(@openElements[-1].name)))
if (not(@insert_from_table) or (@insert_from_table and not TABLE_INSERT_MODE_ELEMENTS.include?(@open_elements[-1].name)))
parent.insertText(data)
else
#We should be in the InTable mode. This means we want to do
@ -265,7 +268,7 @@ module HTML5
parent.insertText(data, insertBefore)
end
end
# Get the foster parent element, and sibling to insert before
# (or nil) when inserting a misnested table node
def getTableMisnestedNodePosition
@ -275,7 +278,7 @@ module HTML5
lastTable = nil
fosterParent = nil
insertBefore = nil
@openElements.reverse.each do |element|
@open_elements.reverse.each do |element|
if element.name == "table"
lastTable = element
break
@ -288,33 +291,34 @@ module HTML5
fosterParent = lastTable.parent
insertBefore = lastTable
else
fosterParent = @openElements[@openElements.index(lastTable) - 1]
fosterParent = @open_elements[@open_elements.index(lastTable) - 1]
end
else
fosterParent = @openElements[0]
fosterParent = @open_elements[0]
end
return fosterParent, insertBefore
end
def generateImpliedEndTags(exclude=nil)
name = @openElements[-1].name
name = @open_elements[-1].name
if (['dd', 'dt', 'li', 'p', 'td', 'th', 'tr'].include?(name) and name != exclude)
@openElements.pop
# XXX td, th and tr are not actually needed
if (%w[dd dt li p td th tr].include?(name) and name != exclude)
@open_elements.pop
# XXX This is not entirely what the specification says. We should
# investigate it more closely.
generateImpliedEndTags(exclude)
end
end
def getDocument
def get_document
@document
end
def getFragment
#assert @innerHTML
def get_fragment
#assert @inner_html
fragment = @fragmentClass.new
@openElements[0].reparentChildren(fragment)
@open_elements[0].reparentChildren(fragment)
return fragment
end

View file

@ -8,7 +8,6 @@ module HTML5
module Hpricot
class Node < Base::Node
extend Forwardable
def_delegators :@hpricot, :name
@ -22,7 +21,7 @@ module HTML5
def appendChild(node)
if node.kind_of?(TextNode) and childNodes.any? and childNodes.last.kind_of?(TextNode)
childNodes[-1].hpricot.content = childNodes[-1].hpricot.to_s + node.hpricot.to_s
childNodes.last.hpricot.content = childNodes.last.hpricot.content + node.hpricot.content
else
childNodes << node
hpricot.children << node.hpricot
@ -145,21 +144,27 @@ module HTML5
end
class DocumentType < Node
def_delegators :@hpricot, :public_id, :system_id
def self.hpricot_class
::Hpricot::DocType
end
def initialize(name)
def initialize(name, public_id, system_id)
begin
super(name)
rescue ArgumentError # needs 3...
end
@hpricot = ::Hpricot::DocType.new(name, nil, nil)
@hpricot = ::Hpricot::DocType.new(name, public_id, system_id)
end
def printTree(indent=0)
"\n|#{' ' * indent}<!DOCTYPE #{hpricot.target}>"
if hpricot.target and hpricot.target.any?
"\n|#{' ' * indent}<!DOCTYPE #{hpricot.target}>"
else
"\n|#{' ' * indent}<!DOCTYPE >"
end
end
end
@ -169,7 +174,7 @@ module HTML5
end
def printTree(indent=0)
childNodes.inject('') { |tree, child| tree + child.printTree(indent+2) }
childNodes.inject('') {|tree, child| tree + child.printTree(indent + 2) }
end
end
@ -196,21 +201,26 @@ module HTML5
class TreeBuilder < Base::TreeBuilder
def initialize
@documentClass = Document
@doctypeClass = DocumentType
@elementClass = Element
@commentClass = CommentNode
@doctypeClass = DocumentType
@elementClass = Element
@commentClass = CommentNode
@fragmentClass = DocumentFragment
end
def insertDoctype(name, public_id, system_id)
doctype = @doctypeClass.new(name, public_id, system_id)
@document.appendChild(doctype)
end
def testSerializer(node)
node.printTree
end
def getDocument
def get_document
@document.hpricot
end
def getFragment
def get_fragment
@document = super
return @document.hpricot.children
end

View file

@ -17,11 +17,9 @@ module HTML5
end
def appendChild node
if node.kind_of? TextNode and
childNodes.length>0 and childNodes[-1].kind_of? TextNode
childNodes[-1].rxobj.value =
childNodes[-1].rxobj.to_s + node.rxobj.to_s
childNodes[-1].rxobj.raw = true
if node.kind_of?(TextNode) && childNodes.length > 0 && childNodes.last.kind_of?(TextNode)
childNodes.last.rxobj.value = childNodes.last.rxobj.to_s + node.rxobj.to_s
childNodes.last.rxobj.raw = true
else
childNodes.push node
rxobj.add node.rxobj
@ -45,10 +43,8 @@ module HTML5
def insertBefore node, refNode
index = childNodes.index(refNode)
if node.kind_of? TextNode and index>0 and
childNodes[index-1].kind_of? TextNode
childNodes[index-1].rxobj.value =
childNodes[index-1].rxobj.to_s + node.rxobj.to_s
if node.kind_of?(TextNode) and index > 0 && childNodes[index-1].kind_of?(TextNode)
childNodes[index-1].rxobj.value = childNodes[index-1].rxobj.to_s + node.rxobj.to_s
childNodes[index-1].rxobj.raw = true
else
childNodes.insert index, node
@ -57,7 +53,7 @@ module HTML5
end
def hasContent
return (childNodes.length > 0)
(childNodes.length > 0)
end
end
@ -77,7 +73,7 @@ module HTML5
end
def attributes= value
value.each {|name, value| rxobj.attributes[name]=value}
value.each {|name, value| rxobj.attributes[name] = value}
end
def printTree indent=0
@ -90,7 +86,7 @@ module HTML5
for child in childNodes
tree += child.printTree(indent)
end
return tree
tree
end
end
@ -120,10 +116,25 @@ module HTML5
end
class DocumentType < Node
def_delegator :@rxobj, :public, :public_id
def_delegator :@rxobj, :system, :system_id
def self.rxclass
::REXML::DocType
end
def initialize name, public_id, system_id
super(name)
if public_id
@rxobj = ::REXML::DocType.new [name, ::REXML::DocType::PUBLIC, public_id, system_id]
elsif system_id
@rxobj = ::REXML::DocType.new [name, ::REXML::DocType::SYSTEM, nil, system_id]
else
@rxobj = ::REXML::DocType.new name
end
end
def printTree indent=0
"\n|#{' ' * indent}<!DOCTYPE #{name}>"
end
@ -145,7 +156,7 @@ module HTML5
class TextNode < Node
def initialize data
raw=data.gsub('&','&amp;').gsub('<','&lt;').gsub('>','&gt;')
raw = data.gsub('&', '&amp;').gsub('<', '&lt;').gsub('>', '&gt;')
@rxobj = ::REXML::Text.new(raw, true, nil, true)
end
@ -167,21 +178,26 @@ module HTML5
class TreeBuilder < Base::TreeBuilder
def initialize
@documentClass = Document
@doctypeClass = DocumentType
@elementClass = Element
@commentClass = CommentNode
@doctypeClass = DocumentType
@elementClass = Element
@commentClass = CommentNode
@fragmentClass = DocumentFragment
end
def testSerializer node
node.printTree()
def insertDoctype(name, public_id, system_id)
doctype = @doctypeClass.new(name, public_id, system_id)
@document.appendChild(doctype)
end
def getDocument
def testSerializer node
node.printTree
end
def get_document
@document.rxobj
end
def getFragment
def get_fragment
@document = super
return @document.rxobj.children
end

View file

@ -18,17 +18,17 @@ module HTML5
def initialize name
super
@name = name
@value = nil
@name = name
@value = nil
@attributes = {}
end
def appendChild node
if node.kind_of? TextNode and
childNodes.length>0 and childNodes[-1].kind_of? TextNode
childNodes[-1].value += node.value
childNodes.length > 0 and childNodes.last.kind_of? TextNode
childNodes.last.value += node.value
else
childNodes.push node
childNodes << node
end
node.parent = self
end
@ -55,8 +55,7 @@ module HTML5
def insertBefore node, refNode
index = childNodes.index(refNode)
if node.kind_of? TextNode and index>0 and
childNodes[index-1].kind_of? TextNode
if node.kind_of?(TextNode) && index > 0 && childNodes[index-1].kind_of?(TextNode)
childNodes[index-1].value += node.value
else
childNodes.insert index, node
@ -72,7 +71,7 @@ module HTML5
end
def hasContent
return (childNodes.length > 0)
childNodes.length > 0
end
end
@ -90,7 +89,7 @@ module HTML5
for child in childNodes
tree += child.printTree(indent)
end
return tree
tree
end
end
@ -108,13 +107,21 @@ module HTML5
for child in childNodes
tree += child.printTree(indent + 2)
end
return tree
tree
end
end
class DocumentType < Node
attr_accessor :public_id, :system_id
def to_s
"<!DOCTYPE %s>" % name
"<!DOCTYPE #{name}>"
end
def initialize name
super name
@public_id = nil
@system_id = nil
end
end
@ -157,19 +164,19 @@ module HTML5
class TreeBuilder < Base::TreeBuilder
def initialize
@documentClass = Document
@doctypeClass = DocumentType
@elementClass = Element
@commentClass = CommentNode
@doctypeClass = DocumentType
@elementClass = Element
@commentClass = CommentNode
@fragmentClass = DocumentFragment
end
def testSerializer node
node.printTree()
node.printTree
end
def getFragment
def get_fragment
@document = super
return @document.childNodes
@document.childNodes
end
end

View file

@ -6,13 +6,13 @@ module HTML5
class << self
def [](name)
case name.to_s.downcase
when 'simpletree' then
when 'simpletree'
require 'html5/treewalkers/simpletree'
SimpleTree::TreeWalker
when 'rexml' then
when 'rexml'
require 'html5/treewalkers/rexml'
REXML::TreeWalker
when 'hpricot' then
when 'hpricot'
require 'html5/treewalkers/hpricot'
Hpricot::TreeWalker
else
@ -20,7 +20,7 @@ module HTML5
end
end
alias :getTreeWalker :[]
alias :get_tree_walker :[]
end
end
end

View file

@ -3,153 +3,151 @@ module HTML5
module TreeWalkers
module TokenConstructor
def error(msg)
return {:type => "SerializeError", :data => msg}
def error(msg)
{:type => "SerializeError", :data => msg}
end
def normalize_attrs(attrs)
attrs.to_a
end
def empty_tag(name, attrs, has_children=false)
error(_("Void element has children")) if has_children
{:type => :EmptyTag, :name => name, :data => normalize_attrs(attrs)}
end
def start_tag(name, attrs)
{:type => :StartTag, :name => name, :data => normalize_attrs(attrs)}
end
def end_tag(name)
{:type => :EndTag, :name => name, :data => []}
end
def text(data)
if data =~ /\A([#{SPACE_CHARACTERS.join('')}]+)/m
yield({:type => :SpaceCharacters, :data => $1})
data = data[$1.length .. -1]
return if data.empty?
end
def normalizeAttrs(attrs)
attrs.to_a
if data =~ /([#{SPACE_CHARACTERS.join('')}]+)\Z/m
yield({:type => :Characters, :data => data[0 ... -$1.length]})
yield({:type => :SpaceCharacters, :data => $1})
else
yield({:type => :Characters, :data => data})
end
end
def emptyTag(name, attrs, hasChildren=false)
error(_("Void element has children")) if hasChildren
return({:type => :EmptyTag, :name => name, \
:data => normalizeAttrs(attrs)})
end
def comment(data)
{:type => :Comment, :data => data}
end
def startTag(name, attrs)
return {:type => :StartTag, :name => name, \
:data => normalizeAttrs(attrs)}
end
def doctype(name, public_id, system_id, correct=nil)
{:type => :Doctype, :name => name, :public_id => public_id, :system_id => system_id, :correct => correct}
end
def endTag(name)
return {:type => :EndTag, :name => name, :data => []}
end
def unknown(nodeType)
error(_("Unknown node type: ") + nodeType.to_s)
end
def text(data)
if data =~ /\A([#{SPACE_CHARACTERS.join('')}]+)/m
yield({:type => :SpaceCharacters, :data => $1})
data = data[$1.length .. -1]
return if data.empty?
end
if data =~ /([#{SPACE_CHARACTERS.join('')}]+)\Z/m
yield({:type => :Characters, :data => data[0 ... -$1.length]})
yield({:type => :SpaceCharacters, :data => $1})
else
yield({:type => :Characters, :data => data})
end
end
def comment(data)
return {:type => :Comment, :data => data}
end
def doctype(name)
return {:type => :Doctype, :name => name, :data => name.upcase() == "HTML"}
end
def unknown(nodeType)
return error(_("Unknown node type: ") + nodeType.to_s)
end
def _(str)
str
end
def _(str)
str
end
end
class Base
include TokenConstructor
def initialize(tree)
@tree = tree
@tree = tree
end
def each
raise NotImplementedError
raise NotImplementedError
end
alias walk each
end
class NonRecursiveTreeWalker < TreeWalkers::Base
def node_details(node)
raise NotImplementedError
end
def node_details(node)
raise NotImplementedError
end
def first_child(node)
raise NotImplementedError
end
def first_child(node)
raise NotImplementedError
end
def next_sibling(node)
raise NotImplementedError
end
def next_sibling(node)
raise NotImplementedError
end
def parent(node)
raise NotImplementedError
end
def parent(node)
raise NotImplementedError
end
def each
currentNode = @tree
while currentNode != nil
details = node_details(currentNode)
hasChildren = false
def each
current_node = @tree
while current_node != nil
details = node_details(current_node)
has_children = false
case details.shift
when :DOCTYPE
yield doctype(*details)
case details.shift
when :DOCTYPE
yield doctype(*details)
when :TEXT
text(*details) {|token| yield token}
when :TEXT
text(*details) {|token| yield token}
when :ELEMENT
name, attributes, hasChildren = details
if VOID_ELEMENTS.include?(name)
yield emptyTag(name, attributes.to_a, hasChildren)
hasChildren = false
else
yield startTag(name, attributes.to_a)
end
when :COMMENT
yield comment(details[0])
when :DOCUMENT, :DOCUMENT_FRAGMENT
hasChildren = true
when nil
# ignore (REXML::XMLDecl is an example)
else
yield unknown(details[0])
end
firstChild = hasChildren ? first_child(currentNode) : nil
if firstChild != nil
currentNode = firstChild
else
while currentNode != nil
details = node_details(currentNode)
if details.shift == :ELEMENT
name, attributes, hasChildren = details
yield endTag(name) if !VOID_ELEMENTS.include?(name)
end
if @tree == currentNode
currentNode = nil
else
nextSibling = next_sibling(currentNode)
if nextSibling != nil
currentNode = nextSibling
break
end
currentNode = parent(currentNode)
end
end
end
when :ELEMENT
name, attributes, has_children = details
if VOID_ELEMENTS.include?(name)
yield empty_tag(name, attributes.to_a, has_children)
has_children = false
else
yield start_tag(name, attributes.to_a)
end
when :COMMENT
yield comment(details[0])
when :DOCUMENT, :DOCUMENT_FRAGMENT
has_children = true
when nil
# ignore (REXML::XMLDecl is an example)
else
yield unknown(details[0])
end
first_child = has_children ? first_child(current_node) : nil
if first_child != nil
current_node = first_child
else
while current_node != nil
details = node_details(current_node)
if details.shift == :ELEMENT
name, attributes, has_children = details
yield end_tag(name) if !VOID_ELEMENTS.include?(name)
end
if @tree == current_node
current_node = nil
else
next_sibling = next_sibling(current_node)
if next_sibling != nil
current_node = next_sibling
break
end
current_node = parent(current_node)
end
end
end
end
end
end
end

View file

@ -13,17 +13,17 @@ module HTML5
[:DOCUMENT_FRAGMENT]
else
[:ELEMENT, node.name,
node.attributes.map {|name,value| [name,value]},
node.attributes.map {|name, value| [name, value]},
!node.empty?]
end
when ::Hpricot::Text
[:TEXT, node.to_plain_text]
[:TEXT, node.content]
when ::Hpricot::Comment
[:COMMENT, node.content]
when ::Hpricot::Doc
[:DOCUMENT]
when ::Hpricot::DocType
[:DOCTYPE, node.target]
[:DOCTYPE, node.target, node.public_id, node.system_id]
when ::Hpricot::XMLDecl
[nil]
else

View file

@ -23,7 +23,7 @@ module HTML5
when ::REXML::Comment
[:COMMENT, node.string]
when ::REXML::DocType
[:DOCTYPE, node.name]
[:DOCTYPE, node.name, node.public, node.system]
when ::REXML::XMLDecl
[nil]
else

View file

@ -12,20 +12,20 @@ module HTML5
return
when DocumentType
yield doctype(node.name)
yield doctype(node.name, node.public_id, node.system_id)
when TextNode
text(node.value) {|token| yield token}
when Element
if VOID_ELEMENTS.include?(node.name)
yield emptyTag(node.name, node.attributes, node.hasContent())
yield empty_tag(node.name, node.attributes, node.hasContent())
else
yield startTag(node.name, node.attributes)
yield start_tag(node.name, node.attributes)
for child in node.childNodes
walk(child) {|token| yield token}
end
yield endTag(node.name)
yield end_tag(node.name)
end
when CommentNode

View file

@ -0,0 +1,3 @@
module HTML5
  # Version string of the HTML5 module. Frozen so that no caller can
  # mutate the shared constant in place.
  VERSION = '0.1.0'.freeze
end

View file

@ -3,13 +3,13 @@
{"description": "bare text with leading spaces",
"options": {"strip_whitespace": true},
"input": [["Characters", "\t\r\n\u000B\u000C foo"]],
"expected": ["foo"]
"expected": [" foo"]
},
{"description": "bare text with trailing spaces",
"options": {"strip_whitespace": true},
"input": [["Characters", "foo \t\r\n\u000B\u000C"]],
"expected": ["foo"]
"expected": ["foo "]
},
{"description": "bare text with inner spaces",

View file

@ -0,0 +1,43 @@
[
{"type": "text/html", "input": ""},
{"type": "text/html", "input": "<!---->"},
{"type": "text/html", "input": "<!--asdfaslkjdf;laksjdf as;dkfjsd-->"},
{"type": "text/html", "input": "<!"},
{"type": "text/html", "input": "\t"},
{"type": "text/html", "input": "<!>"},
{"type": "text/html", "input": "<?"},
{"type": "text/html", "input": "<??>"},
{"type": "application/rss+xml", "input": "<rss"},
{"type": "application/atom+xml", "input": "<feed"},
{"type": "text/html", "input": "<html"},
{"type": "text/html", "input": "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\n<html><head>\n<title>302 Found</title>\n</head><body>\n<h1>Found</h1>\n<p>The document has moved <a href=\"http://feeds.feedburner.com/gofug\">here</a>.</p>\n</body></html>\n"},
{"type": "text/html", "input": "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">\r\n<HTML><HEAD>\r\n <link rel=\"stylesheet\" type=\"text/css\" href=\"http://cache.blogads.com/289619328/feed.css\" /><link rel=\"stylesheet\" type=\"text/css\" href=\"http://cache.blogads.com/431602649/feed.css\" />\r\n<link rel=\"stylesheet\" type=\"text/css\" href=\"http://cache.blogads.com/382549546/feed.css\" />\r\n<link rel=\"stylesheet\" type=\"text/css\" href=\"http://cache.blogads.com/314618017/feed.css\" /><META http-equiv=\"expires\" content="},
{"type": "text/html", "input": "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\r\n<html>\r\n<head>\r\n<title>Xiaxue - Chicken pie blogger.</title><meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\"><style type=\"text/css\">\r\n<style type=\"text/css\">\r\n<!--\r\nbody {\r\n background-color: #FFF2F2;\r\n}\r\n.style1 {font-family: Georgia, \"Times New Roman\", Times, serif}\r\n.style2 {\r\n color: #8a567c;\r\n font-size: 14px;\r\n font-family: Georgia, \"Times New Roman\", Times, serif;\r\n}\r"},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" lang=\"en\">\r\n<head> \r\n<title>Google Operating System</title>\r\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\r\n<meta name=\"Description\" content=\"Unofficial news and tips about Google. A blog that watches Google's latest developments and the attempts to move your operating system online.\" />\r\n<meta name=\"generator\" c"},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" lang=\"en\">\r\n<head>\r\n <title>Assimilated Press</title> <meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\r\n<meta name=\"MSSmartTagsPreventParsing\" content=\"true\" />\r\n<meta name=\"generator\" content=\"Blogger\" />\r\n<link rel=\"alternate\" type=\"application/atom+xml\" title=\"Assimilated Press - Atom\" href=\"http://assimila"},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" lang=\"en\">\r\n<head>\r\n <title>PostSecret</title>\r\n<META name=\"keywords\" Content=\"secrets, postcard, secret, postcards, postsecret, postsecrets,online confessional, post secret, post secrets, artomatic, post a secret\"><META name=\"discription\" Content=\"See a Secret...Share a Secret\"> <meta http-equiv=\"Content-Type\" content=\"te"},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n<html xmlns='http://www.w3.org/1999/xhtml' xmlns:b='http://www.google.com/2005/gml/b' xmlns:data='http://www.google.com/2005/gml/data' xmlns:expr='http://www.google.com/2005/gml/expr'>\n <head>\n \n <meta content='text/html; charset=UTF-8' http-equiv='Content-Type'/>\n <meta content='true' name='MSSmartTagsPreventParsing'/>\n <meta content='blogger' name='generator'/>\n <link rel=\"alternate\" typ"},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" dir=\"ltr\" lang=\"ja\">\n<head profile=\"http://gmpg.org/xfn/11\"> \n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" /> \n<title> CMS Lever</title><link rel=\"stylesheet\" type=\"text/css\" media=\"screen\" href=\"http://s.wordpress.com/wp-content/themes/pub/twenty-eight/2813.css\"/>\n<link rel=\"alternate\" type=\"application/rss+xml\" title=\"RSS 2.0\" h"},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" dir=\"ltr\" lang=\"en\"><head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n<title> Park Avenue Peerage</title>\t<meta name=\"generator\" content=\"WordPress.com\" />\t<!-- feeds -->\n\t<link rel=\"alternate\" type=\"application/rss+xml\" title=\"RSS 2.0\" href=\"http://parkavenuepeerage.wordpress.com/feed/\" />\t<link rel=\"pingback\" href="},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" dir=\"ltr\" lang=\"ja\"><head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n<title> \u884c\u96f2\u6d41\u6c34 -like a floating clouds and running water-</title>\t<meta name=\"generator\" content=\"WordPress.com\" />\t<!-- feeds -->\n\t<link rel=\"alternate\" type=\"application/rss+xml\" title=\"RSS 2.0\" href=\"http://shw4.wordpress.com/feed/\" />\t<li"},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\"><html xmlns=\"http://www.w3.org/1999/xhtml\">\n<head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />\n<meta name=\"generator\" content=\"http://www.typepad.com/\" />\n<title>Go Fug Yourself</title><link rel=\"stylesheet\" href=\"http://gofugyourself.typepad.com/go_fug_yourself/styles.css\" type=\"text/css\" />\n<link rel=\"alternate\" type=\"application/atom+xml\" title=\"Atom\" "},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" dir=\"ltr\" lang=\"en\"><head profile=\"http://gmpg.org/xfn/11\">\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" /><title> Ladies&#8230;</title><meta name=\"generator\" content=\"WordPress.com\" /> <!-- leave this for stats --><link rel=\"stylesheet\" href=\"http://s.wordpress.com/wp-content/themes/default/style.css?1\" type=\"tex"},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\r\n<html xmlns=\"http://www.w3.org/1999/xhtml\">\r\n<head>\r\n <title>The Sartorialist</title> <meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\r\n<meta name=\"MSSmartTagsPreventParsing\" content=\"true\" />\r\n<meta name=\"generator\" content=\"Blogger\" />\r\n<link rel=\"alternate\" type=\"application/atom+xml\" title=\"The Sartorialist - Atom\" href=\"http://thesartorialist.blogspot"},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\"><html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"en\">\n<head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-1\" />\n<meta name=\"generator\" content=\"http://www.typepad.com/\" />\n<title>Creating Passionate Users</title><link rel=\"stylesheet\" href=\"http://headrush.typepad.com/creating_passionate_users/styles.css\" type=\"text/css\" />\n<link rel=\"alternate\" type"},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n\t\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" id=\"sixapart-standard\">\n<head>\n\t<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />\n\t<meta name=\"generator\" content=\"http://www.typepad.com/\" />\n\t\n\t\n <meta name=\"keywords\" content=\"marketing, blog, seth, ideas, respect, permission\" />\n <meta name=\"description\" content=\"Seth Godin's riffs on marketing, respect, and the "},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n\t\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" id=\"sixapart-standard\">\n<head>\n\t<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />\n\t<meta name=\"generator\" content=\"http://www.typepad.com/\" />\n\t\n\t\n \n <meta name=\"description\" content=\" Western Civilization hangs in the balance. This blog is part of the solution,the cure. Get your heads out of the sand and Fight the G"},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" dir=\"ltr\" lang=\"en\">\n<head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=pahrefhttpwwwfeedburnercomtarget_blankimgsrchttpwwwfeedburnercomfbimagespubpowered_by_fbgifaltPoweredbyFeedBurnerstyleborder0ap\" />\n<title> From Under the Rotunda</title>\n<link rel=\"stylesheet\" href=\"http://s.wordpress.com/wp-content/themes/pub/andreas04/style.css\" type=\"text/css\""},
{"type": "application/atom+xml", "input": "<?xml version='1.0' encoding='UTF-8'?><?xml-stylesheet href=\"http://www.blogger.com/styles/atom.css\" type=\"text/css\"?><feed xmlns='http://www.w3.org/2005/Atom' xmlns:openSearch='http://a9.com/-/spec/opensearchrss/1.0/'><id>tag:blogger.com,1999:blog-10861780</id><updated>2007-07-27T12:38:50.888-07:00</updated><title type='text'>Official Google Blog</title><link rel='alternate' type='text/html' href='http://googleblog.blogspot.com/'/><link rel='next' type='application/atom+xml' href='http://googleblog.blogs"},
{"type": "application/rss+xml", "input": "<?xml version='1.0' encoding='UTF-8'?><rss xmlns:atom='http://www.w3.org/2005/Atom' xmlns:openSearch='http://a9.com/-/spec/opensearchrss/1.0/' version='2.0'><channel><atom:id>tag:blogger.com,1999:blog-10861780</atom:id><lastBuildDate>Fri, 27 Jul 2007 19:38:50 +0000</lastBuildDate><title>Official Google Blog</title><description/><link>http://googleblog.blogspot.com/</link><managingEditor>Eric Case</managingEditor><generator>Blogger</generator><openSearch:totalResults>729</openSearch:totalResults><openSearc"},
{"type": "application/rss+xml", "input": "<?xml version=\"1.0\" encoding=\"pahrefhttpwwwfeedburnercomtarget_blankimgsrchttpwwwfeedburnercomfbimagespubpowered_by_fbgifaltPoweredbyFeedBurnerstyleborder0ap\"?>\n<!-- generator=\"wordpress/MU\" -->\n<rss version=\"2.0\"\n\txmlns:content=\"http://purl.org/rss/1.0/modules/content/\"\n\txmlns:wfw=\"http://wellformedweb.org/CommentAPI/\"\n\txmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n\t><channel>\n\t<title>From Under the Rotunda</title>\n\t<link>http://dannybernardi.wordpress.com</link>\n\t<description>The Monographs of Danny Ber"},
{"type": "application/rss+xml", "input": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!-- generator=\"wordpress/MU\" -->\n<rss version=\"2.0\"\n\txmlns:content=\"http://purl.org/rss/1.0/modules/content/\"\n\txmlns:wfw=\"http://wellformedweb.org/CommentAPI/\"\n\txmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n\t><channel>\n\t<title>CMS Lever</title>\n\t<link>http://kanaguri.wordpress.com</link>\n\t<description>CMS\u306e\u6c17\u306b\u306a\u3063\u305f\u3053\u3068</description>\n\t<pubDate>Wed, 18 Jul 2007 21:26:22 +0000</pubDate>\n\t<generator>http://wordpress.org/?v=MU</generator>\n\t<language>ja</languag"},
{"type": "application/atom+xml", "input": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<feed xmlns=\"http://www.w3.org/2005/Atom\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:thr=\"http://purl.org/syndication/thread/1.0\">\n <title>Atlas Shrugs</title>\n <link rel=\"self\" type=\"application/atom+xml\" href=\"http://atlasshrugs2000.typepad.com/atlas_shrugs/atom.xml\" />\n <link rel=\"alternate\" type=\"text/html\" href=\"http://atlasshrugs2000.typepad.com/atlas_shrugs/\" />\n <id>tag:typepad.com,2003:weblog-132946</id>\n <updated>2007-08-15T16:07:34-04"},
{"type": "application/atom+xml", "input": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/atom10full.xsl\" type=\"text/xsl\" media=\"screen\"?><?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/itemcontent.css\" type=\"text/css\" media=\"screen\"?><feed xmlns=\"http://www.w3.org/2005/Atom\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:thr=\"http://purl.org/syndication/thread/1.0\" xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\">\r\n <title>Creating Passionate Users</title>\r\n "},
{"type": "application/atom+xml", "input": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/atom10full.xsl\" type=\"text/xsl\" media=\"screen\"?><?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/itemcontent.css\" type=\"text/css\" media=\"screen\"?><feed xmlns=\"http://www.w3.org/2005/Atom\" xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\">\r\n <title>Seth's Blog</title>\r\n <link rel=\"alternate\" type=\"text/html\" href=\"http://sethgodin.typepad.com/seths_blog/\" />\r\n <link rel=\"s"},
{"type": "application/atom+xml", "input": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/atom10full.xsl\" type=\"text/xsl\" media=\"screen\"?><?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/itemcontent.css\" type=\"text/css\" media=\"screen\"?><feed xmlns=\"http://www.w3.org/2005/Atom\" xmlns:openSearch=\"http://a9.com/-/spec/opensearchrss/1.0/\" xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\"><id>tag:blogger.com,1999:blog-32454861</id><updated>2007-07-31T21:44:09.867+02:00</upd"},
{"type": "application/atom+xml", "input": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/atomfull.xsl\" type=\"text/xsl\" media=\"screen\"?><?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/itemcontent.css\" type=\"text/css\" media=\"screen\"?><feed xmlns=\"http://purl.org/atom/ns#\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\" version=\"0.3\">\r\n <title>Go Fug Yourself</title>\r\n <link rel=\"alternate\" type=\"text/html\" href=\"http://go"},
{"type": "application/rss+xml", "input": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/rss2full.xsl\" type=\"text/xsl\" media=\"screen\"?><?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/itemcontent.css\" type=\"text/css\" media=\"screen\"?><rss xmlns:creativeCommons=\"http://backend.userland.com/creativeCommonsRssModule\" xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\" version=\"2.0\"><channel><title>Google Operating System</title><link>http://googlesystem.blogspot.com/</link>"},
{"type": "application/rss+xml", "input": "<?xml version=\"1.0\" encoding=\"\"?>\n<!-- generator=\"wordpress/MU\" -->\n<rss version=\"2.0\"\n\txmlns:content=\"http://purl.org/rss/1.0/modules/content/\"\n\txmlns:wfw=\"http://wellformedweb.org/CommentAPI/\"\n\txmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n\t><channel>\n\t<title>Nunublog</title>\n\t<link>http://nunubh.wordpress.com</link>\n\t<description>Just Newbie Blog!</description>\n\t<pubDate>Mon, 09 Jul 2007 18:54:09 +0000</pubDate>\n\t<generator>http://wordpress.org/?v=MU</generator>\n\t<language>id</language>\n\t\t\t<item>\n\t\t<ti"},
{"type": "text/html", "input": "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" lang=\"en\">\r\n<HEAD>\r\n<TITLE>Design*Sponge</TITLE><meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\r\n<meta name=\"MSSmartTagsPreventParsing\" content=\"true\" />\r\n<meta name=\"generator\" content=\"Blogger\" />\r\n<link rel=\"alternate\" type=\"application/atom+xml\" title=\"Design*Sponge - Atom\" href=\"http://designsponge.blogspot.com/feeds/posts/default\" />\r\n<link rel=\"alternate\" type=\"application/rss+xml\" title=\"Design*Sponge - RSS\" href="},
{"type": "text/html", "input": "<HTML>\n<HEAD>\n<TITLE>Moved Temporarily</TITLE>\n</HEAD>\n<BODY BGCOLOR=\"#FFFFFF\" TEXT=\"#000000\">\n<H1>Moved Temporarily</H1>\nThe document has moved <A HREF=\"http://feeds.feedburner.com/thesecretdiaryofstevejobs\">here</A>.\n</BODY>\n</HTML>\n"}
]

View file

@ -11,12 +11,24 @@
"input":"foo</bar>",
"output":[["Character", "foo"], ["EndTag", "bar"]]},
{"description":"End tag closing RCDATA or CDATA (case-insensitivity)",
"contentModelFlags":["RCDATA", "CDATA"],
"lastStartTag":"bar",
"input":"foo</bAr>",
"output":[["Character", "foo"], ["EndTag", "bar"]]},
{"description":"End tag with incorrect name in RCDATA or CDATA",
"contentModelFlags":["RCDATA", "CDATA"],
"lastStartTag":"baz",
"input":"</foo>bar</baz>",
"output":[["Character", "</foo>bar"], ["EndTag", "baz"]]},
{"description":"End tag with incorrect name in RCDATA or CDATA (starting like correct name)",
"contentModelFlags":["RCDATA", "CDATA"],
"lastStartTag":"baz",
"input":"</foo>bar</bazaar>",
"output":[["Character", "</foo>bar</bazaar>"]]},
{"description":"End tag closing RCDATA or CDATA, switching back to PCDATA",
"contentModelFlags":["RCDATA", "CDATA"],
"lastStartTag":"bar",

File diff suppressed because it is too large Load diff

View file

@ -161,6 +161,10 @@
"input":"<h a='&not1'>",
"output":["ParseError", ["StartTag", "h", {"a":"&not1"}]]},
{"description":"Entity in attribute without semicolon ending in i",
"input":"<h a='&noti'>",
"output":["ParseError", ["StartTag", "h", {"a":"&noti"}]]},
{"description":"Entity in attribute without semicolon",
"input":"<h a='&COPY'>",
"output":["ParseError", ["StartTag", "h", {"a":"©"}]]}

View file

@ -60,14 +60,6 @@
"input":"&#xD869;&#xDED6;",
"output":["ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"]]},
{"description":"Numeric entity representing a Windows-1252 'codepoint'",
"input":"&#137;",
"output":["ParseError", ["Character", "\u2030"]]},
{"description":"Hexadecimal entity representing a Windows-1252 'codepoint'",
"input":"&#x89;",
"output":["ParseError", ["Character", "\u2030"]]},
{"description":"Hexadecimal entity with mixed uppercase and lowercase",
"input":"&#xaBcD;",
"output":[["Character", "\uABCD"]]},
@ -122,7 +114,15 @@
{"description":"Null Byte Replacement",
"input":"\u0000",
"output":["ParseError", ["Character", "\ufffd"]]}
"output":["ParseError", ["Character", "\ufffd"]]},
{"description":"Comment with dash",
"input":"<!---x",
"output":["ParseError", ["Comment", "-x"]]},
{"description":"Entity + newline",
"input":"\nx\n&gt;\n",
"output":[["Character","\nx\n>\n"]]}
]}

View file

@ -0,0 +1,367 @@
{"tests": [
{"description":"<",
"input":"<",
"output":["ParseError", ["Character", "<"]]},
{"description":"<>",
"input":"<>",
"output":["ParseError", ["Character", "<>"]]},
{"description":"<!",
"input":"<!",
"output":["ParseError", ["Comment", ""]]},
{"description":"<!>",
"input":"<!>",
"output":["ParseError", ["Comment", ""]]},
{"description":"<!--",
"input":"<!--",
"output":["ParseError", ["Comment", ""]]},
{"description":"<!-->",
"input":"<!-->",
"output":["ParseError", ["Comment", ""]]},
{"description":"<!---",
"input":"<!---",
"output":["ParseError", ["Comment", ""]]},
{"description":"<!--->",
"input":"<!--->",
"output":["ParseError", ["Comment", ""]]},
{"description":"<!---->",
"input":"<!---->",
"output":[["Comment", ""]]},
{"description":"<!-----",
"input":"<!-----",
"output":["ParseError", "ParseError", ["Comment", "-"]]},
{"description":"<!----.",
"input":"<!----.",
"output":["ParseError", "ParseError", ["Comment", "--."]]},
{"description":"<!---?",
"input":"<!---?",
"output":["ParseError", ["Comment", "-?"]]},
{"description":"<!--?-",
"input":"<!--?-",
"output":["ParseError", ["Comment", "?"]]},
{"description":"<!--?--",
"input":"<!--?--",
"output":["ParseError", ["Comment", "?"]]},
{"description":"<!--?-.",
"input":"<!--?-.",
"output":["ParseError", ["Comment", "?-."]]},
{"description":"<!--?.",
"input":"<!--?.",
"output":["ParseError", ["Comment", "?."]]},
{"description":"<?>",
"input":"<?>",
"output":["ParseError", ["Comment", "?"]]},
{"description":"<??",
"input":"<??",
"output":["ParseError", ["Comment", "??"]]},
{"description":"</",
"input":"</",
"output":["ParseError", ["Character", "</"]]},
{"description":"</>",
"input":"</>",
"output":["ParseError"]},
{"description":"</?",
"input":"</?",
"output":["ParseError", ["Comment", "?"]]},
{"description":">",
"input":">",
"output":[["Character", ">"]]},
{"description":"-",
"input":"-",
"output":[["Character", "-"]]},
{"description":"?",
"input":"?",
"output":[["Character", "?"]]},
{"description":"&",
"input":"&",
"output":[["Character", "&"]]},
{"description":"&#",
"input":"&#",
"output":["ParseError", ["Character", "&#"]]},
{"description":"&#9",
"input":"&#9",
"output":["ParseError", ["Character", "\t"]]},
{"description":"<!doctype >",
"input":"<!doctype >",
"output":["ParseError", ["DOCTYPE", "", null, null, false]]},
{"description":"<!doctype ",
"input":"<!doctype ",
"output":["ParseError", ["DOCTYPE", "", null, null, false]]},
{"description":"<!doctype!>",
"input":"<!doctype!>",
"output":["ParseError", ["DOCTYPE", "!", null, null, true]]},
{"description":"<!doctype! >",
"input":"<!doctype! >",
"output":["ParseError", ["DOCTYPE", "!", null, null, true]]},
{"description":"<!doctype! ",
"input":"<!doctype! ",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
{"description":"<!doctype! ?>",
"input":"<!doctype! ?>",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
{"description":"<!doctype! ??",
"input":"<!doctype! ??",
"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
{"description":"<!doctype!?",
"input":"<!doctype!?",
"output":["ParseError", "ParseError", ["DOCTYPE", "!?", null, null, false]]},
{"description":"<!doctype! public>",
"input":"<!doctype! public>",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
{"description":"<!doctype! public ",
"input":"<!doctype! public ",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
{"description":"<!doctype! public?",
"input":"<!doctype! public?",
"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
{"description":"<!doctype! public''",
"input":"<!doctype! public''",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", "", null, false]]},
{"description":"<!doctype! public'(",
"input":"<!doctype! public'(",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", "(", null, false]]},
{"description":"<!doctype! public\"\">",
"input":"<!doctype! public\"\">",
"output":["ParseError", ["DOCTYPE", "!", "", null, true]]},
{"description":"<!doctype! public\"\" ",
"input":"<!doctype! public\"\" ",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", "", null, false]]},
{"description":"<!doctype! public\"\"?",
"input":"<!doctype! public\"\"?",
"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "!", "", null, false]]},
{"description":"<!doctype! public\"\"'",
"input":"<!doctype! public\"\"'",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", "", "", false]]},
{"description":"<!doctype! public\"\"\"",
"input":"<!doctype! public\"\"\"",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", "", "", false]]},
{"description":"<!doctype! public\"#",
"input":"<!doctype! public\"#",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", "#", null, false]]},
{"description":"<!doctype! system>",
"input":"<!doctype! system>",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
{"description":"<!doctype! system ",
"input":"<!doctype! system ",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
{"description":"<!doctype! system?",
"input":"<!doctype! system?",
"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
{"description":"<!doctype! system''",
"input":"<!doctype! system''",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, "", false]]},
{"description":"<!doctype! system'(",
"input":"<!doctype! system'(",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, "(", false]]},
{"description":"<!doctype! system\"\">",
"input":"<!doctype! system\"\">",
"output":["ParseError", ["DOCTYPE", "!", null, "", true]]},
{"description":"<!doctype! system\"\" ",
"input":"<!doctype! system\"\" ",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, "", false]]},
{"description":"<!doctype! system\"\"?",
"input":"<!doctype! system\"\"?",
"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "!", null, "", false]]},
{"description":"<!doctype! system\"#",
"input":"<!doctype! system\"#",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, "#", false]]},
{"description":"</z",
"input":"</z",
"output":["ParseError", ["EndTag", "z"]]},
{"description":"<z>",
"input":"<z>",
"output":[["StartTag", "z", {}]]},
{"description":"<z ",
"input":"<z ",
"output":["ParseError", ["StartTag", "z", {}]]},
{"description":"<z/>",
"input":"<z/>",
"output":["ParseError", ["StartTag", "z", {}]]},
{"description":"<z/ ",
"input":"<z/ ",
"output":["ParseError", "ParseError", ["StartTag", "z", {}]]},
{"description":"<z//",
"input":"<z//",
"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {}]]},
{"description":"<z",
"input":"<z",
"output":["ParseError", ["StartTag", "z", {}]]},
{"description":"</z",
"input":"</z",
"output":["ParseError", ["EndTag", "z"]]},
{"description":"<z0",
"input":"<z0",
"output":["ParseError", ["StartTag", "z0", {}]]},
{"description":"<z/0=>",
"input":"<z/0=>",
"output":["ParseError", ["StartTag", "z", {"0": ""}]]},
{"description":"<z/0= ",
"input":"<z/0= ",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
{"description":"<z/0=?>",
"input":"<z/0=?>",
"output":["ParseError", ["StartTag", "z", {"0": "?"}]]},
{"description":"<z/0=? ",
"input":"<z/0=? ",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "?"}]]},
{"description":"<z/0=??",
"input":"<z/0=??",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "??"}]]},
{"description":"<z/0=''",
"input":"<z/0=''",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
{"description":"<z/0='&",
"input":"<z/0='&",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "&"}]]},
{"description":"<z/0='%",
"input":"<z/0='%",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "%"}]]},
{"description":"<z/0=\"'",
"input":"<z/0=\"'",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "'"}]]},
{"description":"<z/0=\"\"",
"input":"<z/0=\"\"",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
{"description":"<z/0=\"&",
"input":"<z/0=\"&",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "&"}]]},
{"description":"<z/0=&",
"input":"<z/0=&",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "&"}]]},
{"description":"<z/0>",
"input":"<z/0>",
"output":["ParseError", ["StartTag", "z", {"0": ""}]]},
{"description":"<z/0 =",
"input":"<z/0 =",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
{"description":"<z/0 >",
"input":"<z/0 >",
"output":["ParseError", ["StartTag", "z", {"0": ""}]]},
{"description":"<z/0 ",
"input":"<z/0 ",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
{"description":"<z/0 /",
"input":"<z/0 /",
"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
{"description":"<z/0/",
"input":"<z/0/",
"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
{"description":"<z/00",
"input":"<z/00",
"output":["ParseError", "ParseError", ["StartTag", "z", {"00": ""}]]},
{"description":"<z/0 0",
"input":"<z/0 0",
"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
{"description":"<z/0='&#9",
"input":"<z/0='&#9",
"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": "\t"}]]},
{"description":"<z/0=\"&#9",
"input":"<z/0=\"&#9",
"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": "\t"}]]},
{"description":"<z/0=&#9",
"input":"<z/0=&#9",
"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": "\t"}]]},
{"description":"<z/0z",
"input":"<z/0z",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0z": ""}]]},
{"description":"<z/0 z",
"input":"<z/0 z",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "", "z": ""}]]},
{"description":"<zz",
"input":"<zz",
"output":["ParseError", ["StartTag", "zz", {}]]},
{"description":"<z/z",
"input":"<z/z",
"output":["ParseError", "ParseError", ["StartTag", "z", {"z": ""}]]}
]}

View file

@ -0,0 +1,198 @@
{"tests": [
{"description":"< in attribute name",
"input":"<z/0 <",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "", "<": ""}]]},
{"description":"< in attribute value",
"input":"<z x=<",
"output":["ParseError", ["StartTag", "z", {"x": "<"}]]},
{"description":"CR EOF after doctype name",
"input":"<!doctype html \r",
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
{"description":"CR EOF in tag name",
"input":"<z\r",
"output":["ParseError", ["StartTag", "z", {}]]},
{"description":"Zero hex numeric entity",
"input":"&#x0",
"output":["ParseError", "ParseError", ["Character", "\uFFFD"]]},
{"description":"Zero decimal numeric entity",
"input":"&#0",
"output":["ParseError", "ParseError", ["Character", "\uFFFD"]]},
{"description":"Zero-prefixed hex numeric entity",
"input":"&#x000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000041;",
"output":[["Character", "A"]]},
{"description":"Zero-prefixed decimal numeric entity",
"input":"&#000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000065;",
"output":[["Character", "A"]]},
{"description":"Empty hex numeric entities",
"input":"&#x &#X ",
"output":["ParseError", ["Character", "&#x "], "ParseError", ["Character", "&#X "]]},
{"description":"Empty decimal numeric entities",
"input":"&# &#; ",
"output":["ParseError", ["Character", "&# "], "ParseError", ["Character", "&#; "]]},
{"description":"Non-BMP numeric entity",
"input":"&#x10000;",
"output":[["Character", "\uD800\uDC00"]]},
{"description":"Maximum non-BMP numeric entity",
"input":"&#X10FFFF;",
"output":[["Character", "\uDBFF\uDFFF"]]},
{"description":"Above maximum numeric entity",
"input":"&#x110000;",
"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"32-bit hex numeric entity",
"input":"&#x80000041;",
"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"33-bit hex numeric entity",
"input":"&#x100000041;",
"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"33-bit decimal numeric entity",
"input":"&#4294967361;",
"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"65-bit hex numeric entity",
"input":"&#x10000000000000041;",
"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"65-bit decimal numeric entity",
"input":"&#18446744073709551681;",
"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"Surrogate code point edge cases",
"input":"&#xD7FF;&#xD800;&#xD801;&#xDFFE;&#xDFFF;&#xE000;",
"output":[["Character", "\uD7FF"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD\uE000"]]},
{"description":"Uppercase start tag name",
"input":"<X>",
"output":[["StartTag", "x", {}]]},
{"description":"Uppercase end tag name",
"input":"</X>",
"output":[["EndTag", "x"]]},
{"description":"Uppercase attribute name",
"input":"<x X>",
"output":[["StartTag", "x", { "x":"" }]]},
{"description":"Tag/attribute name case edge values",
"input":"<x@AZ[`az{ @AZ[`az{>",
"output":[["StartTag", "x@az[`az{", { "@az[`az{":"" }]]},
{"description":"Duplicate different-case attributes",
"input":"<x x=1 x=2 X=3>",
"output":["ParseError", "ParseError", ["StartTag", "x", { "x":"1" }]]},
{"description":"Uppercase close tag attributes",
"input":"</x X>",
"output":["ParseError", ["EndTag", "x"]]},
{"description":"Duplicate close tag attributes",
"input":"</x x x>",
"output":["ParseError", "ParseError", ["EndTag", "x"]]},
{"description":"Permitted slash",
"input":"<br/>",
"output":[["StartTag", "br", {}]]},
{"description":"Non-permitted slash",
"input":"<xr/>",
"output":["ParseError", ["StartTag", "xr", {}]]},
{"description":"Permitted slash but in close tag",
"input":"</br/>",
"output":["ParseError", ["EndTag", "br"]]},
{"description":"Doctype public case-sensitivity (1)",
"input":"<!DoCtYpE HtMl PuBlIc \"AbC\" \"XyZ\">",
"output":[["DOCTYPE", "HtMl", "AbC", "XyZ", true]]},
{"description":"Doctype public case-sensitivity (2)",
"input":"<!dOcTyPe hTmL pUbLiC \"aBc\" \"xYz\">",
"output":[["DOCTYPE", "hTmL", "aBc", "xYz", true]]},
{"description":"Doctype system case-sensitivity (1)",
"input":"<!DoCtYpE HtMl SyStEm \"XyZ\">",
"output":[["DOCTYPE", "HtMl", null, "XyZ", true]]},
{"description":"Doctype system case-sensitivity (2)",
"input":"<!dOcTyPe hTmL sYsTeM \"xYz\">",
"output":[["DOCTYPE", "hTmL", null, "xYz", true]]},
{"description":"U+0000 in lookahead region after non-matching character",
"input":"<!doc>\u0000",
"output":["ParseError", ["Comment", "doc"], "ParseError", ["Character", "\uFFFD"]],
"ignoreErrorOrder":true},
{"description":"U+0000 in lookahead region",
"input":"<!doc\u0000",
"output":["ParseError", "ParseError", ["Comment", "doc\uFFFD"]],
"ignoreErrorOrder":true},
{"description":"CR followed by U+0000",
"input":"\r\u0000",
"output":["ParseError", ["Character", "\n\uFFFD"]],
"ignoreErrorOrder":true},
{"description":"CR followed by non-LF",
"input":"\r?",
"output":[["Character", "\n?"]]},
{"description":"CR at EOF",
"input":"\r",
"output":[["Character", "\n"]]},
{"description":"LF at EOF",
"input":"\n",
"output":[["Character", "\n"]]},
{"description":"CR LF",
"input":"\r\n",
"output":[["Character", "\n"]]},
{"description":"CR CR",
"input":"\r\r",
"output":[["Character", "\n\n"]]},
{"description":"LF LF",
"input":"\n\n",
"output":[["Character", "\n\n"]]},
{"description":"LF CR",
"input":"\n\r",
"output":[["Character", "\n\n"]]},
{"description":"text CR CR CR text",
"input":"text\r\r\rtext",
"output":[["Character", "text\n\n\ntext"]]},
{"description":"Doctype publik",
"input":"<!DOCTYPE html PUBLIK \"AbC\" \"XyZ\">",
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
{"description":"Doctype publi",
"input":"<!DOCTYPE html PUBLI",
"output":["ParseError", "ParseError", ["DOCTYPE", "html", null, null, false]]},
{"description":"Doctype sistem",
"input":"<!DOCTYPE html SISTEM \"AbC\">",
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
{"description":"Doctype sys",
"input":"<!DOCTYPE html SYS",
"output":["ParseError", "ParseError", ["DOCTYPE", "html", null, null, false]]}
]}

View file

@ -113,7 +113,6 @@ Line1<br>Line2<br>Line3<br>Line4
<html><head></body></html>
#errors
6: missing document type declaration
19: unexpected body element end tag in head
#document
| <html>
| <head>
@ -159,7 +158,6 @@ Line1<br>Line2<br>Line3<br>Line4
</head>
#errors
7: missing document type declaration
7: unexpected head element end tag
#document
| <html>
| <head>
@ -169,7 +167,6 @@ Line1<br>Line2<br>Line3<br>Line4
</body>
#errors
7: missing document type declaration
7: unexpected body element end tag
#document
| <html>
| <head>
@ -437,6 +434,7 @@ Unexpected end of file
#data
<!DOCTYPE HTML><li>hello<li>world<ul>how<li>do</ul>you</body><!--do-->
#errors
Unexpected end of file. Expected </li>. XXX
#document
| <!DOCTYPE HTML>
| <html>
@ -638,7 +636,6 @@ Unexpected end of file
#data
<!DOCTYPE HTML><script> <!-- </script> --> </script> EOF
#errors
52: unexpected script element end tag
#document
| <!DOCTYPE HTML>
| <html>
@ -732,6 +729,7 @@ Unexpected end of file
#errors
6: missing document type declaration
29: mismatched font element end tag (misnested tags)
AAA </font> tag strikes again
35: mismatched body element end tag (premature end of file?)
#document
| <html>
@ -1122,6 +1120,7 @@ Unexpected end of file
15: missing document type declaration
39: unexpected node in table context
39: a element start tag implying a element end tag
AAA violation: </a>
39: unexpected node in table context
39: mismatched a element end tag (misnested tags across <table> tag)
43: unexpected node in table context
@ -1177,6 +1176,8 @@ Unexpected end of file
7: missing document type declaration
22: unexpected node in table context
27: unexpected node in table context
XXX more table voodoo
XXX more table voodoo
54: unexpected td element end tag implied other end tags
63: unexpected node in table context
72: mismatched body element end tag (premature end of file?)
@ -1301,11 +1302,9 @@ unexpected EOF
#errors
6: missing document type declaration
12: unexpected body element start tag
18: base element start tag out of place
24: link element start tag out of place
30: meta element start tag out of place
37: title element start tag out of place
54: unexpected body element start tag
Missing end tag </p>. XXX
#document
| <html>
| <head>
@ -1346,7 +1345,6 @@ unexpected EOF
3: missing document type declaration
13: unexpected node in table context
13: a element start tag implying a element end tag
13: unexpected node in table context
13: mismatched a element end tag (misnested tags across <table> tag)
21: mismatched table element end tag
27: a element start tag implying a element end tag
@ -1576,6 +1574,8 @@ unexpected EOF
<ul><li></li><div><li></div><li><li><div><li><address><li><b><em></b><li></ul>
#errors
4: missing document type declaration
Missing end tag for <div> (nr2)
Missing end tag for <address>
69: mismatched b element end tag (misnested tags)
#document
| <html>
@ -1620,7 +1620,6 @@ unexpected EOF
56: unexpected frameset element start tag in body
63: unexpected frame element start tag in body
74: unexpected frameset element end tag
87: unescaped '</' in CDATA or RCDATA block
106: unexpected end of file while parsing CDATA section for element noframes
#document
| <html>
@ -1635,6 +1634,7 @@ unexpected EOF
4: missing document type declaration
15: required tr element start tag implied by unexpected td element start tag
27: unexpected td element end tag implied other end tags
Unexpected </h1> tag. Expected other.
Unexpected EOF
#document
| <html>
@ -1742,9 +1742,9 @@ Unexpected EOF
108: unexpected h4 element end tag
113: unexpected h5 element end tag
118: unexpected h6 element end tag
125: unexpected body element end tag
125: unexpected end tag token br in after body phase
130: unexpected br element end tag
134: unexpected a element end tag
134: unexpected a element end tag (AAA)
140: unexpected img element end tag
148: unexpected title element end tag
155: unexpected span element end tag
@ -1926,6 +1926,9 @@ Unexpected EOF
610: unexpected option element end tag
622: unexpected plaintext element end tag
633: mismatched special end tag textarea
XXX
XXX
XXX
#document
| <html>
| <head>
@ -1935,3 +1938,13 @@ Unexpected EOF
| <tbody>
| <tr>
| <p>
#data
<frameset>
#errors
10: Start tag seen without seeing a doctype first.
11: End of file seen and there were open elements.
#document
| <html>
| <head>
| <frameset>

View file

@ -12,7 +12,6 @@
<textarea>test</div>test
#errors
10: missing document type declaration.
17: unescaped '</' in CDATA or RCDATA block.
25: unexpected end of file while parsing CDATA section for element textarea.
#document
| <html>
@ -87,6 +86,8 @@ Expected end tag </frameset>
#data
<!DOCTYPE HTML><font><p><b>test</font>
#errors
AAA violation. </font>
AAA violation. </font>
#document
| <!DOCTYPE HTML>
| <html>
@ -101,6 +102,7 @@ Expected end tag </frameset>
#data
<!DOCTYPE HTML><dt><div><dd>
#errors
Missing end tag for <div>.
#document
| <!DOCTYPE HTML>
| <html>
@ -114,7 +116,6 @@ Expected end tag </frameset>
<script></x
#errors
no document type
</ in script
Unexpected end of file. Expected </script> end tag.
#document
| <html>
@ -129,6 +130,7 @@ Unexpected end of file. Expected </script> end tag.
no document type
<plaintext> directly inside table
Characters inside table.
Characters inside table. (XXX?)
Unexpected end of file.
#document
| <html>
@ -175,10 +177,10 @@ Unexpected start tag "body"
| <html>
| <head>
| <body>
| t4="4"
| t1="1"
| t2="2"
| t3="3"
| t1="1"
| t4="4"
#data
</b test
@ -195,7 +197,6 @@ Unexpected end tag.
#data
<!DOCTYPE HTML></b test<b &=&amp>X
#errors
Unexpected < in attribute
End tag contains attributes.
Unexpected end tag.
Named entity didn't end with ;
@ -224,7 +225,6 @@ Unexpected EOF in (end) tag name
&
#errors
No doctype.
Unfinished entity.
#document
| <html>
| <head>
@ -349,11 +349,11 @@ Unexpected end EOF. Missing closing tags.
| <b>
| <i>
| <u>
| " "
| <p>
| <b>
| <i>
| <u>
| <b>
| <i>
| <u>
| " "
| <p>
| "X"
#data
@ -538,10 +538,10 @@ No doctype
| <hr>
| <p>
| <label>
| "This is a searchable index. Insert your search keywords here:"
| "This is a searchable index. Insert your search keywords here: "
| <input>
| test="x"
| name="isindex"
| test="x"
| <hr>
#data
@ -571,19 +571,18 @@ Unexpected EOF.
| <b>
| <i>
| <u>
| "
| <b>
| <i>
| <u>
| "
"
| <p>
| <b>
| <i>
| <u>
| <p>
| "X"
#data
<!DOCTYPE HTML><body><title>test</body></title>
#errors
Unexpected start tag that belongs in the head.
Expected closing tag after </.
#document
| <!DOCTYPE HTML>
| <html>
@ -596,10 +595,7 @@ Expected closing tag after </.
<!DOCTYPE HTML><body><title>X</title><meta name=z><link rel=foo><style>
x { content:"</style" } </style>
#errors
Unexpected start tag that belongs in head.
Unexpected start tag that belongs in head.
Unexpected start tag that belongs in head.
Expected closing tag after </.
Unexpected start tag that belongs in head. <title>
#document
| <!DOCTYPE HTML>
| <html>
@ -632,8 +628,6 @@ x { content:"</style" } "
#errors
No doctype.
#document
| "
"
| <html>
| <head>
| <body>
@ -643,7 +637,6 @@ No doctype.
#errors
#document
| <!DOCTYPE HTML>
| " "
| <html>
| <head>
| <body>
@ -749,8 +742,8 @@ Solidus (/) incorrectly placed.
| <body>
| "X"
| <p>
| y=""
| x=""
| y=""
| z=""
#data

View file

@ -131,6 +131,7 @@ y"
<!DOCTYPE htML><html><head></head><body><pre>x<div>
y</pre></body></html>
#errors
End tag <pre> seen too early. Expected other end tag.
#document
| <!DOCTYPE htML>
| <html>
@ -140,11 +141,12 @@ y</pre></body></html>
| "x"
| <div>
| "
| y"
y"
#data
<!DOCTYPE htML><HTML><META><HEAD></HEAD></HTML>
#errors
Unexpected start tag HEAD in HEAD. Ignored.
#document
| <!DOCTYPE htML>
| <html>
@ -155,6 +157,7 @@ y</pre></body></html>
#data
<!DOCTYPE htML><HTML><HEAD><head></HEAD></HTML>
#errors
Unexpected start tag HEAD in HEAD. Ignored.
#document
| <!DOCTYPE htML>
| <html>
@ -164,6 +167,8 @@ y</pre></body></html>
#data
<textarea>foo<span>bar</span><i>baz
#errors
Unexpected start tag. Expected DOCTYPE.
Unexpected end of file.
#document
| <html>
| <head>
@ -174,6 +179,8 @@ y</pre></body></html>
#data
<title>foo<span>bar</em><i>baz
#errors
Unexpected start tag. Expected DOCTYPE.
Unexpected end of file.
#document
| <html>
| <head>
@ -236,6 +243,8 @@ Missing end tag (div)
#data
<!doctype html><nobr><nobr><nobr>
#errors
Unexpected <nobr> tag.
Unexpected <nobr> tag.
Unexpected end of file.
#document
| <!DOCTYPE html>
@ -249,6 +258,7 @@ Unexpected end of file.
#data
<!doctype html><nobr><nobr></nobr><nobr>
#errors
Unexpected <nobr> tag.
Unexpected end of file.
#document
| <!DOCTYPE html>

View file

@ -41,6 +41,7 @@ plaintext
#data
setting html's innerHTML
#errors
XXX innerHTML EOF
#document-fragment
html
#document
@ -51,6 +52,7 @@ html
#data
<title>setting head's innerHTML</title>
#errors
Unexpected title element that belongs in head.
#document-fragment
head
#document

View file

@ -110,7 +110,6 @@ No DOCTYPE
<style> <!</-- </style>x
#errors
No DOCTYPE
Unexpected end of file
#document
| <html>
| <head>
@ -118,3 +117,59 @@ Unexpected end of file
| " <!</-- "
| <body>
| "x"
#data
<xmp> <!-- > --> </xmp>
#errors
No DOCTYPE
#document
| <html>
| <head>
| <body>
| <xmp>
| " <!-- > --> "
#data
<title>&amp;</title>
#errors
No DOCTYPE
#document
| <html>
| <head>
| <title>
| "&"
| <body>
#data
<title><!--&amp;--></title>
#errors
No DOCTYPE
#document
| <html>
| <head>
| <title>
| "<!--&amp;-->"
| <body>
#data
<title><!--</title>
#errors
No DOCTYPE
Unexpected EOF
#document
| <html>
| <head>
| <title>
| "<!--</title>"
| <body>
#data
<noscript><!--</noscript>--></noscript>
#errors
No DOCTYPE
#document
| <html>
| <head>
| <noscript>
| "<!--</noscript>-->"
| <body>

View file

@ -1,6 +1,7 @@
#data
<!doctype html></head> <head>
#errors
Unexpected start tag head. Ignored.
#document
| <!DOCTYPE html>
| <html>
@ -11,6 +12,9 @@
#data
<!doctype html></html> <head>
#errors
Unexpected start tag head.
Unexpected start tag head in after body phase.
Unexpected start tag head. Ignored.
#document
| <!DOCTYPE html>
| <html>
@ -21,6 +25,7 @@
#data
<!doctype html></body><meta>
#errors
Unexpected meta element in after body phase.
#document
| <!DOCTYPE html>
| <html>
@ -45,7 +50,6 @@ Unexpected end of file.
#data
<!doctype HTml><title>&amp;</title>
#errors
Unexpected end of file.
#document
| <!DOCTYPE HTml>
| <html>
@ -57,7 +61,6 @@ Unexpected end of file.
#data
<!doctype HTml><title><!--&amp;--></title>
#errors
Unexpected end of file.
#document
| <!DOCTYPE HTml>
| <html>
@ -65,3 +68,26 @@ Unexpected end of file.
| <title>
| "<!--&amp;-->"
| <body>
#data
<!doctype>
#errors
No space after "doctype"
Unexpected ">"
Incorrect doctype
#document
| <!DOCTYPE >
| <html>
| <head>
| <body>
#data
<!---x
#errors
End of file in comment
End of file before doctype
#document
| <!-- -x -->
| <html>
| <head>
| <body>

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,159 @@
{"tests": [
{"description": "valid single class attribute value",
"input": "<span class=a>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with leading space",
"input": "<span class=' a'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with trailing space",
"input": "<span class='a '>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with leading and trailing space",
"input": "<span class=' a '>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with leading tab",
"input": "<span class=' a'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with trailing tab",
"input": "<span class='a '>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with leading and trailing tab",
"input": "<span class=' a '>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with leading LF",
"input": "<span class='
a'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with trailing LF",
"input": "<span class='a
'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with leading and trailing LF",
"input": "<span class='
a
'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with leading LT",
"input": "<span class=' a'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with trailing LT",
"input": "<span class='a '>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with leading and trailing LT",
"input": "<span class=' a '>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with leading FF",
"input": "<span class=' a'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with trailing FF",
"input": "<span class='a '>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with leading and trailing FF",
"input": "<span class=' a '>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with leading CR",
"input": "<span class=' a'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with trailing CR",
"input": "<span class='a '>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with leading and trailing CR",
"input": "<span class=' a '>",
"fail-if": "invalid-attribute-value"},
{"description": "valid double class attribute value separated by space",
"input": "<span class='a b'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid double class attribute value separated by tab",
"input": "<span class='a b'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid double class attribute value separated by LF",
"input": "<span class='a
b'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid double class attribute value separated by LT",
"input": "<span class='a b'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid double class attribute value separated by FF",
"input": "<span class='a b'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid double class attribute value separated by CR",
"input": "<span class='a b'>",
"fail-if": "invalid-attribute-value"},
{"description": "invalid duplicated class attribute value separated by space",
"input": "<span class='a a'>",
"fail-unless": "invalid-attribute-value"},
{"description": "invalid duplicated class attribute value separated by tab",
"input": "<span class='a a'>",
"fail-unless": "invalid-attribute-value"},
{"description": "invalid duplicated class attribute value separated by LF",
"input": "<span class='a
a'>",
"fail-unless": "invalid-attribute-value"},
{"description": "invalid duplicated class attribute value separated by LT",
"input": "<span class='a a'>",
"fail-unless": "invalid-attribute-value"},
{"description": "invalid duplicated class attribute value separated by FF",
"input": "<span class='a a'>",
"fail-unless": "invalid-attribute-value"},
{"description": "invalid duplicated class attribute value separated by CR",
"input": "<span class='a a'>",
"fail-unless": "duplicate-value-in-token-list"},
{"description": "invalid duplicated class attribute value separated by space",
"input": "<span class='a a'>",
"fail-unless": "duplicate-value-in-token-list"},
{"description": "invalid duplicated class attribute value separated by tab",
"input": "<span class='a a'>",
"fail-unless": "duplicate-value-in-token-list"},
{"description": "invalid duplicated class attribute value separated by LF",
"input": "<span class='a
a'>",
"fail-unless": "duplicate-value-in-token-list"},
{"description": "invalid duplicated class attribute value separated by LT",
"input": "<span class='a a'>",
"fail-unless": "duplicate-value-in-token-list"},
{"description": "invalid duplicated class attribute value separated by FF",
"input": "<span class='a a'>",
"fail-unless": "duplicate-value-in-token-list"},
{"description": "invalid duplicated class attribute value separated by CR",
"input": "<span class='a a'>",
"fail-unless": "duplicate-value-in-token-list"}
]}

View file

@ -0,0 +1,59 @@
{"tests": [
{"description": "valid contenteditable attribute value 'true'",
"input": "<span contenteditable=true>",
"fail-if": "invalid-attribute-value"},
{"description": "valid contenteditable attribute value 'TRUE'",
"input": "<span contenteditable=TRUE>",
"fail-if": "invalid-attribute-value"},
{"description": "valid contenteditable attribute value 'TrUe'",
"input": "<span contenteditable=TrUe>",
"fail-if": "invalid-attribute-value"},
{"description": "valid contenteditable attribute value 'false'",
"input": "<span contenteditable=false>",
"fail-if": "invalid-attribute-value"},
{"description": "valid contenteditable attribute value 'FALSE'",
"input": "<span contenteditable=FALSE>",
"fail-if": "invalid-attribute-value"},
{"description": "valid contenteditable attribute value 'FalSe'",
"input": "<span contenteditable=FalSe>",
"fail-if": "invalid-attribute-value"},
{"description": "valid contenteditable attribute value ''",
"input": "<span contenteditable=''>",
"fail-if": "invalid-attribute-value"},
{"description": "valid contenteditable attribute value (not specified)",
"input": "<span contenteditable>",
"fail-if": "invalid-attribute-value"},
{"description": "invalid contenteditable attribute value 'foo'",
"input": "<span contenteditable=foo>",
"fail-unless": "invalid-attribute-value"},
{"description": "invalid contenteditable attribute value '0'",
"input": "<span contenteditable=0>",
"fail-unless": "invalid-attribute-value"},
{"description": "invalid contenteditable attribute value '1'",
"input": "<span contenteditable=1>",
"fail-unless": "invalid-attribute-value"},
{"description": "invalid contenteditable attribute value 'yes'",
"input": "<span contenteditable=yes>",
"fail-unless": "invalid-attribute-value"},
{"description": "invalid contenteditable attribute value 'no'",
"input": "<span contenteditable=no>",
"fail-unless": "invalid-attribute-value"},
{"description": "invalid contenteditable attribute value 'inherit'",
"input": "<span contenteditable=inherit>",
"fail-unless": "invalid-attribute-value"}
]}

View file

@ -0,0 +1,118 @@
{"tests": [
{"description": "contextmenu points to valid ID earlier",
"input": "<menu id=a><span contextmenu=a>",
"fail-if": "id-does-not-exist"},
{"description": "contextmenu points to valid ID later",
"input": "<span contextmenu=a><menu id=a>",
"fail-if": "id-does-not-exist"},
{"description": "contextmenu points to non-existent ID",
"input": "<span contextmenu=a>",
"fail-unless": "id-does-not-exist"},
{"description": "contextmenu points to ID on non-menu element",
"input": "<span id=a><span contextmenu=a>",
"fail-unless": "contextmenu-must-point-to-menu"},
{"description": "uppercase contextmenu points to ID on non-menu element",
"input": "<span id=a><span CONTEXTMENU=a>",
"fail-unless": "contextmenu-must-point-to-menu"},
{"description": "valid ID 'a'",
"input": "<span contextmenu=a>",
"fail-if": "invalid-attribute-value"},
{"description": "valid ID '1'",
"input": "<span contextmenu=1>",
"fail-if": "invalid-attribute-value"},
{"description": "wacky but valid ID",
"input": "<span contextmenu='<html><head><title>a</title></head><body><p>b</p></body></html>'>",
"fail-if": "invalid-attribute-value"},
{"description": "invalid blank ID",
"input": "<span id>",
"fail-unless": "attribute-value-can-not-be-blank"},
{"description": "invalid blank ID with quotes",
"input": "<span contextmenu=''>",
"fail-unless": "attribute-value-can-not-be-blank"},
{"description": "invalid ID because of leading space",
"input": "<span contextmenu=' a'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of trailing space",
"input": "<span contextmenu='a '>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of space in value",
"input": "<span contextmenu='a b'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of leading tab",
"input": "<span contextmenu=' a'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of trailing tab",
"input": "<span contextmenu='a '>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of tab in value",
"input": "<span contextmenu='a b'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of leading LF",
"input": "<span contextmenu='
a'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of trailing LF",
"input": "<span contextmenu='a
'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of LF in value",
"input": "<span contextmenu='a
b'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of leading LT",
"input": "<span contextmenu=' a'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of trailing LT",
"input": "<span contextmenu='a '>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of LT in value",
"input": "<span contextmenu='a b'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of leading FF",
"input": "<span contextmenu=' a'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of trailing FF",
"input": "<span contextmenu='a '>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of FF in value",
"input": "<span contextmenu='a b'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of leading CR",
"input": "<span contextmenu=' a'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of trailing CR",
"input": "<span contextmenu='a '>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of CR in value",
"input": "<span contextmenu='a b'>",
"fail-unless": "space-in-id"}
]}

View file

@ -0,0 +1,118 @@
{"tests": [
{"description": "valid ID 'a'",
"input": "<span id=a>",
"fail-if": "invalid-attribute-value"},
{"description": "valid ID '1'",
"input": "<span id=1>",
"fail-if": "invalid-attribute-value"},
{"description": "wacky but valid ID",
"input": "<span id='<html><head><title>a</title></head><body><p>b</p></body></html>'>",
"fail-if": "invalid-attribute-value"},
{"description": "invalid blank ID",
"input": "<span id>",
"fail-unless": "attribute-value-can-not-be-blank"},
{"description": "invalid blank ID with quotes",
"input": "<span id=''>",
"fail-unless": "attribute-value-can-not-be-blank"},
{"description": "invalid ID because of leading space",
"input": "<span id=' a'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of trailing space",
"input": "<span id='a '>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of space in value",
"input": "<span id='a b'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of leading tab",
"input": "<span id=' a'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of trailing tab",
"input": "<span id='a '>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of tab in value",
"input": "<span id='a b'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of leading LF",
"input": "<span id='
a'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of trailing LF",
"input": "<span id='a
'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of LF in value",
"input": "<span id='a
b'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of leading LT",
"input": "<span id=' a'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of trailing LT",
"input": "<span id='a '>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of LT in value",
"input": "<span id='a b'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of leading FF",
"input": "<span id=' a'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of trailing FF",
"input": "<span id='a '>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of FF in value",
"input": "<span id='a b'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of leading CR",
"input": "<span id=' a'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of trailing CR",
"input": "<span id='a '>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of CR in value",
"input": "<span id='a b'>",
"fail-unless": "space-in-id"},
{"description": "duplicate ID values",
"input": "<span id=a><span id=a>",
"fail-unless": "duplicate-id"},
{"description": "duplicate ID values with spaces (weird but true)",
"input": "<span id='a '><span id='a '>",
"fail-unless": "duplicate-id"},
{"description": "not duplicate ID values because spaces don't match",
"input": "<span id=a><span id='a '>",
"fail-if": "duplicate-id"},
{"description": "not duplicate ID values because spaces don't match",
"input": "<span id=' a'><span id='a '>",
"fail-if": "duplicate-id"},
{"description": "not duplicate ID values because case doesn't match",
"input": "<span id=a><span id=A>",
"fail-if": "duplicate-id"}
]}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,375 @@
{"tests": [
{"description": "unknown start tag <foo>",
"input": "<foo>",
"fail-unless": "unknown-start-tag"},
{"description": "allowed start tag <code>",
"input": "<code>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <kbd>",
"input": "<kbd>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <aside>",
"input": "<aside>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <datagrid>",
"input": "<datagrid>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <font>",
"input": "<font>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <noscript>",
"input": "<noscript>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <style>",
"input": "<style>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <img>",
"input": "<img>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <title>",
"input": "<title>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <menu>",
"input": "<menu>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <tr>",
"input": "<tr>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <param>",
"input": "<param>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <li>",
"input": "<li>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <source>",
"input": "<source>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <tfoot>",
"input": "<tfoot>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <th>",
"input": "<th>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <td>",
"input": "<td>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <dl>",
"input": "<dl>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <blockquote>",
"input": "<blockquote>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <dd>",
"input": "<dd>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <abbr>",
"input": "<abbr>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <dt>",
"input": "<dt>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <noembed>",
"input": "<noembed>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <p>",
"input": "<p>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <small>",
"input": "<small>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <meter>",
"input": "<meter>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <em>",
"input": "<em>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <meta>",
"input": "<meta>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <video>",
"input": "<video>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <div>",
"input": "<div>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <canvas>",
"input": "<canvas>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <sub>",
"input": "<sub>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <section>",
"input": "<section>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <sup>",
"input": "<sup>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <progress>",
"input": "<progress>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <body>",
"input": "<body>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <base>",
"input": "<base>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <br>",
"input": "<br>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <address>",
"input": "<address>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <article>",
"input": "<article>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <strong>",
"input": "<strong>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <legend>",
"input": "<legend>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <event-source>",
"input": "<event-source>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <ol>",
"input": "<ol>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <script>",
"input": "<script>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <caption>",
"input": "<caption>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <dialog>",
"input": "<dialog>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <col>",
"input": "<col>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <h2>",
"input": "<h2>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <h3>",
"input": "<h3>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <h1>",
"input": "<h1>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <h6>",
"input": "<h6>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <h4>",
"input": "<h4>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <h5>",
"input": "<h5>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <header>",
"input": "<header>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <table>",
"input": "<table>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <span>",
"input": "<span>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <area>",
"input": "<area>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <dfn>",
"input": "<dfn>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <var>",
"input": "<var>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <cite>",
"input": "<cite>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <thead>",
"input": "<thead>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <head>",
"input": "<head>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <hr>",
"input": "<hr>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <link>",
"input": "<link>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <datatemplate>",
"input": "<datatemplate>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <b>",
"input": "<b>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <colgroup>",
"input": "<colgroup>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <ul>",
"input": "<ul>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <del>",
"input": "<del>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <iframe>",
"input": "<iframe>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <pre>",
"input": "<pre>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <figure>",
"input": "<figure>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <ins>",
"input": "<ins>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <tbody>",
"input": "<tbody>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <html>",
"input": "<html>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <nav>",
"input": "<nav>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <details>",
"input": "<details>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <samp>",
"input": "<samp>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <map>",
"input": "<map>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <nest>",
"input": "<nest>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <object>",
"input": "<object>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <a>",
"input": "<a>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <footer>",
"input": "<footer>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <i>",
"input": "<i>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <m>",
"input": "<m>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <rule>",
"input": "<rule>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <q>",
"input": "<q>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <command>",
"input": "<command>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <time>",
"input": "<time>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <audio>",
"input": "<audio>",
"fail-if": "unknown-start-tag"},
{"description": "allowed start tag <bdo>",
"input": "<bdo>",
"fail-if": "unknown-start-tag"}
]}

View file

@ -16,19 +16,8 @@ def html5_test_files(subdirectory)
Dir[File.join(TESTDATA_DIR, subdirectory, '*.*')]
end
begin
require 'rubygems'
require 'json'
rescue LoadError
class JSON
def self.parse json
json.gsub!(/"\s*:/, '"=>')
json.gsub!(/\\u[0-9a-fA-F]{4}/) {|x| [x[2..-1].to_i(16)].pack('U')}
null = nil
eval json
end
end
end
require 'rubygems'
require 'json'
module HTML5
module TestSupport

View file

@ -6,7 +6,7 @@ XMLELEM = /<(\w+\s*)((?:[-:\w]+="[^"]*"\s*)+)(\/?)>/
def assert_xml_equal(input, expected=nil, parser=HTML5::XMLParser)
sortattrs = proc {"<#{$1+$2.split.sort.join(' ')+$3}>"}
document = parser.parse(input.chomp).root
document = parser.parse(input.chomp, :lowercase_attr_name => false, :lowercase_element_name => false).root
if not expected
expected = input.chomp.gsub(XMLELEM,&sortattrs)
expected = expected.gsub(/&#(\d+);/) {[$1.to_i].pack('U')}
@ -257,6 +257,22 @@ EOX1
<head><title>PROLOG</title></head>
<body>
</body></html>
EOX2
end
# Tag-soup case: the input opens <u> before <blockquote><p> and closes it
# inside the <p>, so the <u> element is misnested across block elements.
# The expected document has an empty <u/> before the <blockquote>, another
# inside the <blockquote>, and another inside the <p>, with </p> and
# </blockquote> emitted before </body>.
# NOTE(review): assert_xhtml_equal is defined outside this view; presumably it
# parses the first heredoc and compares the result with the second - confirm.
def test_tagsoup
assert_xhtml_equal <<EOX1, <<EOX2.strip
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>TAGSOUP</title></head>
<body>
<u><blockquote><p></u>
</body></html>
EOX1
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>TAGSOUP</title></head>
<body>
<u/><blockquote><u/><p><u/>
</p></blockquote></body></html>
EOX2
end

View file

@ -26,8 +26,9 @@ class Html5ParserTestCase < Test::Unit::TestCase
test_name = File.basename(test_file).sub('.dat', '')
TestData.new(test_file, %w(data errors document-fragment document)).
each_with_index do |(input, errors, innerHTML, expected), index|
each_with_index do |(input, errors, inner_html, expected), index|
errors = errors.split("\n")
expected = expected.gsub("\n| ","\n")[2..-1]
$tree_types_to_test.each do |tree_name|
@ -35,8 +36,8 @@ class Html5ParserTestCase < Test::Unit::TestCase
parser = HTMLParser.new(:tree => TreeBuilders[tree_name])
if innerHTML
parser.parseFragment(input, innerHTML)
if inner_html
parser.parse_fragment(input, inner_html)
else
parser.parse(input)
end
@ -49,16 +50,15 @@ class Html5ParserTestCase < Test::Unit::TestCase
'', 'Recieved:', actual_output
].join("\n")
if $CHECK_PARSER_ERRORS
actual_errors = parser.errors.map do |(line, col), message|
'Line: %i Col: %i %s' % [line, col, message]
end
assert_equal errors.length, parser.errors.length, [
'Input', input + "\n",
'Expected errors:', errors.join("\n"),
'Actual errors:', actual_errors.join("\n")
].join("\n")
actual_errors = parser.errors.map do |(line, col), message|
'Line: %i Col: %i %s' % [line, col, message]
end
assert_equal errors.length, parser.errors.length, [
'', 'Input', input,
'', "Expected errors (#{errors.length}):", errors.join("\n"),
'', "Actual errors (#{actual_errors.length}):",
actual_errors.join("\n")
].join("\n")
end
end

View file

@ -12,17 +12,17 @@ class SanitizeTest < Test::Unit::TestCase
include HTML5
def sanitize_xhtml stream
XHTMLParser.parseFragment(stream, {:tokenizer => HTMLSanitizer, :encoding => 'utf-8'}).to_s
XHTMLParser.parse_fragment(stream, {:tokenizer => HTMLSanitizer, :encoding => 'utf-8', :lowercase_element_name => false, :lowercase_attr_name => false}).to_s
end
def sanitize_html stream
HTMLParser.parseFragment(stream, {:tokenizer => HTMLSanitizer, :encoding => 'utf-8'}).to_s
HTMLParser.parse_fragment(stream, {:tokenizer => HTMLSanitizer, :encoding => 'utf-8', :lowercase_element_name => false, :lowercase_attr_name => false}).to_s
end
def sanitize_rexml stream
require 'rexml/document'
doc = REXML::Document.new("<div xmlns='http://www.w3.org/1999/xhtml'>#{stream}</div>")
tokens = TreeWalkers.getTreeWalker('rexml').new(doc)
tokens = TreeWalkers.get_tree_walker('rexml').new(doc)
XHTMLSerializer.serialize(tokens, {:encoding=>'utf-8',
:quote_char => "'",
:inject_meta_charset => false,
@ -39,8 +39,8 @@ class SanitizeTest < Test::Unit::TestCase
HTMLSanitizer::ALLOWED_ELEMENTS.each do |tag_name|
define_method "test_should_allow_#{tag_name}_tag" do
input = "<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>"
htmloutput = "<#{tag_name.downcase} title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</#{tag_name.downcase}>"
input = "<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>"
htmloutput = "<#{tag_name.downcase} title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</#{tag_name.downcase}>"
xhtmloutput = "<#{tag_name} title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</#{tag_name}>"
rexmloutput = xhtmloutput

View file

@ -12,17 +12,17 @@ class JsonWalker < HTML5::TreeWalkers::Base
@tree.each do |token|
case token[0]
when 'StartTag'
yield startTag(token[1], token[2])
yield start_tag(token[1], token[2])
when 'EndTag'
yield endTag(token[1])
yield end_tag(token[1])
when 'EmptyTag'
yield emptyTag(token[1], token[2])
yield empty_tag(token[1], token[2])
when 'Comment'
yield comment(token[1])
when 'Characters', 'SpaceCharacters'
text(token[1]) {|textToken| yield textToken}
when 'Doctype'
yield doctype(token[1])
yield doctype(token[1], token[2], token[3])
else
raise "Unknown token type: " + token[0]
end

View file

@ -0,0 +1,27 @@
require File.join(File.dirname(__FILE__), 'preamble')
require "html5/sniffer"
# Data-driven tests for the HTML-vs-feed sniffer.
#
# Every file returned by html5_test_files('sniffer') is read and parsed as
# JSON; each entry in the parsed collection becomes one generated test method
# named test_<file basename without '.test'>_<1-based index>. A generated test
# passes when html_or_feed(entry['input']) equals entry['type'].
class TestFeedTypeSniffer < Test::Unit::TestCase
  include HTML5
  include TestSupport
  include Sniffer

  html5_test_files('sniffer').each do |fixture_path|
    suite = File.basename(fixture_path).sub('.test', '')
    cases = JSON.parse(File.read(fixture_path))

    cases.each_with_index do |test_case, position|
      # Numbering starts at 1 so method names line up with the fixture order.
      define_method("test_#{suite}_#{position + 1}") do
        assert_equal test_case['type'], html_or_feed(test_case['input'])
      end
    end
  end

  # Disabled earlier variant, kept for reference:
  # each_with_index do |t, i|
  # define_method "test_#{i}" do
  # assert_equal t[0], sniff_feed_type(t[1])
  # end
  # end
end

View file

@ -6,6 +6,33 @@ require 'tokenizer_test_parser'
class Html5TokenizerTestCase < Test::Unit::TestCase
# Asserts that the tokenizer produced the expected token stream.
#
# expectedTokens   - Array of expected tokens; a parse error is represented by
#                    the String "ParseError".
# receivedTokens   - Array of tokens actually produced, in the same format.
# ignoreErrorOrder - when falsy, the two streams must be equal element by
#                    element. When truthy, the non-error tokens must match in
#                    order and the number of "ParseError" entries must match,
#                    but where the errors occur in the stream is ignored.
# message          - failure message handed to assert_equal.
#
# Fails the running test (via assert_equal) on any mismatch.
def assert_tokens_match(expectedTokens, receivedTokens, ignoreErrorOrder, message)
  if !ignoreErrorOrder
    # Strict mode must assert as well: merely returning the comparison result
    # lets a mismatch pass unnoticed whenever the return value is not checked.
    assert_equal expectedTokens, receivedTokens, message
  else
    # Split each stream into [non-error tokens, parse errors] so that only the
    # order of real tokens and the count of errors take part in the comparison.
    expected = expectedTokens.partition { |token| token != "ParseError" }
    received = receivedTokens.partition { |token| token != "ParseError" }
    assert_equal expected, received, message
  end
end
# Tells whether +token+ is a non-error token whose kind is +token_name+.
# A parse error is the bare string 'ParseError' and never matches; any other
# token is identified by its first element.
def type_of?(token_name, token)
  return false if token == 'ParseError'

  token_name == token.first
end
@ -38,9 +65,9 @@ class Html5TokenizerTestCase < Test::Unit::TestCase
assert_nothing_raised message do
tokenizer = HTML5::HTMLTokenizer.new(data['input'])
tokenizer.contentModelFlag = content_model_flag.to_sym
tokenizer.content_model_flag = content_model_flag.to_sym
tokenizer.currentToken = {:type => :startTag, :name => data['lastStartTag']} if data.has_key?('lastStartTag')
tokenizer.current_token = {:type => :startTag, :name => data['lastStartTag']} if data.has_key?('lastStartTag')
tokens = TokenizerTestParser.new(tokenizer).parse
@ -48,7 +75,7 @@ class Html5TokenizerTestCase < Test::Unit::TestCase
expected = concatenate_consecutive_characters(data['output'])
assert_equal expected, actual, message
assert_tokens_match expected, actual, data["ignoreErrorOrder"], message
end
end
end

View file

@ -60,7 +60,11 @@ class TestTreeWalkers < Test::Unit::TestCase
when :Comment
output << "#{' '*indent}<!-- #{token[:data]} -->"
when :Doctype
output << "#{' '*indent}<!DOCTYPE #{token[:name]}>"
if token[:name] and token[:name].any?
output << "#{' '*indent}<!DOCTYPE #{token[:name]}>"
else
output << "#{' '*indent}<!DOCTYPE >"
end
when :Characters, :SpaceCharacters
output << "#{' '*indent}\"#{token[:data]}\""
else
@ -76,7 +80,7 @@ class TestTreeWalkers < Test::Unit::TestCase
next if test_name == 'tests5' # TODO
TestData.new(test_file, %w(data errors document-fragment document)).
each_with_index do |(input, errors, innerHTML, expected), index|
each_with_index do |(input, errors, inner_html, expected), index|
expected = expected.gsub("\n| ","\n")[2..-1]
@ -86,13 +90,13 @@ class TestTreeWalkers < Test::Unit::TestCase
parser = HTML5::HTMLParser.new(:tree => tree_class[:builder])
if innerHTML
parser.parseFragment(input, innerHTML)
if inner_html
parser.parse_fragment(input, inner_html)
else
parser.parse(input)
end
document = parser.tree.getDocument
document = parser.tree.get_document
begin
output = sortattrs(convertTokens(tree_class[:walker].new(document)))

View file

@ -54,7 +54,7 @@ class TokenizerTestParser
@outputTokens.push(["Character", token[:data]])
end
def processEOF(token)
def process_eof(token)
end
def processParseError(token)