Merged with Jacques' latest changes.
This commit is contained in:
commit
b96ff30026
|
@ -152,8 +152,7 @@ class ApplicationController < ActionController::Base
|
|||
elsif %w(tex).include?(action_name)
|
||||
response.headers['Content-Type'] = 'text/plain; charset=UTF-8'
|
||||
elsif request.env['HTTP_USER_AGENT'] =~ /Validator/ or request.env.include?('HTTP_ACCEPT') &&
|
||||
Mime::Type.parse(request.env["HTTP_ACCEPT"]).include?(Mime::XHTML) &&
|
||||
!(request.env['HTTP_USER_AGENT'] =~ /Safari/ and %w(s5).include?(action_name))
|
||||
Mime::Type.parse(request.env["HTTP_ACCEPT"]).include?(Mime::XHTML)
|
||||
response.headers['Content-Type'] = 'application/xhtml+xml; charset=UTF-8'
|
||||
elsif request.env['HTTP_USER_AGENT'] =~ /MathPlayer/
|
||||
response.headers['Content-Type'] = 'application/xhtml+xml'
|
||||
|
|
|
@ -18,7 +18,7 @@ xml.feed('xmlns' => "http://www.w3.org/2005/Atom", "xml:lang" => 'en') do
|
|||
xml.name(page.author)
|
||||
end
|
||||
if @hide_description
|
||||
xml.summary('Content suppressed.', 'type' => 'text')
|
||||
xml.summary("Updated by #{page.author} on #{page.updated_at.getgm.strftime("%Y-%m-%d")} at #{page.updated_at.getgm.strftime("%H:%M:%SZ")}.", 'type' => 'text')
|
||||
else
|
||||
xml.content('type' => 'xhtml', 'xml:base' => url_for(:only_path => false, :web => @web_name, :action => @link_action, :id => page.name) ) do
|
||||
xml.div('xmlns' => 'http://www.w3.org/1999/xhtml' ) do
|
||||
|
|
|
@ -11,6 +11,16 @@
|
|||
%----Macros----------
|
||||
\newcommand{\gt}{>}
|
||||
\newcommand{\lt}{<}
|
||||
\newcommand{\darr}{\downarrow}
|
||||
\newcommand{\nearr}{\nearrow}
|
||||
\newcommand{\nwarr}{\nwarrow}
|
||||
\newcommand{\searr}{\searrow}
|
||||
\newcommand{\swarr}{\swarrow}
|
||||
\newcommand{\iff}{\Longleftrightarrow}
|
||||
\newcommand{\impliedby}{\Leftarrow}
|
||||
\newcommand{\map}{\mapsto}
|
||||
\newcommand{\embedsin}{\hookrightarrow}
|
||||
\newcommand{\implies}{\Rightarrow}
|
||||
\newcommand{\qed}{\blacksquare}
|
||||
|
||||
%-------------------------------------------------------------------
|
||||
|
|
|
@ -16,7 +16,7 @@ class Category < Chunk::Abstract
|
|||
def initialize(match_data, content)
|
||||
super(match_data, content)
|
||||
@hidden = match_data[1]
|
||||
@list = match_data[2].split(',').map { |c| c.strip }
|
||||
@list = match_data[2].split(',').map { |c| html_escape(c.strip) }
|
||||
@unmask_text = ''
|
||||
if @hidden
|
||||
@unmask_text = ''
|
||||
|
|
|
@ -74,6 +74,13 @@ module Chunk
|
|||
@content.delete_chunk(self)
|
||||
end
|
||||
|
||||
# Escapes the characters that are significant in HTML/XML markup so the
# text can be embedded safely in element content or in a double-quoted
# attribute value.
#
# string - the String to escape.
#
# Returns a new String with &, <, > and " replaced by the corresponding
# character entity references; the argument itself is not modified.
def html_escape(string)
  # The ampersand must be handled first, otherwise the ampersands
  # introduced by the later replacements would be escaped a second time.
  string.gsub( /&/, "&amp;" ).
    gsub( /</, "&lt;" ).
    gsub( />/, "&gt;" ).
    gsub( /"/, "&quot;" )
end
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
|
|
|
@ -25,14 +25,14 @@
|
|||
|
||||
module Sanitize
|
||||
|
||||
require 'html5lib/html5parser'
|
||||
require 'html5lib/liberalxmlparser'
|
||||
require 'html5lib/treewalkers'
|
||||
require 'html5lib/treebuilders'
|
||||
require 'html5lib/serializer'
|
||||
require 'html5lib/sanitizer'
|
||||
require 'html5/html5parser'
|
||||
require 'html5/liberalxmlparser'
|
||||
require 'html5/treewalkers'
|
||||
require 'html5/treebuilders'
|
||||
require 'html5/serializer'
|
||||
require 'html5/sanitizer'
|
||||
|
||||
include HTML5lib
|
||||
include HTML5
|
||||
|
||||
# Sanitize a string, parsed using XHTML parsing rules.
|
||||
#
|
||||
|
@ -52,12 +52,12 @@ module Sanitize
|
|||
options.each do |name, value|
|
||||
next unless %w(encoding treebuilder to_tree).include? name.to_s
|
||||
if name.to_s == 'treebuilder'
|
||||
@treebuilder = HTML5lib::TreeBuilders.getTreeBuilder(value)
|
||||
@treebuilder = HTML5lib::TreeBuilders.get_tree_builder(value)
|
||||
else
|
||||
instance_variable_set("@#{name}", value)
|
||||
end
|
||||
end
|
||||
parsed = XHTMLParser.parseFragment(html.to_ncr, {:tokenizer => HTMLSanitizer,
|
||||
parsed = XHTMLParser.parse_fragment(html.to_ncr, {:tokenizer => HTMLSanitizer,
|
||||
:encoding => @encoding, :tree => @treebuilder })
|
||||
return parsed if @to_tree
|
||||
return parsed.to_s
|
||||
|
@ -81,12 +81,12 @@ module Sanitize
|
|||
options.each do |name, value|
|
||||
next unless %w(encoding treebuilder to_tree).include? name.to_s
|
||||
if name.to_s == 'treebuilder'
|
||||
@treebuilder = HTML5lib::TreeBuilders.getTreeBuilder(value)
|
||||
@treebuilder = HTML5lib::TreeBuilders.get_tree_builder(value)
|
||||
else
|
||||
instance_variable_set("@#{name}", value)
|
||||
end
|
||||
end
|
||||
parsed = HTMLParser.parseFragment(html.to_ncr, {:tokenizer => HTMLSanitizer,
|
||||
parsed = HTMLParser.parse_fragment(html.to_ncr, {:tokenizer => HTMLSanitizer,
|
||||
:encoding => @encoding, :tree => @treebuilder })
|
||||
return parsed if @to_tree
|
||||
return parsed.to_s
|
||||
|
@ -98,13 +98,9 @@ module Sanitize
|
|||
# sanitize_rexml(tree) -> string
|
||||
#
|
||||
def sanitize_rexml(tree)
|
||||
tokens = TreeWalkers.getTreeWalker('rexml').new(tree.to_ncr)
|
||||
HTMLSerializer.serialize(tokens, {:encoding=>'utf-8',
|
||||
:quote_attr_values => true,
|
||||
:minimize_boolean_attributes => false,
|
||||
:use_trailing_solidus => true,
|
||||
tokens = TreeWalkers.get_tree_walker('rexml').new(tree.to_ncr)
|
||||
XHTMLSerializer.serialize(tokens, {:encoding=>'utf-8',
|
||||
:space_before_trailing_solidus => true,
|
||||
:omit_optional_tags => false,
|
||||
:inject_meta_charset => false,
|
||||
:sanitize => true})
|
||||
end
|
||||
|
|
|
@ -16,4 +16,4 @@ table.plaintable {
|
|||
text-align:center;
|
||||
margin-left:30px;
|
||||
}
|
||||
|
||||
.noborder td, .noborder th {border:0}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/* Following are the presentation styles -- edit away! */
|
||||
|
||||
body {background: #FFF; color: #000; font-size: 2em;}
|
||||
body {background: #FFF; color: #000; font-size: 1.6em;}
|
||||
:link, :visited {text-decoration: none; color: #00C;}
|
||||
#controls :active {color: #8A8 !important;}
|
||||
#controls :focus {outline: 1px dotted #272;}
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
// S5 v1.2a1 slides.js -- released into the Public Domain
|
||||
// S5 v1.2a2 slides.js -- released into the Public Domain
|
||||
// Many modifications by Jacques Distler to allow operation as real XHTML.
|
||||
//
|
||||
// Please see http://www.meyerweb.com/eric/tools/s5/credits.html for information
|
||||
// about all the wonderful and talented contributors to this code!
|
||||
|
@ -30,6 +31,7 @@ var countdown = {
|
|||
|
||||
var isIE = navigator.appName == 'Microsoft Internet Explorer' && navigator.userAgent.indexOf('Opera') < 1 ? 1 : 0;
|
||||
var isOp = navigator.userAgent.indexOf('Opera') > -1 ? 1 : 0;
|
||||
var isSa = navigator.userAgent.indexOf('Safari') > -1 ? 1 : 0;
|
||||
var isGe = navigator.userAgent.indexOf('Gecko') > -1 && navigator.userAgent.indexOf('Safari') < 1 ? 1 : 0;
|
||||
|
||||
function hasClass(object, className) {
|
||||
|
@ -111,9 +113,16 @@ function slideLabel() {
|
|||
for (var o = 0; o < menunodes.length; o++) {
|
||||
otext += nodeValue(menunodes[o]);
|
||||
}
|
||||
if (isSa) {
|
||||
var option = createElement('option');
|
||||
option.setAttribute('value', n);
|
||||
option.appendChild(document.createTextNode(n + ' : ' + otext) );
|
||||
list.appendChild(option);
|
||||
} else {
|
||||
list.options[list.length] = new Option(n + ' : ' + otext, n);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function currentSlide() {
|
||||
var cs;
|
||||
|
@ -122,12 +131,12 @@ function currentSlide() {
|
|||
} else {
|
||||
cs = document.currentSlide;
|
||||
}
|
||||
var plink = document.createElement('a');
|
||||
var plink = createElement('a');
|
||||
plink.id = 'plink';
|
||||
plink.setAttribute('href', '');
|
||||
var csHere = document.createElement('span');
|
||||
var csSep = document.createElement('span');
|
||||
var csTotal = document.createElement('span');
|
||||
var csHere = createElement('span');
|
||||
var csSep = createElement('span');
|
||||
var csTotal = createElement('span');
|
||||
csHere.id = 'csHere';
|
||||
csSep.id = 'csSep';
|
||||
csTotal.id = 'csTotal';
|
||||
|
@ -376,7 +385,7 @@ function slideJump() {
|
|||
function fixLinks() {
|
||||
var thisUri = window.location.href;
|
||||
thisUri = thisUri.slice(0, thisUri.length - window.location.hash.length);
|
||||
var aelements = document.getElementsByTagName('A');
|
||||
var aelements = document.getElementsByTagName('a');
|
||||
for (var i = 0; i < aelements.length; i++) {
|
||||
var a = aelements[i].href;
|
||||
var slideID = a.match('\#slide[0-9]{1,2}');
|
||||
|
@ -418,43 +427,43 @@ function permaLink() {
|
|||
function createControls() {
|
||||
var controlsDiv = document.getElementById("controls");
|
||||
if (!controlsDiv) return;
|
||||
var controlForm = document.createElement('form');
|
||||
var controlForm = createElement('form');
|
||||
controlForm.id = 'controlForm';
|
||||
controlForm.setAttribute('action', '#');
|
||||
if (controlVis == 'hidden') {
|
||||
controlForm.setAttribute('onmouseover', 'showHide(\'s\');');
|
||||
controlForm.setAttribute('onmouseout', 'showHide(\'h\');');
|
||||
}
|
||||
var navLinks = document.createElement('div');
|
||||
var navLinks = createElement('div');
|
||||
navLinks.id = 'navLinks';
|
||||
var showNotes = document.createElement('a');
|
||||
var showNotes = createElement('a');
|
||||
showNotes.id = 'show-notes';
|
||||
showNotes.setAttribute('accesskey', 'n');
|
||||
showNotes.setAttribute('href', 'javascript:createNotesWindow();');
|
||||
showNotes.setAttribute('title', 'Show Notes');
|
||||
showNotes.appendChild(document.createTextNode('\u2261'));
|
||||
var toggle = document.createElement('a');
|
||||
var toggle = createElement('a');
|
||||
toggle.id = 'toggle';
|
||||
toggle.setAttribute('accesskey', 't');
|
||||
toggle.setAttribute('href', 'javascript:toggle();');
|
||||
toggle.appendChild(document.createTextNode('\u00D8'));
|
||||
var prev = document.createElement('a');
|
||||
var prev = createElement('a');
|
||||
prev.id = 'prev';
|
||||
prev.setAttribute('accesskey', 'z');
|
||||
prev.setAttribute('href', 'javascript:go(-1);');
|
||||
prev.appendChild(document.createTextNode('\u00AB'));
|
||||
var next = document.createElement('a');
|
||||
var next = createElement('a');
|
||||
next.id = 'next';
|
||||
next.setAttribute('accesskey', 'x');
|
||||
next.setAttribute('href', 'javascript:go(1);');
|
||||
next.appendChild(document.createTextNode('\u00BB'));
|
||||
var navList = document.createElement('div');
|
||||
var navList = createElement('div');
|
||||
navList.id = 'navList';
|
||||
if (controlVis != 'hidden') {
|
||||
navList.setAttribute('onmouseover', 'showHide(\'s\');');
|
||||
navList.setAttribute('onmouseout', 'showHide(\'h\');');
|
||||
}
|
||||
var jumplist = document.createElement('select');
|
||||
var jumplist = createElement('select');
|
||||
jumplist.id = 'jumplist';
|
||||
jumplist.setAttribute('onchange', 'go(\'j\');');
|
||||
navList.appendChild(jumplist);
|
||||
|
@ -503,7 +512,7 @@ function fontScale() { // causes layout problems in FireFox that get fixed if b
|
|||
function fontSize(value) {
|
||||
if (!(s5ss = document.getElementById('s5ss'))) {
|
||||
if (!document.createStyleSheet) {
|
||||
document.getElementsByTagName('head')[0].appendChild(s5ss = document.createElement('style'));
|
||||
document.getElementsByTagName('head')[0].appendChild(s5ss = createElement('style'));
|
||||
s5ss.setAttribute('media','screen, projection');
|
||||
s5ss.setAttribute('id','s5ss');
|
||||
} else {
|
||||
|
@ -784,6 +793,14 @@ function readTime(val) {
|
|||
}
|
||||
}
|
||||
|
||||
// Creates an element with the given tag name. When the document offers
// createElementNS, the element is created in the XHTML namespace;
// otherwise the plain createElement factory is used.
function createElement(element) {
  // No namespace-aware factory available: use the plain one.
  if (typeof document.createElementNS == 'undefined') {
    return document.createElement(element);
  }
  return document.createElementNS('http://www.w3.org/1999/xhtml', element);
}
|
||||
|
||||
// Re-runs fontScale(). Presumably registered as a window resize handler --
// NOTE(review): the registration is not visible here; confirm at the call site.
function windowChange() {
|
||||
fontScale();
|
||||
}
|
||||
|
|
64
public/s5/ui/s5-notes.xhtml
Normal file
64
public/s5/ui/s5-notes.xhtml
Normal file
|
@ -0,0 +1,64 @@
|
|||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN" "http://www.w3.org/Math/DTD/mathml2/xhtml-math11-f.dtd" >
|
||||
|
||||
<!-- Do not edit this document! The system will likely break if you do. -->
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<title>Notes</title>
|
||||
<link rel="stylesheet" href="default/notes.css" type="text/css" />
|
||||
<script type="text/javascript">
|
||||
// <![CDATA[
|
||||
document.onkeyup = opener.keys;
|
||||
document.onkeypress = opener.trap;
|
||||
document.onclick = opener.clicker;
|
||||
// ]]>
|
||||
</script>
|
||||
</head>
|
||||
|
||||
<body onload="opener.s5NotesWindowLoaded=true;" onunload="opener.s5NotesWindowLoaded=false;">
|
||||
|
||||
|
||||
<div class="timers" id="elapsed">
|
||||
<h1>
|
||||
<a href="#" onclick="opener.minimizeTimer('elapsed'); return false;">Elapsed Time</a>
|
||||
</h1>
|
||||
<ul>
|
||||
<li>
|
||||
<h2>Presentation</h2>
|
||||
<span class="clock" id="elapsed-presentation">00:00:00</span>
|
||||
</li>
|
||||
<li>
|
||||
<h2>Current Slide</h2>
|
||||
<span class="clock" id="elapsed-slide">00:00:00</span>
|
||||
</li>
|
||||
</ul>
|
||||
<div class="controls">
|
||||
<a href="#reset-elapsed" onclick="opener.resetElapsedTime(); return false;" title="Reset Elapsed Time">|←</a>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="timers" id="remaining">
|
||||
<h1>
|
||||
<a href="#" onclick="opener.minimizeTimer('remaining'); return false;">Remaining Time</a>
|
||||
</h1>
|
||||
<p>
|
||||
<a href="#subtract-remaining" class="control" id="minus" onclick="opener.alterRemainingTime('-5'); return false;" title="Subtract 5 Minutes">-</a>
|
||||
<span class="clock" id="timeLeft">00:00:00</span>
|
||||
<a href="#add-remaining" class="control" id="plus" onclick="opener.alterRemainingTime('5'); return false;" title="Add 5 Minutes">+</a>
|
||||
</p>
|
||||
<div class="controls">
|
||||
<form action="#" onsubmit="opener.resetRemainingTime(); return false;">
|
||||
<input type="text" class="text" id="startFrom" value="0" size="4" maxlength="4" />
|
||||
<a href="#toggle-remaining" onclick="opener.toggleRemainingTime(); return false;" title="Pause/Run Remaining Time">||</a>
|
||||
<a href="#reset-remaining" onclick="opener.resetRemainingTime(); return false;" title="Reset Remaining Time">|←</a>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<h2 id="slide">...</h2>
|
||||
<div id="notes"></div>
|
||||
|
||||
<h2 id="next">...</h2>
|
||||
<div id="nextnotes"></div>
|
||||
|
||||
</body>
|
||||
</html>
|
5
vendor/plugins/HTML5lib/History.txt
vendored
Normal file
5
vendor/plugins/HTML5lib/History.txt
vendored
Normal file
|
@ -0,0 +1,5 @@
|
|||
== 0.1.0 / 2007-08-07
|
||||
|
||||
* 1 major enhancement
|
||||
* Birthday!
|
||||
|
59
vendor/plugins/HTML5lib/Manifest.txt
vendored
Normal file
59
vendor/plugins/HTML5lib/Manifest.txt
vendored
Normal file
|
@ -0,0 +1,59 @@
|
|||
History.txt
|
||||
Manifest.txt
|
||||
README
|
||||
Rakefile.rb
|
||||
lib/html5.rb
|
||||
lib/html5/constants.rb
|
||||
lib/html5/filters/base.rb
|
||||
lib/html5/filters/inject_meta_charset.rb
|
||||
lib/html5/filters/optionaltags.rb
|
||||
lib/html5/filters/sanitizer.rb
|
||||
lib/html5/filters/whitespace.rb
|
||||
lib/html5/html5parser.rb
|
||||
lib/html5/html5parser/after_body_phase.rb
|
||||
lib/html5/html5parser/after_frameset_phase.rb
|
||||
lib/html5/html5parser/after_head_phase.rb
|
||||
lib/html5/html5parser/before_head_phase.rb
|
||||
lib/html5/html5parser/in_body_phase.rb
|
||||
lib/html5/html5parser/in_caption_phase.rb
|
||||
lib/html5/html5parser/in_cell_phase.rb
|
||||
lib/html5/html5parser/in_column_group_phase.rb
|
||||
lib/html5/html5parser/in_frameset_phase.rb
|
||||
lib/html5/html5parser/in_head_phase.rb
|
||||
lib/html5/html5parser/in_row_phase.rb
|
||||
lib/html5/html5parser/in_select_phase.rb
|
||||
lib/html5/html5parser/in_table_body_phase.rb
|
||||
lib/html5/html5parser/in_table_phase.rb
|
||||
lib/html5/html5parser/initial_phase.rb
|
||||
lib/html5/html5parser/phase.rb
|
||||
lib/html5/html5parser/root_element_phase.rb
|
||||
lib/html5/html5parser/trailing_end_phase.rb
|
||||
lib/html5/inputstream.rb
|
||||
lib/html5/liberalxmlparser.rb
|
||||
lib/html5/sanitizer.rb
|
||||
lib/html5/serializer.rb
|
||||
lib/html5/serializer/htmlserializer.rb
|
||||
lib/html5/serializer/xhtmlserializer.rb
|
||||
lib/html5/tokenizer.rb
|
||||
lib/html5/treebuilders.rb
|
||||
lib/html5/treebuilders/base.rb
|
||||
lib/html5/treebuilders/hpricot.rb
|
||||
lib/html5/treebuilders/rexml.rb
|
||||
lib/html5/treebuilders/simpletree.rb
|
||||
lib/html5/treewalkers.rb
|
||||
lib/html5/treewalkers/base.rb
|
||||
lib/html5/treewalkers/hpricot.rb
|
||||
lib/html5/treewalkers/rexml.rb
|
||||
lib/html5/treewalkers/simpletree.rb
|
||||
lib/html5/version.rb
|
||||
parse.rb
|
||||
tests/preamble.rb
|
||||
tests/test_encoding.rb
|
||||
tests/test_lxp.rb
|
||||
tests/test_parser.rb
|
||||
tests/test_sanitizer.rb
|
||||
tests/test_serializer.rb
|
||||
tests/test_stream.rb
|
||||
tests/test_tokenizer.rb
|
||||
tests/test_treewalkers.rb
|
||||
tests/tokenizer_test_parser.rb
|
46
vendor/plugins/HTML5lib/README
vendored
46
vendor/plugins/HTML5lib/README
vendored
|
@ -1,9 +1,45 @@
|
|||
= HTML5lib
|
||||
html5
|
||||
by Ryan King, et al
|
||||
http://code.google.com/p/html5lib
|
||||
|
||||
== Basic Usage
|
||||
== DESCRIPTION:
|
||||
|
||||
require 'html5lib'
|
||||
A ruby implementation of the parsing algorithm in HTML5.
|
||||
|
||||
doc = HTML5lib.parse('<html>...</html>')
|
||||
|
||||
doc.class # REXML::Document
|
||||
== FEATURES/PROBLEMS:
|
||||
|
||||
|
||||
|
||||
== SYNOPSIS:
|
||||
|
||||
TODO
|
||||
|
||||
== REQUIREMENTS:
|
||||
|
||||
* chardet, only tested with 0.9.0
|
||||
|
||||
== INSTALL:
|
||||
|
||||
* sudo gem install html5
|
||||
|
||||
== LICENSE:
|
||||
|
||||
Copyright (c) 2006-2007 The Authors
|
||||
|
||||
Contributors:
|
||||
James Graham - jg307@cam.ac.uk
|
||||
Anne van Kesteren - annevankesteren@gmail.com
|
||||
Lachlan Hunt - lachlan.hunt@lachy.id.au
|
||||
Matt McDonald - kanashii@kanashii.ca
|
||||
Sam Ruby - rubys@intertwingly.net
|
||||
Ian Hickson (Google) - ian@hixie.ch
|
||||
Thomas Broyer - t.broyer@ltgt.net
|
||||
Jacques Distler - distler@golem.ph.utexas.edu
|
||||
Ryan King - ryan@theryanking.com
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
|
34
vendor/plugins/HTML5lib/Rakefile.rb
vendored
34
vendor/plugins/HTML5lib/Rakefile.rb
vendored
|
@ -1,7 +1,33 @@
|
|||
require 'rake'
|
||||
require 'rake/testtask'
|
||||
require 'hoe'
|
||||
require 'lib/html5/version'
|
||||
|
||||
Rake::TestTask.new do |task|
|
||||
task.pattern = 'tests/test_*.rb'
|
||||
task.verbose = true
|
||||
Hoe.new("html5", HTML5::VERSION) do |p|
|
||||
p.name = "html5"
|
||||
p.description = p.paragraphs_of('README', 2..5).join("\n\n")
|
||||
p.summary = "HTML5 parser/tokenizer."
|
||||
|
||||
p.author = ['Ryan King'] # TODO: add more names
|
||||
p.email = 'ryan@theryanking.com'
|
||||
p.url = 'http://code.google.com/p/html5lib'
|
||||
p.need_zip = true
|
||||
|
||||
p.extra_deps << ['chardet', '>= 0.9.0']
|
||||
p.changes = p.paragraphs_of('History.txt', 0..1).join("\n\n")
|
||||
end
|
||||
|
||||
require 'rcov/rcovtask'
|
||||
|
||||
namespace :test do
|
||||
namespace :coverage do
|
||||
desc "Delete aggregate coverage data."
|
||||
task(:clean) { rm_f "coverage.data" }
|
||||
end
|
||||
desc 'Aggregate code coverage for unit, functional and integration tests'
|
||||
Rcov::RcovTask.new(:coverage => "test:coverage:clean") do |t|
|
||||
t.libs << "tests"
|
||||
t.test_files = FileList["tests/test_*.rb"]
|
||||
t.output_dir = "tests/coverage/"
|
||||
t.verbose = true
|
||||
end
|
||||
end
|
215
vendor/plugins/HTML5lib/bin/html5
vendored
Executable file
215
vendor/plugins/HTML5lib/bin/html5
vendored
Executable file
|
@ -0,0 +1,215 @@
|
|||
#!/usr/bin/env ruby
|
||||
|
||||
$:.unshift File.dirname(__FILE__), 'lib'
|
||||
|
||||
# Parses the input named by the last command-line argument and, unless
# profiling, prints the result through print_output.
#
# opts - options object; this method reads treebuilder, output,
#        parsemethod, profile and time from it.
# args - remaining command-line arguments. The last one names the input:
#        an "http://" URL, "-" for standard input, or a file path.
#
# Writes a message to standard error and exits with status 1 when no
# input argument is given.
def parse(opts, args)
  encoding = nil
  opened = nil # handle opened by this method, closed again in +ensure+

  f = args[-1]
  if f
    begin
      if f[0..6] == 'http://'
        require 'open-uri'
        f = opened = URI.parse(f).open
        encoding = f.charset
      elsif f == '-'
        f = $stdin
      else
        f = opened = open(f)
      end
    rescue
      # Best effort: when the input cannot be opened, +f+ keeps its
      # original string value and that string is handed to the parser.
    end
  else
    $stderr.write("No filename provided. Use -h for help\n")
    exit(1)
  end

  require 'html5/treebuilders'
  treebuilder = HTML5::TreeBuilders[opts.treebuilder]

  if opts.output == :xml
    require 'html5/liberalxmlparser'
    parser = HTML5::XMLParser.new(:tree=>treebuilder)
  else
    require 'html5/html5parser'
    parser = HTML5::HTMLParser.new(:tree=>treebuilder)
  end

  # Fragment parsing takes an extra container-element argument.
  if opts.parsemethod == :parse
    parse_args = [f, encoding]
  else
    parse_args = [f, 'div', encoding]
  end

  if opts.profile
    # Only the profile is reported in this mode; the parse result is not printed.
    require 'profiler'
    Profiler__::start_profile
    parser.send(opts.parsemethod, *parse_args)
    Profiler__::stop_profile
    Profiler__::print_profile($stderr)
  elsif opts.time
    require 'time' # TODO: switch to benchmark
    t0 = Time.new
    document = parser.send(opts.parsemethod, *parse_args)
    t1 = Time.new
    print_output(parser, document, opts)
    t2 = Time.new
    puts "\n\nRun took: %fs (plus %fs to print the output)"%[t1-t0, t2-t1]
  else
    document = parser.send(opts.parsemethod, *parse_args)
    print_output(parser, document, opts)
  end
ensure
  # Release the file or URL handle opened above; $stdin is left alone.
  opened.close if opened && !opened.closed?
end
|
||||
|
||||
# Writes the result of a parse to standard output.
#
# parser   - the parser that produced +document+; read for its tokenizer
#            stream encoding, its tree and its error list.
# document - the parse result (a single tree, or a collection of fragments).
# opts     - options object; this method reads encoding, output,
#            treebuilder, serializer and error from it.
def print_output(parser, document, opts)
  puts "Encoding: #{parser.tokenizer.stream.char_encoding}" if opts.encoding

  case opts.output
  when :xml
    print document
  when :html
    require 'html5/treewalkers'
    tokens = HTML5::TreeWalkers[opts.treebuilder].new(document)
    require 'html5/serializer'
    puts HTML5::HTMLSerializer.serialize(tokens, opts.serializer)
  when :hilite
    print document.hilite
  when :tree
    # A single tree is wrapped so both cases can be iterated the same way.
    document = [document] unless document.respond_to?(:each)
    document.each {|fragment| puts parser.tree.testSerializer(fragment)}
  end

  if opts.error
    # Each error is a [position, message] pair; position fills the two
    # %i slots of the format string.
    err_list = parser.errors.map do |pos, message|
      "Line %i Col %i"%pos + " " + message
    end
    $stdout.write("\nParse errors:\n" + err_list.join("\n")+"\n")
  end
end
|
||||
|
||||
# Default settings; the command-line switches below override them.
require 'ostruct'
options = OpenStruct.new
options.profile = false
options.time = false
options.output = :html
options.treebuilder = 'simpletree'
options.error = false
options.encoding = false
options.parsemethod = :parse
options.serializer = {
  :encoding => 'utf-8',
  :omit_optional_tags => false,
  :inject_meta_charset => false
}

# Command-line definition. The block parameter is named +parser+ so that it
# does not shadow the +opts+ local being assigned.
require 'optparse'
opts = OptionParser.new do |parser|
  parser.separator ""
  parser.separator "Parse Options:"

  parser.on("-b", "--treebuilder NAME") do |treebuilder|
    options.treebuilder = treebuilder
  end

  parser.on("-f", "--fragment", "Parse as a fragment") do
    options.parsemethod = :parse_fragment
  end

  parser.separator ""
  parser.separator "Filter Options:"

  parser.on("--[no-]inject-meta-charset", "inject <meta charset>") do |inject|
    options.serializer[:inject_meta_charset] = inject
  end

  parser.on("--[no-]strip-whitespace", "strip unnecessary whitespace") do |strip|
    options.serializer[:strip_whitespace] = strip
  end

  parser.on("--[no-]sanitize", "escape unsafe tags") do |sanitize|
    options.serializer[:sanitize] = sanitize
  end

  parser.separator ""
  parser.separator "Output Options:"

  parser.on("--tree", "output as debug tree") do
    options.output = :tree
  end

  parser.on("-x", "--xml", "output as xml") do
    # XML output also switches the tree builder to rexml.
    options.output = :xml
    options.treebuilder = "rexml"
  end

  parser.on("--[no-]html", "Output as html") do |html|
    options.output = (html ? :html : nil)
  end

  parser.on("--hilite", "Output as formatted highlighted code.") do
    options.output = :hilite
  end

  parser.on("-e", "--error", "Print a list of parse errors") do |error|
    options.error = error
  end

  parser.separator ""
  parser.separator "Serialization Options:"

  parser.on("--[no-]omit-optional-tags", "Omit optional tags") do |omit|
    options.serializer[:omit_optional_tags] = omit
  end

  parser.on("--[no-]quote-attr-values", "Quote attribute values") do |quote|
    options.serializer[:quote_attr_values] = quote
  end

  parser.on("--[no-]use-best-quote-char", "Use best quote character") do |best|
    options.serializer[:use_best_quote_char] = best
  end

  parser.on("--quote-char C", "Use specified quote character") do |c|
    options.serializer[:quote_char] = c
  end

  parser.on("--[no-]minimize-boolean-attributes", "Minimize boolean attributes") do |min|
    options.serializer[:minimize_boolean_attributes] = min
  end

  parser.on("--[no-]use-trailing-solidus", "Use trailing solidus") do |slash|
    options.serializer[:use_trailing_solidus] = slash
  end

  parser.on("--[no-]escape-lt-in-attrs", "Escape less than signs in attribute values") do |lt|
    options.serializer[:escape_lt_in_attrs] = lt
  end

  parser.on("--[no-]escape-rcdata", "Escape rcdata element values") do |rcdata|
    options.serializer[:escape_rcdata] = rcdata
  end

  parser.separator ""
  parser.separator "Other Options:"

  parser.on("-p", "--[no-]profile", "Profile the run") do |profile|
    options.profile = profile
  end

  parser.on("-t", "--[no-]time", "Time the run") do |time|
    options.time = time
  end

  parser.on("-c", "--[no-]encoding", "Print character encoding used") do |encoding|
    options.encoding = encoding
  end

  parser.on_tail("-h", "--help", "Show this message") do
    puts parser
    exit
  end
end

# parse! strips the recognised switches from ARGV; whatever remains is
# handed to parse, which takes its last element as the input to read.
opts.parse!(ARGV)
parse options, ARGV
|
13
vendor/plugins/HTML5lib/lib/html5.rb
vendored
Normal file
13
vendor/plugins/HTML5lib/lib/html5.rb
vendored
Normal file
|
@ -0,0 +1,13 @@
|
|||
require 'html5/html5parser'
|
||||
require 'html5/version'
|
||||
|
||||
# Convenience entry points that forward to the class-level HTMLParser methods.
module HTML5

  # Parses +stream+ as a complete document.
  #
  # stream  - the input handed on to HTMLParser.parse.
  # options - Hash of parser options, passed through unchanged.
  #
  # Returns whatever HTMLParser.parse returns.
  def self.parse(stream, options={})
    HTMLParser.parse(stream, options)
  end

  # Parses +stream+ as a fragment rather than as a complete document.
  #
  # stream  - the input handed on to HTMLParser.parse_fragment.
  # options - Hash of parser options, passed through unchanged.
  #
  # Returns whatever HTMLParser.parse_fragment returns.
  def self.parse_fragment(stream, options={})
    HTMLParser.parse_fragment(stream, options)
  end
end
|
818
vendor/plugins/HTML5lib/lib/html5/constants.rb
vendored
Executable file
818
vendor/plugins/HTML5lib/lib/html5/constants.rb
vendored
Executable file
|
@ -0,0 +1,818 @@
|
|||
module HTML5
|
||||
|
||||
class EOF < Exception; end
|
||||
|
||||
CONTENT_MODEL_FLAGS = [
|
||||
:PCDATA,
|
||||
:RCDATA,
|
||||
:CDATA,
|
||||
:PLAINTEXT
|
||||
]
|
||||
|
||||
SCOPING_ELEMENTS = %w[
|
||||
button
|
||||
caption
|
||||
html
|
||||
marquee
|
||||
object
|
||||
table
|
||||
td
|
||||
th
|
||||
]
|
||||
|
||||
FORMATTING_ELEMENTS = %w[
|
||||
a
|
||||
b
|
||||
big
|
||||
em
|
||||
font
|
||||
i
|
||||
nobr
|
||||
s
|
||||
small
|
||||
strike
|
||||
strong
|
||||
tt
|
||||
u
|
||||
]
|
||||
|
||||
SPECIAL_ELEMENTS = %w[
|
||||
address
|
||||
area
|
||||
base
|
||||
basefont
|
||||
bgsound
|
||||
blockquote
|
||||
body
|
||||
br
|
||||
center
|
||||
col
|
||||
colgroup
|
||||
dd
|
||||
dir
|
||||
div
|
||||
dl
|
||||
dt
|
||||
embed
|
||||
fieldset
|
||||
form
|
||||
frame
|
||||
frameset
|
||||
h1
|
||||
h2
|
||||
h3
|
||||
h4
|
||||
h5
|
||||
h6
|
||||
head
|
||||
hr
|
||||
iframe
|
||||
image
|
||||
img
|
||||
input
|
||||
isindex
|
||||
li
|
||||
link
|
||||
listing
|
||||
menu
|
||||
meta
|
||||
noembed
|
||||
noframes
|
||||
noscript
|
||||
ol
|
||||
optgroup
|
||||
option
|
||||
p
|
||||
param
|
||||
plaintext
|
||||
pre
|
||||
script
|
||||
select
|
||||
spacer
|
||||
style
|
||||
tbody
|
||||
textarea
|
||||
tfoot
|
||||
thead
|
||||
title
|
||||
tr
|
||||
ul
|
||||
wbr
|
||||
]
|
||||
|
||||
SPACE_CHARACTERS = %W[
|
||||
\t
|
||||
\n
|
||||
\x0B
|
||||
\x0C
|
||||
\x20
|
||||
\r
|
||||
]
|
||||
|
||||
TABLE_INSERT_MODE_ELEMENTS = %w[
|
||||
table
|
||||
tbody
|
||||
tfoot
|
||||
thead
|
||||
tr
|
||||
]
|
||||
|
||||
ASCII_LOWERCASE = ('a'..'z').to_a.join('')
|
||||
ASCII_UPPERCASE = ('A'..'Z').to_a.join('')
|
||||
ASCII_LETTERS = ASCII_LOWERCASE + ASCII_UPPERCASE
|
||||
DIGITS = '0'..'9'
|
||||
HEX_DIGITS = DIGITS.to_a + ('a'..'f').to_a + ('A'..'F').to_a
|
||||
|
||||
# Heading elements need to be ordered
|
||||
HEADING_ELEMENTS = %w[
|
||||
h1
|
||||
h2
|
||||
h3
|
||||
h4
|
||||
h5
|
||||
h6
|
||||
]
|
||||
|
||||
# XXX What about event-source and command?
|
||||
VOID_ELEMENTS = %w[
|
||||
base
|
||||
link
|
||||
meta
|
||||
hr
|
||||
br
|
||||
img
|
||||
embed
|
||||
param
|
||||
area
|
||||
col
|
||||
input
|
||||
]
|
||||
|
||||
CDATA_ELEMENTS = %w[title textarea]
|
||||
|
||||
RCDATA_ELEMENTS = %w[
|
||||
style
|
||||
script
|
||||
xmp
|
||||
iframe
|
||||
noembed
|
||||
noframes
|
||||
noscript
|
||||
]
|
||||
|
||||
BOOLEAN_ATTRIBUTES = {
|
||||
:global => %w[irrelevant],
|
||||
'style' => %w[scoped],
|
||||
'img' => %w[ismap],
|
||||
'audio' => %w[autoplay controls],
|
||||
'video' => %w[autoplay controls],
|
||||
'script' => %w[defer async],
|
||||
'details' => %w[open],
|
||||
'datagrid' => %w[multiple disabled],
|
||||
'command' => %w[hidden disabled checked default],
|
||||
'menu' => %w[autosubmit],
|
||||
'fieldset' => %w[disabled readonly],
|
||||
'option' => %w[disabled readonly selected],
|
||||
'optgroup' => %w[disabled readonly],
|
||||
'button' => %w[disabled autofocus],
|
||||
'input' => %w[disabled readonly required autofocus checked ismap],
|
||||
'select' => %w[disabled readonly autofocus multiple],
|
||||
'output' => %w[disabled readonly]
|
||||
|
||||
}
|
||||
|
||||
# entitiesWindows1252 has to be _ordered_ and needs to have an index.
|
||||
ENTITIES_WINDOWS1252 = [
|
||||
8364, # 0x80 0x20AC EURO SIGN
|
||||
65533, # 0x81 UNDEFINED
|
||||
8218, # 0x82 0x201A SINGLE LOW-9 QUOTATION MARK
|
||||
402, # 0x83 0x0192 LATIN SMALL LETTER F WITH HOOK
|
||||
8222, # 0x84 0x201E DOUBLE LOW-9 QUOTATION MARK
|
||||
8230, # 0x85 0x2026 HORIZONTAL ELLIPSIS
|
||||
8224, # 0x86 0x2020 DAGGER
|
||||
8225, # 0x87 0x2021 DOUBLE DAGGER
|
||||
710, # 0x88 0x02C6 MODIFIER LETTER CIRCUMFLEX ACCENT
|
||||
8240, # 0x89 0x2030 PER MILLE SIGN
|
||||
352, # 0x8A 0x0160 LATIN CAPITAL LETTER S WITH CARON
|
||||
8249, # 0x8B 0x2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||
338, # 0x8C 0x0152 LATIN CAPITAL LIGATURE OE
|
||||
65533, # 0x8D UNDEFINED
|
||||
381, # 0x8E 0x017D LATIN CAPITAL LETTER Z WITH CARON
|
||||
65533, # 0x8F UNDEFINED
|
||||
65533, # 0x90 UNDEFINED
|
||||
8216, # 0x91 0x2018 LEFT SINGLE QUOTATION MARK
|
||||
8217, # 0x92 0x2019 RIGHT SINGLE QUOTATION MARK
|
||||
8220, # 0x93 0x201C LEFT DOUBLE QUOTATION MARK
|
||||
8221, # 0x94 0x201D RIGHT DOUBLE QUOTATION MARK
|
||||
8226, # 0x95 0x2022 BULLET
|
||||
8211, # 0x96 0x2013 EN DASH
|
||||
8212, # 0x97 0x2014 EM DASH
|
||||
732, # 0x98 0x02DC SMALL TILDE
|
||||
8482, # 0x99 0x2122 TRADE MARK SIGN
|
||||
353, # 0x9A 0x0161 LATIN SMALL LETTER S WITH CARON
|
||||
8250, # 0x9B 0x203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||
339, # 0x9C 0x0153 LATIN SMALL LIGATURE OE
|
||||
65533, # 0x9D UNDEFINED
|
||||
382, # 0x9E 0x017E LATIN SMALL LETTER Z WITH CARON
|
||||
376 # 0x9F 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||
]
|
||||
|
||||
# ENTITIES was generated from Python using the following code:
|
||||
#
|
||||
# import constants
|
||||
# entities = constants.entities.items()
|
||||
# entities.sort()
|
||||
# list = [ ' '.join([repr(entity), '=>', ord(value)<128 and
|
||||
# repr(str(value)) or repr(value.encode('utf-8')).replace("'",'"')])
|
||||
# for entity, value in entities]
|
||||
# print ' ENTITIES = {\n ' + ',\n '.join(list) + '\n }'
|
||||
|
||||
ENTITIES = {
|
||||
'AElig' => "\xc3\x86",
|
||||
'AElig;' => "\xc3\x86",
|
||||
'AMP' => '&',
|
||||
'AMP;' => '&',
|
||||
'Aacute' => "\xc3\x81",
|
||||
'Aacute;' => "\xc3\x81",
|
||||
'Acirc' => "\xc3\x82",
|
||||
'Acirc;' => "\xc3\x82",
|
||||
'Agrave' => "\xc3\x80",
|
||||
'Agrave;' => "\xc3\x80",
|
||||
'Alpha;' => "\xce\x91",
|
||||
'Aring' => "\xc3\x85",
|
||||
'Aring;' => "\xc3\x85",
|
||||
'Atilde' => "\xc3\x83",
|
||||
'Atilde;' => "\xc3\x83",
|
||||
'Auml' => "\xc3\x84",
|
||||
'Auml;' => "\xc3\x84",
|
||||
'Beta;' => "\xce\x92",
|
||||
'COPY' => "\xc2\xa9",
|
||||
'COPY;' => "\xc2\xa9",
|
||||
'Ccedil' => "\xc3\x87",
|
||||
'Ccedil;' => "\xc3\x87",
|
||||
'Chi;' => "\xce\xa7",
|
||||
'Dagger;' => "\xe2\x80\xa1",
|
||||
'Delta;' => "\xce\x94",
|
||||
'ETH' => "\xc3\x90",
|
||||
'ETH;' => "\xc3\x90",
|
||||
'Eacute' => "\xc3\x89",
|
||||
'Eacute;' => "\xc3\x89",
|
||||
'Ecirc' => "\xc3\x8a",
|
||||
'Ecirc;' => "\xc3\x8a",
|
||||
'Egrave' => "\xc3\x88",
|
||||
'Egrave;' => "\xc3\x88",
|
||||
'Epsilon;' => "\xce\x95",
|
||||
'Eta;' => "\xce\x97",
|
||||
'Euml' => "\xc3\x8b",
|
||||
'Euml;' => "\xc3\x8b",
|
||||
'GT' => '>',
|
||||
'GT;' => '>',
|
||||
'Gamma;' => "\xce\x93",
|
||||
'Iacute' => "\xc3\x8d",
|
||||
'Iacute;' => "\xc3\x8d",
|
||||
'Icirc' => "\xc3\x8e",
|
||||
'Icirc;' => "\xc3\x8e",
|
||||
'Igrave' => "\xc3\x8c",
|
||||
'Igrave;' => "\xc3\x8c",
|
||||
'Iota;' => "\xce\x99",
|
||||
'Iuml' => "\xc3\x8f",
|
||||
'Iuml;' => "\xc3\x8f",
|
||||
'Kappa;' => "\xce\x9a",
|
||||
'LT' => '<',
|
||||
'LT;' => '<',
|
||||
'Lambda;' => "\xce\x9b",
|
||||
'Mu;' => "\xce\x9c",
|
||||
'Ntilde' => "\xc3\x91",
|
||||
'Ntilde;' => "\xc3\x91",
|
||||
'Nu;' => "\xce\x9d",
|
||||
'OElig;' => "\xc5\x92",
|
||||
'Oacute' => "\xc3\x93",
|
||||
'Oacute;' => "\xc3\x93",
|
||||
'Ocirc' => "\xc3\x94",
|
||||
'Ocirc;' => "\xc3\x94",
|
||||
'Ograve' => "\xc3\x92",
|
||||
'Ograve;' => "\xc3\x92",
|
||||
'Omega;' => "\xce\xa9",
|
||||
'Omicron;' => "\xce\x9f",
|
||||
'Oslash' => "\xc3\x98",
|
||||
'Oslash;' => "\xc3\x98",
|
||||
'Otilde' => "\xc3\x95",
|
||||
'Otilde;' => "\xc3\x95",
|
||||
'Ouml' => "\xc3\x96",
|
||||
'Ouml;' => "\xc3\x96",
|
||||
'Phi;' => "\xce\xa6",
|
||||
'Pi;' => "\xce\xa0",
|
||||
'Prime;' => "\xe2\x80\xb3",
|
||||
'Psi;' => "\xce\xa8",
|
||||
'QUOT' => '"',
|
||||
'QUOT;' => '"',
|
||||
'REG' => "\xc2\xae",
|
||||
'REG;' => "\xc2\xae",
|
||||
'Rho;' => "\xce\xa1",
|
||||
'Scaron;' => "\xc5\xa0",
|
||||
'Sigma;' => "\xce\xa3",
|
||||
'THORN' => "\xc3\x9e",
|
||||
'THORN;' => "\xc3\x9e",
|
||||
'TRADE;' => "\xe2\x84\xa2",
|
||||
'Tau;' => "\xce\xa4",
|
||||
'Theta;' => "\xce\x98",
|
||||
'Uacute' => "\xc3\x9a",
|
||||
'Uacute;' => "\xc3\x9a",
|
||||
'Ucirc' => "\xc3\x9b",
|
||||
'Ucirc;' => "\xc3\x9b",
|
||||
'Ugrave' => "\xc3\x99",
|
||||
'Ugrave;' => "\xc3\x99",
|
||||
'Upsilon;' => "\xce\xa5",
|
||||
'Uuml' => "\xc3\x9c",
|
||||
'Uuml;' => "\xc3\x9c",
|
||||
'Xi;' => "\xce\x9e",
|
||||
'Yacute' => "\xc3\x9d",
|
||||
'Yacute;' => "\xc3\x9d",
|
||||
'Yuml;' => "\xc5\xb8",
|
||||
'Zeta;' => "\xce\x96",
|
||||
'aacute' => "\xc3\xa1",
|
||||
'aacute;' => "\xc3\xa1",
|
||||
'acirc' => "\xc3\xa2",
|
||||
'acirc;' => "\xc3\xa2",
|
||||
'acute' => "\xc2\xb4",
|
||||
'acute;' => "\xc2\xb4",
|
||||
'aelig' => "\xc3\xa6",
|
||||
'aelig;' => "\xc3\xa6",
|
||||
'agrave' => "\xc3\xa0",
|
||||
'agrave;' => "\xc3\xa0",
|
||||
'alefsym;' => "\xe2\x84\xb5",
|
||||
'alpha;' => "\xce\xb1",
|
||||
'amp' => '&',
|
||||
'amp;' => '&',
|
||||
'and;' => "\xe2\x88\xa7",
|
||||
'ang;' => "\xe2\x88\xa0",
|
||||
'apos;' => "'",
|
||||
'aring' => "\xc3\xa5",
|
||||
'aring;' => "\xc3\xa5",
|
||||
'asymp;' => "\xe2\x89\x88",
|
||||
'atilde' => "\xc3\xa3",
|
||||
'atilde;' => "\xc3\xa3",
|
||||
'auml' => "\xc3\xa4",
|
||||
'auml;' => "\xc3\xa4",
|
||||
'bdquo;' => "\xe2\x80\x9e",
|
||||
'beta;' => "\xce\xb2",
|
||||
'brvbar' => "\xc2\xa6",
|
||||
'brvbar;' => "\xc2\xa6",
|
||||
'bull;' => "\xe2\x80\xa2",
|
||||
'cap;' => "\xe2\x88\xa9",
|
||||
'ccedil' => "\xc3\xa7",
|
||||
'ccedil;' => "\xc3\xa7",
|
||||
'cedil' => "\xc2\xb8",
|
||||
'cedil;' => "\xc2\xb8",
|
||||
'cent' => "\xc2\xa2",
|
||||
'cent;' => "\xc2\xa2",
|
||||
'chi;' => "\xcf\x87",
|
||||
'circ;' => "\xcb\x86",
|
||||
'clubs;' => "\xe2\x99\xa3",
|
||||
'cong;' => "\xe2\x89\x85",
|
||||
'copy' => "\xc2\xa9",
|
||||
'copy;' => "\xc2\xa9",
|
||||
'crarr;' => "\xe2\x86\xb5",
|
||||
'cup;' => "\xe2\x88\xaa",
|
||||
'curren' => "\xc2\xa4",
|
||||
'curren;' => "\xc2\xa4",
|
||||
'dArr;' => "\xe2\x87\x93",
|
||||
'dagger;' => "\xe2\x80\xa0",
|
||||
'darr;' => "\xe2\x86\x93",
|
||||
'deg' => "\xc2\xb0",
|
||||
'deg;' => "\xc2\xb0",
|
||||
'delta;' => "\xce\xb4",
|
||||
'diams;' => "\xe2\x99\xa6",
|
||||
'divide' => "\xc3\xb7",
|
||||
'divide;' => "\xc3\xb7",
|
||||
'eacute' => "\xc3\xa9",
|
||||
'eacute;' => "\xc3\xa9",
|
||||
'ecirc' => "\xc3\xaa",
|
||||
'ecirc;' => "\xc3\xaa",
|
||||
'egrave' => "\xc3\xa8",
|
||||
'egrave;' => "\xc3\xa8",
|
||||
'empty;' => "\xe2\x88\x85",
|
||||
'emsp;' => "\xe2\x80\x83",
|
||||
'ensp;' => "\xe2\x80\x82",
|
||||
'epsilon;' => "\xce\xb5",
|
||||
'equiv;' => "\xe2\x89\xa1",
|
||||
'eta;' => "\xce\xb7",
|
||||
'eth' => "\xc3\xb0",
|
||||
'eth;' => "\xc3\xb0",
|
||||
'euml' => "\xc3\xab",
|
||||
'euml;' => "\xc3\xab",
|
||||
'euro;' => "\xe2\x82\xac",
|
||||
'exist;' => "\xe2\x88\x83",
|
||||
'fnof;' => "\xc6\x92",
|
||||
'forall;' => "\xe2\x88\x80",
|
||||
'frac12' => "\xc2\xbd",
|
||||
'frac12;' => "\xc2\xbd",
|
||||
'frac14' => "\xc2\xbc",
|
||||
'frac14;' => "\xc2\xbc",
|
||||
'frac34' => "\xc2\xbe",
|
||||
'frac34;' => "\xc2\xbe",
|
||||
'frasl;' => "\xe2\x81\x84",
|
||||
'gamma;' => "\xce\xb3",
|
||||
'ge;' => "\xe2\x89\xa5",
|
||||
'gt' => '>',
|
||||
'gt;' => '>',
|
||||
'hArr;' => "\xe2\x87\x94",
|
||||
'harr;' => "\xe2\x86\x94",
|
||||
'hearts;' => "\xe2\x99\xa5",
|
||||
'hellip;' => "\xe2\x80\xa6",
|
||||
'iacute' => "\xc3\xad",
|
||||
'iacute;' => "\xc3\xad",
|
||||
'icirc' => "\xc3\xae",
|
||||
'icirc;' => "\xc3\xae",
|
||||
'iexcl' => "\xc2\xa1",
|
||||
'iexcl;' => "\xc2\xa1",
|
||||
'igrave' => "\xc3\xac",
|
||||
'igrave;' => "\xc3\xac",
|
||||
'image;' => "\xe2\x84\x91",
|
||||
'infin;' => "\xe2\x88\x9e",
|
||||
'int;' => "\xe2\x88\xab",
|
||||
'iota;' => "\xce\xb9",
|
||||
'iquest' => "\xc2\xbf",
|
||||
'iquest;' => "\xc2\xbf",
|
||||
'isin;' => "\xe2\x88\x88",
|
||||
'iuml' => "\xc3\xaf",
|
||||
'iuml;' => "\xc3\xaf",
|
||||
'kappa;' => "\xce\xba",
|
||||
'lArr;' => "\xe2\x87\x90",
|
||||
'lambda;' => "\xce\xbb",
|
||||
'lang;' => "\xe3\x80\x88",
|
||||
'laquo' => "\xc2\xab",
|
||||
'laquo;' => "\xc2\xab",
|
||||
'larr;' => "\xe2\x86\x90",
|
||||
'lceil;' => "\xe2\x8c\x88",
|
||||
'ldquo;' => "\xe2\x80\x9c",
|
||||
'le;' => "\xe2\x89\xa4",
|
||||
'lfloor;' => "\xe2\x8c\x8a",
|
||||
'lowast;' => "\xe2\x88\x97",
|
||||
'loz;' => "\xe2\x97\x8a",
|
||||
'lrm;' => "\xe2\x80\x8e",
|
||||
'lsaquo;' => "\xe2\x80\xb9",
|
||||
'lsquo;' => "\xe2\x80\x98",
|
||||
'lt' => '<',
|
||||
'lt;' => '<',
|
||||
'macr' => "\xc2\xaf",
|
||||
'macr;' => "\xc2\xaf",
|
||||
'mdash;' => "\xe2\x80\x94",
|
||||
'micro' => "\xc2\xb5",
|
||||
'micro;' => "\xc2\xb5",
|
||||
'middot' => "\xc2\xb7",
|
||||
'middot;' => "\xc2\xb7",
|
||||
'minus;' => "\xe2\x88\x92",
|
||||
'mu;' => "\xce\xbc",
|
||||
'nabla;' => "\xe2\x88\x87",
|
||||
'nbsp' => "\xc2\xa0",
|
||||
'nbsp;' => "\xc2\xa0",
|
||||
'ndash;' => "\xe2\x80\x93",
|
||||
'ne;' => "\xe2\x89\xa0",
|
||||
'ni;' => "\xe2\x88\x8b",
|
||||
'not' => "\xc2\xac",
|
||||
'not;' => "\xc2\xac",
|
||||
'notin;' => "\xe2\x88\x89",
|
||||
'nsub;' => "\xe2\x8a\x84",
|
||||
'ntilde' => "\xc3\xb1",
|
||||
'ntilde;' => "\xc3\xb1",
|
||||
'nu;' => "\xce\xbd",
|
||||
'oacute' => "\xc3\xb3",
|
||||
'oacute;' => "\xc3\xb3",
|
||||
'ocirc' => "\xc3\xb4",
|
||||
'ocirc;' => "\xc3\xb4",
|
||||
'oelig;' => "\xc5\x93",
|
||||
'ograve' => "\xc3\xb2",
|
||||
'ograve;' => "\xc3\xb2",
|
||||
'oline;' => "\xe2\x80\xbe",
|
||||
'omega;' => "\xcf\x89",
|
||||
'omicron;' => "\xce\xbf",
|
||||
'oplus;' => "\xe2\x8a\x95",
|
||||
'or;' => "\xe2\x88\xa8",
|
||||
'ordf' => "\xc2\xaa",
|
||||
'ordf;' => "\xc2\xaa",
|
||||
'ordm' => "\xc2\xba",
|
||||
'ordm;' => "\xc2\xba",
|
||||
'oslash' => "\xc3\xb8",
|
||||
'oslash;' => "\xc3\xb8",
|
||||
'otilde' => "\xc3\xb5",
|
||||
'otilde;' => "\xc3\xb5",
|
||||
'otimes;' => "\xe2\x8a\x97",
|
||||
'ouml' => "\xc3\xb6",
|
||||
'ouml;' => "\xc3\xb6",
|
||||
'para' => "\xc2\xb6",
|
||||
'para;' => "\xc2\xb6",
|
||||
'part;' => "\xe2\x88\x82",
|
||||
'permil;' => "\xe2\x80\xb0",
|
||||
'perp;' => "\xe2\x8a\xa5",
|
||||
'phi;' => "\xcf\x86",
|
||||
'pi;' => "\xcf\x80",
|
||||
'piv;' => "\xcf\x96",
|
||||
'plusmn' => "\xc2\xb1",
|
||||
'plusmn;' => "\xc2\xb1",
|
||||
'pound' => "\xc2\xa3",
|
||||
'pound;' => "\xc2\xa3",
|
||||
'prime;' => "\xe2\x80\xb2",
|
||||
'prod;' => "\xe2\x88\x8f",
|
||||
'prop;' => "\xe2\x88\x9d",
|
||||
'psi;' => "\xcf\x88",
|
||||
'quot' => '"',
|
||||
'quot;' => '"',
|
||||
'rArr;' => "\xe2\x87\x92",
|
||||
'radic;' => "\xe2\x88\x9a",
|
||||
'rang;' => "\xe3\x80\x89",
|
||||
'raquo' => "\xc2\xbb",
|
||||
'raquo;' => "\xc2\xbb",
|
||||
'rarr;' => "\xe2\x86\x92",
|
||||
'rceil;' => "\xe2\x8c\x89",
|
||||
'rdquo;' => "\xe2\x80\x9d",
|
||||
'real;' => "\xe2\x84\x9c",
|
||||
'reg' => "\xc2\xae",
|
||||
'reg;' => "\xc2\xae",
|
||||
'rfloor;' => "\xe2\x8c\x8b",
|
||||
'rho;' => "\xcf\x81",
|
||||
'rlm;' => "\xe2\x80\x8f",
|
||||
'rsaquo;' => "\xe2\x80\xba",
|
||||
'rsquo;' => "\xe2\x80\x99",
|
||||
'sbquo;' => "\xe2\x80\x9a",
|
||||
'scaron;' => "\xc5\xa1",
|
||||
'sdot;' => "\xe2\x8b\x85",
|
||||
'sect' => "\xc2\xa7",
|
||||
'sect;' => "\xc2\xa7",
|
||||
'shy' => "\xc2\xad",
|
||||
'shy;' => "\xc2\xad",
|
||||
'sigma;' => "\xcf\x83",
|
||||
'sigmaf;' => "\xcf\x82",
|
||||
'sim;' => "\xe2\x88\xbc",
|
||||
'spades;' => "\xe2\x99\xa0",
|
||||
'sub;' => "\xe2\x8a\x82",
|
||||
'sube;' => "\xe2\x8a\x86",
|
||||
'sum;' => "\xe2\x88\x91",
|
||||
'sup1' => "\xc2\xb9",
|
||||
'sup1;' => "\xc2\xb9",
|
||||
'sup2' => "\xc2\xb2",
|
||||
'sup2;' => "\xc2\xb2",
|
||||
'sup3' => "\xc2\xb3",
|
||||
'sup3;' => "\xc2\xb3",
|
||||
'sup;' => "\xe2\x8a\x83",
|
||||
'supe;' => "\xe2\x8a\x87",
|
||||
'szlig' => "\xc3\x9f",
|
||||
'szlig;' => "\xc3\x9f",
|
||||
'tau;' => "\xcf\x84",
|
||||
'there4;' => "\xe2\x88\xb4",
|
||||
'theta;' => "\xce\xb8",
|
||||
'thetasym;' => "\xcf\x91",
|
||||
'thinsp;' => "\xe2\x80\x89",
|
||||
'thorn' => "\xc3\xbe",
|
||||
'thorn;' => "\xc3\xbe",
|
||||
'tilde;' => "\xcb\x9c",
|
||||
'times' => "\xc3\x97",
|
||||
'times;' => "\xc3\x97",
|
||||
'trade;' => "\xe2\x84\xa2",
|
||||
'uArr;' => "\xe2\x87\x91",
|
||||
'uacute' => "\xc3\xba",
|
||||
'uacute;' => "\xc3\xba",
|
||||
'uarr;' => "\xe2\x86\x91",
|
||||
'ucirc' => "\xc3\xbb",
|
||||
'ucirc;' => "\xc3\xbb",
|
||||
'ugrave' => "\xc3\xb9",
|
||||
'ugrave;' => "\xc3\xb9",
|
||||
'uml' => "\xc2\xa8",
|
||||
'uml;' => "\xc2\xa8",
|
||||
'upsih;' => "\xcf\x92",
|
||||
'upsilon;' => "\xcf\x85",
|
||||
'uuml' => "\xc3\xbc",
|
||||
'uuml;' => "\xc3\xbc",
|
||||
'weierp;' => "\xe2\x84\x98",
|
||||
'xi;' => "\xce\xbe",
|
||||
'yacute' => "\xc3\xbd",
|
||||
'yacute;' => "\xc3\xbd",
|
||||
'yen' => "\xc2\xa5",
|
||||
'yen;' => "\xc2\xa5",
|
||||
'yuml' => "\xc3\xbf",
|
||||
'yuml;' => "\xc3\xbf",
|
||||
'zeta;' => "\xce\xb6",
|
||||
'zwj;' => "\xe2\x80\x8d",
|
||||
'zwnj;' => "\xe2\x80\x8c"
|
||||
}
|
||||
|
||||
ENCODINGS = %w[
|
||||
ansi_x3.4-1968
|
||||
iso-ir-6
|
||||
ansi_x3.4-1986
|
||||
iso_646.irv:1991
|
||||
ascii
|
||||
iso646-us
|
||||
us-ascii
|
||||
us
|
||||
ibm367
|
||||
cp367
|
||||
csascii
|
||||
ks_c_5601-1987
|
||||
korean
|
||||
iso-2022-kr
|
||||
csiso2022kr
|
||||
euc-kr
|
||||
iso-2022-jp
|
||||
csiso2022jp
|
||||
iso-2022-jp-2
|
||||
iso-ir-58
|
||||
chinese
|
||||
csiso58gb231280
|
||||
iso_8859-1:1987
|
||||
iso-ir-100
|
||||
iso_8859-1
|
||||
iso-8859-1
|
||||
latin1
|
||||
l1
|
||||
ibm819
|
||||
cp819
|
||||
csisolatin1
|
||||
iso_8859-2:1987
|
||||
iso-ir-101
|
||||
iso_8859-2
|
||||
iso-8859-2
|
||||
latin2
|
||||
l2
|
||||
csisolatin2
|
||||
iso_8859-3:1988
|
||||
iso-ir-109
|
||||
iso_8859-3
|
||||
iso-8859-3
|
||||
latin3
|
||||
l3
|
||||
csisolatin3
|
||||
iso_8859-4:1988
|
||||
iso-ir-110
|
||||
iso_8859-4
|
||||
iso-8859-4
|
||||
latin4
|
||||
l4
|
||||
csisolatin4
|
||||
iso_8859-6:1987
|
||||
iso-ir-127
|
||||
iso_8859-6
|
||||
iso-8859-6
|
||||
ecma-114
|
||||
asmo-708
|
||||
arabic
|
||||
csisolatinarabic
|
||||
iso_8859-7:1987
|
||||
iso-ir-126
|
||||
iso_8859-7
|
||||
iso-8859-7
|
||||
elot_928
|
||||
ecma-118
|
||||
greek
|
||||
greek8
|
||||
csisolatingreek
|
||||
iso_8859-8:1988
|
||||
iso-ir-138
|
||||
iso_8859-8
|
||||
iso-8859-8
|
||||
hebrew
|
||||
csisolatinhebrew
|
||||
iso_8859-5:1988
|
||||
iso-ir-144
|
||||
iso_8859-5
|
||||
iso-8859-5
|
||||
cyrillic
|
||||
csisolatincyrillic
|
||||
iso_8859-9:1989
|
||||
iso-ir-148
|
||||
iso_8859-9
|
||||
iso-8859-9
|
||||
latin5
|
||||
l5
|
||||
csisolatin5
|
||||
iso-8859-10
|
||||
iso-ir-157
|
||||
l6
|
||||
iso_8859-10:1992
|
||||
csisolatin6
|
||||
latin6
|
||||
hp-roman8
|
||||
roman8
|
||||
r8
|
||||
ibm037
|
||||
cp037
|
||||
csibm037
|
||||
ibm424
|
||||
cp424
|
||||
csibm424
|
||||
ibm437
|
||||
cp437
|
||||
437
|
||||
cspc8codepage437
|
||||
ibm500
|
||||
cp500
|
||||
csibm500
|
||||
ibm775
|
||||
cp775
|
||||
cspc775baltic
|
||||
ibm850
|
||||
cp850
|
||||
850
|
||||
cspc850multilingual
|
||||
ibm852
|
||||
cp852
|
||||
852
|
||||
cspcp852
|
||||
ibm855
|
||||
cp855
|
||||
855
|
||||
csibm855
|
||||
ibm857
|
||||
cp857
|
||||
857
|
||||
csibm857
|
||||
ibm860
|
||||
cp860
|
||||
860
|
||||
csibm860
|
||||
ibm861
|
||||
cp861
|
||||
861
|
||||
cp-is
|
||||
csibm861
|
||||
ibm862
|
||||
cp862
|
||||
862
|
||||
cspc862latinhebrew
|
||||
ibm863
|
||||
cp863
|
||||
863
|
||||
csibm863
|
||||
ibm864
|
||||
cp864
|
||||
csibm864
|
||||
ibm865
|
||||
cp865
|
||||
865
|
||||
csibm865
|
||||
ibm866
|
||||
cp866
|
||||
866
|
||||
csibm866
|
||||
ibm869
|
||||
cp869
|
||||
869
|
||||
cp-gr
|
||||
csibm869
|
||||
ibm1026
|
||||
cp1026
|
||||
csibm1026
|
||||
koi8-r
|
||||
cskoi8r
|
||||
koi8-u
|
||||
big5-hkscs
|
||||
ptcp154
|
||||
csptcp154
|
||||
pt154
|
||||
cp154
|
||||
utf-7
|
||||
utf-16be
|
||||
utf-16le
|
||||
utf-16
|
||||
utf-8
|
||||
iso-8859-13
|
||||
iso-8859-14
|
||||
iso-ir-199
|
||||
iso_8859-14:1998
|
||||
iso_8859-14
|
||||
latin8
|
||||
iso-celtic
|
||||
l8
|
||||
iso-8859-15
|
||||
iso_8859-15
|
||||
iso-8859-16
|
||||
iso-ir-226
|
||||
iso_8859-16:2001
|
||||
iso_8859-16
|
||||
latin10
|
||||
l10
|
||||
gbk
|
||||
cp936
|
||||
ms936
|
||||
gb18030
|
||||
shift_jis
|
||||
ms_kanji
|
||||
csshiftjis
|
||||
euc-jp
|
||||
gb2312
|
||||
big5
|
||||
csbig5
|
||||
windows-1250
|
||||
windows-1251
|
||||
windows-1252
|
||||
windows-1253
|
||||
windows-1254
|
||||
windows-1255
|
||||
windows-1256
|
||||
windows-1257
|
||||
windows-1258
|
||||
tis-620
|
||||
hz-gb-2312
|
||||
]
|
||||
|
||||
end
|
1
vendor/plugins/HTML5lib/lib/html5/filters.rb
vendored
Normal file
1
vendor/plugins/HTML5lib/lib/html5/filters.rb
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
require 'html5/filters/optionaltags'
|
|
@ -1,7 +1,7 @@
|
|||
require 'delegate'
|
||||
require 'enumerator'
|
||||
|
||||
module HTML5lib
|
||||
module HTML5
|
||||
module Filters
|
||||
class Base < SimpleDelegator
|
||||
include Enumerable
|
|
@ -1,6 +1,6 @@
|
|||
require 'html5lib/filters/base'
|
||||
require 'html5/filters/base'
|
||||
|
||||
module HTML5lib
|
||||
module HTML5
|
||||
module Filters
|
||||
class InjectMetaCharset < Base
|
||||
def initialize(source, encoding)
|
||||
|
@ -43,30 +43,27 @@ module HTML5lib
|
|||
end
|
||||
end
|
||||
|
||||
if not meta_found
|
||||
if has_http_equiv_content_type and content_index >= 0
|
||||
token[:data][content_index][1] =
|
||||
'text/html; charset=%s' % @encoding
|
||||
if !meta_found
|
||||
if has_http_equiv_content_type && content_index >= 0
|
||||
token[:data][content_index][1] = 'text/html; charset=%s' % @encoding
|
||||
meta_found = true
|
||||
end
|
||||
end
|
||||
|
||||
elsif token[:name].downcase == "head" and not meta_found
|
||||
elsif token[:name].downcase == "head" && !meta_found
|
||||
# insert meta into empty head
|
||||
yield(:type => :StartTag, :name => "head", :data => token[:data])
|
||||
yield(:type => :EmptyTag, :name => "meta",
|
||||
:data => [["charset", @encoding]])
|
||||
yield(:type => :EndTag, :name => "head")
|
||||
yield :type => :StartTag, :name => "head", :data => token[:data]
|
||||
yield :type => :EmptyTag, :name => "meta", :data => [["charset", @encoding]]
|
||||
yield :type => :EndTag, :name => "head"
|
||||
meta_found = true
|
||||
next
|
||||
end
|
||||
|
||||
when :EndTag
|
||||
if token[:name].downcase == "head" and pending.any?
|
||||
if token[:name].downcase == "head" && pending.any?
|
||||
# insert meta into head (if necessary) and flush pending queue
|
||||
yield pending.shift
|
||||
yield(:type => :EmptyTag, :name => "meta",
|
||||
:data => [["charset", @encoding]]) if not meta_found
|
||||
yield :type => :EmptyTag, :name => "meta", :data => [["charset", @encoding]] if !meta_found
|
||||
yield pending.shift while pending.any?
|
||||
meta_found = true
|
||||
state = :post_head
|
|
@ -1,7 +1,7 @@
|
|||
require 'html5lib/constants'
|
||||
require 'html5lib/filters/base'
|
||||
require 'html5/constants'
|
||||
require 'html5/filters/base'
|
||||
|
||||
module HTML5lib
|
||||
module HTML5
|
||||
module Filters
|
||||
|
||||
class OptionalTagFilter < Base
|
||||
|
@ -75,8 +75,7 @@ module HTML5lib
|
|||
if type == :StartTag
|
||||
# omit the thead and tfoot elements' end tag when they are
|
||||
# immediately followed by a tbody element. See is_optional_end.
|
||||
if previous and previous[:type] == :EndTag and \
|
||||
%w(tbody thead tfoot).include?(previous[:name])
|
||||
if previous and previous[:type] == :EndTag && %w(tbody thead tfoot).include?(previous[:name])
|
||||
return false
|
||||
end
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
require 'html5lib/filters/base'
|
||||
require 'html5lib/sanitizer'
|
||||
require 'html5/filters/base'
|
||||
require 'html5/sanitizer'
|
||||
|
||||
module HTML5lib
|
||||
module HTML5
|
||||
module Filters
|
||||
class HTMLSanitizeFilter < Base
|
||||
include HTMLSanitizeModule
|
|
@ -1,7 +1,7 @@
|
|||
require 'html5lib/constants'
|
||||
require 'html5lib/filters/base'
|
||||
require 'html5/constants'
|
||||
require 'html5/filters/base'
|
||||
|
||||
module HTML5lib
|
||||
module HTML5
|
||||
module Filters
|
||||
class WhitespaceFilter < Base
|
||||
|
||||
|
@ -21,7 +21,7 @@ module HTML5lib
|
|||
preserve -= 1 if preserve > 0
|
||||
|
||||
when :SpaceCharacters
|
||||
next if preserve == 0
|
||||
token[:data] = " " if preserve == 0 && token[:data]
|
||||
|
||||
when :Characters
|
||||
token[:data] = token[:data].sub(SPACES,' ') if preserve == 0
|
|
@ -1,12 +1,12 @@
|
|||
require 'html5lib/constants'
|
||||
require 'html5lib/tokenizer'
|
||||
require 'html5lib/treebuilders/rexml'
|
||||
require 'html5/constants'
|
||||
require 'html5/tokenizer'
|
||||
require 'html5/treebuilders/rexml'
|
||||
|
||||
Dir.glob(File.join(File.dirname(__FILE__), 'html5parser', '*_phase.rb')).each do |path|
|
||||
require 'html5lib/html5parser/' + File.basename(path)
|
||||
require 'html5/html5parser/' + File.basename(path)
|
||||
end
|
||||
|
||||
module HTML5lib
|
||||
module HTML5
|
||||
|
||||
# Error in parsed document
|
||||
# Raised from the parser's error hook when the parser was created with
# :strict. Derives from StandardError rather than Exception so that a plain
# `rescue` handles it and it is not lumped in with interpreter-level
# exceptions; existing `rescue ParseError` / `rescue Exception` callers
# keep working.
class ParseError < StandardError; end
|
||||
|
@ -16,7 +16,7 @@ module HTML5lib
|
|||
#
|
||||
class HTMLParser
|
||||
|
||||
attr_accessor :phase, :firstStartTag, :innerHTML, :lastPhase, :insertFromTable
|
||||
attr_accessor :phase, :first_start_tag, :inner_html, :last_phase, :insert_from_table
|
||||
|
||||
attr_reader :phases, :tokenizer, :tree, :errors
|
||||
|
||||
|
@ -25,10 +25,10 @@ module HTML5lib
|
|||
new(options).parse(stream,encoding)
|
||||
end
|
||||
|
||||
def self.parseFragment(stream, options = {})
|
||||
def self.parse_fragment(stream, options = {})
|
||||
container = options.delete(:container) || 'div'
|
||||
encoding = options.delete(:encoding)
|
||||
new(options).parseFragment(stream,container,encoding)
|
||||
new(options).parse_fragment(stream, container, encoding)
|
||||
end
|
||||
|
||||
@@phases = %w( initial rootElement beforeHead inHead afterHead inBody inTable inCaption
|
||||
|
@ -37,7 +37,7 @@ module HTML5lib
|
|||
# :strict - raise an exception when a parse error is encountered
|
||||
# :tree - a treebuilder class controlling the type of tree that will be
|
||||
# returned. Built in treebuilders can be accessed through
|
||||
# HTML5lib::TreeBuilders[treeType]
|
||||
# HTML5::TreeBuilders[treeType]
|
||||
def initialize(options = {})
|
||||
@strict = false
|
||||
@errors = []
|
||||
|
@ -46,54 +46,56 @@ module HTML5lib
|
|||
@tree = TreeBuilders::REXML::TreeBuilder
|
||||
|
||||
options.each {|name, value| instance_variable_set("@#{name}", value) }
|
||||
@lowercase_attr_name = nil unless instance_variables.include?("@lowercase_attr_name")
|
||||
@lowercase_element_name = nil unless instance_variables.include?("@lowercase_element_name")
|
||||
|
||||
@tree = @tree.new
|
||||
|
||||
@phases = @@phases.inject({}) do |phases, phase_name|
|
||||
phase_class_name = phase_name.sub(/(.)/) { $1.upcase } + 'Phase'
|
||||
phases[phase_name.to_sym] = HTML5lib.const_get(phase_class_name).new(self, @tree)
|
||||
phases[phase_name.to_sym] = HTML5.const_get(phase_class_name).new(self, @tree)
|
||||
phases
|
||||
end
|
||||
end
|
||||
|
||||
def _parse(stream, innerHTML, encoding, container = 'div')
|
||||
def _parse(stream, inner_html, encoding, container = 'div')
|
||||
@tree.reset
|
||||
@firstStartTag = false
|
||||
@first_start_tag = false
|
||||
@errors = []
|
||||
|
||||
@tokenizer = @tokenizer.class unless Class === @tokenizer
|
||||
@tokenizer = @tokenizer.new(stream, :encoding => encoding,
|
||||
:parseMeta => !innerHTML)
|
||||
:parseMeta => !inner_html, :lowercase_attr_name => @lowercase_attr_name, :lowercase_element_name => @lowercase_element_name)
|
||||
|
||||
if innerHTML
|
||||
case @innerHTML = container.downcase
|
||||
if inner_html
|
||||
case @inner_html = container.downcase
|
||||
when 'title', 'textarea'
|
||||
@tokenizer.contentModelFlag = :RCDATA
|
||||
@tokenizer.content_model_flag = :RCDATA
|
||||
when 'style', 'script', 'xmp', 'iframe', 'noembed', 'noframes', 'noscript'
|
||||
@tokenizer.contentModelFlag = :CDATA
|
||||
@tokenizer.content_model_flag = :CDATA
|
||||
when 'plaintext'
|
||||
@tokenizer.contentModelFlag = :PLAINTEXT
|
||||
@tokenizer.content_model_flag = :PLAINTEXT
|
||||
else
|
||||
# contentModelFlag already is PCDATA
|
||||
#@tokenizer.contentModelFlag = :PCDATA
|
||||
# content_model_flag already is PCDATA
|
||||
#@tokenizer.content_model_flag = :PCDATA
|
||||
end
|
||||
|
||||
@phase = @phases[:rootElement]
|
||||
@phase.insertHtmlElement
|
||||
resetInsertionMode
|
||||
@phase.insert_html_element
|
||||
reset_insertion_mode
|
||||
else
|
||||
@innerHTML = false
|
||||
@inner_html = false
|
||||
@phase = @phases[:initial]
|
||||
end
|
||||
|
||||
# We only seem to have InBodyPhase testcases where the following is
|
||||
# relevant ... need others too
|
||||
@lastPhase = nil
|
||||
@last_phase = nil
|
||||
|
||||
# XXX This is temporary for the moment so there isn't any other
|
||||
# changes needed for the parser to work with the iterable tokenizer
|
||||
@tokenizer.each do |token|
|
||||
token = normalizeToken(token)
|
||||
token = normalize_token(token)
|
||||
|
||||
method = 'process%s' % token[:type]
|
||||
|
||||
|
@ -108,12 +110,12 @@ module HTML5lib
|
|||
@phase.send method, token[:name], token[:publicId],
|
||||
token[:systemId], token[:correct]
|
||||
else
|
||||
parseError(token[:data])
|
||||
parse_error(token[:data])
|
||||
end
|
||||
end
|
||||
|
||||
# When the loop finishes it's EOF
|
||||
@phase.processEOF
|
||||
@phase.process_eof
|
||||
end
|
||||
|
||||
# Parse a HTML document into a well-formed tree
|
||||
|
@ -126,12 +128,12 @@ module HTML5lib
|
|||
# element)
|
||||
def parse(stream, encoding=nil)
|
||||
_parse(stream, false, encoding)
|
||||
return @tree.getDocument
|
||||
@tree.get_document
|
||||
end
|
||||
|
||||
# Parse a HTML fragment into a well-formed tree fragment
|
||||
|
||||
# container - name of the element we're setting the innerHTML property
|
||||
# container - name of the element we're setting the inner_html property
|
||||
# if set to nil, default to 'div'
|
||||
#
|
||||
# stream - a filelike object or string containing the HTML to be parsed
|
||||
|
@ -140,19 +142,19 @@ module HTML5lib
|
|||
# the encoding. If specified, that encoding will be used,
|
||||
# regardless of any BOM or later declaration (such as in a meta
|
||||
# element)
|
||||
def parseFragment(stream, container='div', encoding=nil)
|
||||
def parse_fragment(stream, container='div', encoding=nil)
|
||||
_parse(stream, true, encoding, container)
|
||||
return @tree.getFragment
|
||||
@tree.get_fragment
|
||||
end
|
||||
|
||||
def parseError(data = 'XXX ERROR MESSAGE NEEDED')
|
||||
def parse_error(data = 'XXX ERROR MESSAGE NEEDED')
|
||||
# XXX The idea is to make data mandatory.
|
||||
@errors.push([@tokenizer.stream.position, data])
|
||||
raise ParseError if @strict
|
||||
end
|
||||
|
||||
# HTML5 specific normalizations to the token stream
|
||||
def normalizeToken(token)
|
||||
def normalize_token(token)
|
||||
|
||||
if token[:type] == :EmptyTag
|
||||
# When a solidus (/) is encountered within a tag name what happens
|
||||
|
@ -161,29 +163,29 @@ module HTML5lib
|
|||
# thing and if it doesn't it's wrong for everyone.
|
||||
|
||||
unless VOID_ELEMENTS.include?(token[:name])
|
||||
parseError(_('Solidus (/) incorrectly placed in tag.'))
|
||||
parse_error(_('Solidus (/) incorrectly placed in tag.'))
|
||||
end
|
||||
|
||||
token[:type] = :StartTag
|
||||
end
|
||||
|
||||
if token[:type] == :StartTag
|
||||
token[:name] = token[:name].tr(ASCII_UPPERCASE,ASCII_LOWERCASE)
|
||||
token[:name] = token[:name].downcase
|
||||
|
||||
# We need to remove the duplicate attributes and convert attributes
|
||||
# to a dict so that [["x", "y"], ["x", "z"]] becomes {"x": "y"}
|
||||
|
||||
unless token[:data].empty?
|
||||
data = token[:data].reverse.map { |attr, value| [attr.tr(ASCII_UPPERCASE, ASCII_LOWERCASE), value] }
|
||||
data = token[:data].reverse.map {|attr, value| [attr.downcase, value] }
|
||||
token[:data] = Hash[*data.flatten]
|
||||
end
|
||||
|
||||
elsif token[:type] == :EndTag
|
||||
parseError(_('End tag contains unexpected attributes.')) unless token[:data].empty?
|
||||
parse_error(_('End tag contains unexpected attributes.')) unless token[:data].empty?
|
||||
token[:name] = token[:name].downcase
|
||||
end
|
||||
|
||||
return token
|
||||
token
|
||||
end
|
||||
|
||||
@@new_modes = {
|
||||
|
@ -202,34 +204,34 @@ module HTML5lib
|
|||
'frameset' => :inFrameset
|
||||
}
|
||||
|
||||
def resetInsertionMode
|
||||
def reset_insertion_mode
|
||||
# The name of this method is mostly historical. (It's also used in the
|
||||
# specification.)
|
||||
last = false
|
||||
|
||||
@tree.openElements.reverse.each do |node|
|
||||
nodeName = node.name
|
||||
@tree.open_elements.reverse.each do |node|
|
||||
node_name = node.name
|
||||
|
||||
if node == @tree.openElements[0]
|
||||
if node == @tree.open_elements.first
|
||||
last = true
|
||||
unless ['td', 'th'].include?(nodeName)
|
||||
unless ['td', 'th'].include?(node_name)
|
||||
# XXX
|
||||
# assert @innerHTML
|
||||
nodeName = @innerHTML
|
||||
# assert @inner_html
|
||||
node_name = @inner_html
|
||||
end
|
||||
end
|
||||
|
||||
# Check for conditions that should only happen in the innerHTML
|
||||
# Check for conditions that should only happen in the inner_html
|
||||
# case
|
||||
if ['select', 'colgroup', 'head', 'frameset'].include?(nodeName)
|
||||
if ['select', 'colgroup', 'head', 'frameset'].include?(node_name)
|
||||
# XXX
|
||||
# assert @innerHTML
|
||||
# assert @inner_html
|
||||
end
|
||||
|
||||
if @@new_modes.has_key?(nodeName)
|
||||
@phase = @phases[@@new_modes[nodeName]]
|
||||
elsif nodeName == 'html'
|
||||
@phase = @phases[@tree.headPointer.nil?? :beforeHead : :afterHead]
|
||||
if @@new_modes.has_key?(node_name)
|
||||
@phase = @phases[@@new_modes[node_name]]
|
||||
elsif node_name == 'html'
|
||||
@phase = @phases[@tree.head_pointer.nil?? :beforeHead : :afterHead]
|
||||
elsif last
|
||||
@phase = @phases[:inBody]
|
||||
else
|
|
@ -1,6 +1,6 @@
|
|||
require 'html5lib/html5parser/phase'
|
||||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5lib
|
||||
module HTML5
|
||||
class AfterBodyPhase < Phase
|
||||
|
||||
handle_end 'html'
|
||||
|
@ -8,36 +8,36 @@ module HTML5lib
|
|||
def processComment(data)
|
||||
# This is needed because data is to be appended to the <html> element
|
||||
# here and not to whatever is currently open.
|
||||
@tree.insertComment(data, @tree.openElements[0])
|
||||
@tree.insert_comment(data, @tree.open_elements.first)
|
||||
end
|
||||
|
||||
def processCharacters(data)
|
||||
@parser.parseError(_('Unexpected non-space characters in the after body phase.'))
|
||||
parse_error(_('Unexpected non-space characters in the after body phase.'))
|
||||
@parser.phase = @parser.phases[:inBody]
|
||||
@parser.phase.processCharacters(data)
|
||||
end
|
||||
|
||||
def processStartTag(name, attributes)
|
||||
@parser.parseError(_("Unexpected start tag token (#{name}) in the after body phase."))
|
||||
parse_error(_("Unexpected start tag token (#{name}) in the after body phase."))
|
||||
@parser.phase = @parser.phases[:inBody]
|
||||
@parser.phase.processStartTag(name, attributes)
|
||||
end
|
||||
|
||||
def endTagHtml(name)
|
||||
if @parser.innerHTML
|
||||
@parser.parseError
|
||||
if @parser.inner_html
|
||||
parse_error
|
||||
else
|
||||
# XXX: This may need to be done, not sure
|
||||
# Don't set lastPhase to the current phase but to the inBody phase
|
||||
# Don't set last_phase to the current phase but to the inBody phase
|
||||
# instead. No need for extra parse errors if there's something after </html>.
|
||||
# Try "<!doctype html>X</html>X" for instance.
|
||||
@parser.lastPhase = @parser.phase
|
||||
@parser.last_phase = @parser.phase
|
||||
@parser.phase = @parser.phases[:trailingEnd]
|
||||
end
|
||||
end
|
||||
|
||||
def endTagOther(name)
|
||||
@parser.parseError(_("Unexpected end tag token (#{name}) in the after body phase."))
|
||||
parse_error(_("Unexpected end tag token (#{name}) in the after body phase."))
|
||||
@parser.phase = @parser.phases[:inBody]
|
||||
@parser.phase.processEndTag(name)
|
||||
end
|
|
@ -1,6 +1,6 @@
|
|||
require 'html5lib/html5parser/phase'
|
||||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5lib
|
||||
module HTML5
|
||||
class AfterFramesetPhase < Phase
|
||||
|
||||
# http://www.whatwg.org/specs/web-apps/current-work/#after3
|
||||
|
@ -10,7 +10,7 @@ module HTML5lib
|
|||
handle_end 'html'
|
||||
|
||||
def processCharacters(data)
|
||||
@parser.parseError(_('Unexpected non-space characters in the after frameset phase. Ignored.'))
|
||||
parse_error(_('Unexpected non-space characters in the after frameset phase. Ignored.'))
|
||||
end
|
||||
|
||||
def startTagNoframes(name, attributes)
|
||||
|
@ -18,16 +18,16 @@ module HTML5lib
|
|||
end
|
||||
|
||||
def startTagOther(name, attributes)
|
||||
@parser.parseError(_("Unexpected start tag (#{name}) in the after frameset phase. Ignored."))
|
||||
parse_error(_("Unexpected start tag (#{name}) in the after frameset phase. Ignored."))
|
||||
end
|
||||
|
||||
def endTagHtml(name)
|
||||
@parser.lastPhase = @parser.phase
|
||||
@parser.last_phase = @parser.phase
|
||||
@parser.phase = @parser.phases[:trailingEnd]
|
||||
end
|
||||
|
||||
def endTagOther(name)
|
||||
@parser.parseError(_("Unexpected end tag (#{name}) in the after frameset phase. Ignored."))
|
||||
parse_error(_("Unexpected end tag (#{name}) in the after frameset phase. Ignored."))
|
||||
end
|
||||
|
||||
end
|
|
@ -1,48 +1,48 @@
|
|||
require 'html5lib/html5parser/phase'
|
||||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5lib
|
||||
module HTML5
|
||||
class AfterHeadPhase < Phase
|
||||
|
||||
handle_start 'html', 'body', 'frameset', %w( base link meta script style title ) => 'FromHead'
|
||||
|
||||
def processEOF
|
||||
anythingElse
|
||||
@parser.phase.processEOF
|
||||
def process_eof
|
||||
anything_else
|
||||
@parser.phase.process_eof
|
||||
end
|
||||
|
||||
def processCharacters(data)
|
||||
anythingElse
|
||||
anything_else
|
||||
@parser.phase.processCharacters(data)
|
||||
end
|
||||
|
||||
def startTagBody(name, attributes)
|
||||
@tree.insertElement(name, attributes)
|
||||
@tree.insert_element(name, attributes)
|
||||
@parser.phase = @parser.phases[:inBody]
|
||||
end
|
||||
|
||||
def startTagFrameset(name, attributes)
|
||||
@tree.insertElement(name, attributes)
|
||||
@tree.insert_element(name, attributes)
|
||||
@parser.phase = @parser.phases[:inFrameset]
|
||||
end
|
||||
|
||||
def startTagFromHead(name, attributes)
|
||||
@parser.parseError(_("Unexpected start tag (#{name}) that can be in head. Moved."))
|
||||
parse_error(_("Unexpected start tag (#{name}) that can be in head. Moved."))
|
||||
@parser.phase = @parser.phases[:inHead]
|
||||
@parser.phase.processStartTag(name, attributes)
|
||||
end
|
||||
|
||||
def startTagOther(name, attributes)
|
||||
anythingElse
|
||||
anything_else
|
||||
@parser.phase.processStartTag(name, attributes)
|
||||
end
|
||||
|
||||
def processEndTag(name)
|
||||
anythingElse
|
||||
anything_else
|
||||
@parser.phase.processEndTag(name)
|
||||
end
|
||||
|
||||
def anythingElse
|
||||
@tree.insertElement('body', {})
|
||||
def anything_else
|
||||
@tree.insert_element('body', {})
|
||||
@parser.phase = @parser.phases[:inBody]
|
||||
end
|
||||
|
|
@ -1,15 +1,15 @@
|
|||
require 'html5lib/html5parser/phase'
|
||||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5lib
|
||||
module HTML5
|
||||
class BeforeHeadPhase < Phase
|
||||
|
||||
handle_start 'html', 'head'
|
||||
|
||||
handle_end %w( html head body br ) => 'ImplyHead'
|
||||
handle_end %w( html head body br p ) => 'ImplyHead'
|
||||
|
||||
def processEOF
|
||||
def process_eof
|
||||
startTagHead('head', {})
|
||||
@parser.phase.processEOF
|
||||
@parser.phase.process_eof
|
||||
end
|
||||
|
||||
def processCharacters(data)
|
||||
|
@ -18,8 +18,8 @@ module HTML5lib
|
|||
end
|
||||
|
||||
def startTagHead(name, attributes)
|
||||
@tree.insertElement(name, attributes)
|
||||
@tree.headPointer = @tree.openElements[-1]
|
||||
@tree.insert_element(name, attributes)
|
||||
@tree.head_pointer = @tree.open_elements[-1]
|
||||
@parser.phase = @parser.phases[:inHead]
|
||||
end
|
||||
|
||||
|
@ -34,7 +34,7 @@ module HTML5lib
|
|||
end
|
||||
|
||||
def endTagOther(name)
|
||||
@parser.parseError(_("Unexpected end tag (#{name}) after the (implied) root element."))
|
||||
parse_error(_("Unexpected end tag (#{name}) after the (implied) root element."))
|
||||
end
|
||||
|
||||
end
|
|
@ -1,6 +1,6 @@
|
|||
require 'html5lib/html5parser/phase'
|
||||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5lib
|
||||
module HTML5
|
||||
class InBodyPhase < Phase
|
||||
|
||||
# http://www.whatwg.org/specs/web-apps/current-work/#in-body
|
||||
|
@ -51,25 +51,40 @@ module HTML5lib
|
|||
|
||||
# for special handling of whitespace in <pre>
|
||||
@processSpaceCharactersDropNewline = false
|
||||
if $-w
|
||||
$-w = false
|
||||
alias processSpaceCharactersNonPre processSpaceCharacters
|
||||
$-w = true
|
||||
else
|
||||
alias processSpaceCharactersNonPre processSpaceCharacters
|
||||
end
|
||||
end
|
||||
|
||||
def processSpaceCharactersDropNewline(data)
|
||||
#Sometimes (start of <pre> blocks) we want to drop leading newlines
|
||||
@processSpaceCharactersDropNewline = false
|
||||
if (data.length > 0 and data[0] == ?\n and
|
||||
%w[pre textarea].include?(@tree.openElements[-1].name) and
|
||||
not @tree.openElements[-1].hasContent)
|
||||
# #Sometimes (start of <pre> blocks) we want to drop leading newlines
|
||||
|
||||
if $-w
|
||||
$-w = false
|
||||
alias processSpaceCharacters processSpaceCharactersNonPre
|
||||
$-w = true
|
||||
else
|
||||
alias processSpaceCharacters processSpaceCharactersNonPre
|
||||
end
|
||||
|
||||
if (data.length > 0 and data[0] == ?\n &&
|
||||
%w[pre textarea].include?(@tree.open_elements.last.name) && !@tree.open_elements.last.hasContent)
|
||||
data = data[1..-1]
|
||||
end
|
||||
@tree.insertText(data) if data.length > 0
|
||||
|
||||
if data.length > 0
|
||||
@tree.reconstructActiveFormattingElements
|
||||
@tree.insertText(data)
|
||||
end
|
||||
end
|
||||
|
||||
def processSpaceCharacters(data)
|
||||
if @processSpaceCharactersDropNewline
|
||||
processSpaceCharactersDropNewline(data)
|
||||
else
|
||||
super(data)
|
||||
end
|
||||
@tree.reconstructActiveFormattingElements()
|
||||
@tree.insertText(data)
|
||||
end
|
||||
|
||||
def processCharacters(data)
|
||||
|
@ -85,20 +100,19 @@ module HTML5lib
|
|||
end
|
||||
|
||||
def startTagTitle(name, attributes)
|
||||
@parser.parseError(_("Unexpected start tag (#{name}) that belongs in the head. Moved."))
|
||||
parse_error(_("Unexpected start tag (#{name}) that belongs in the head. Moved."))
|
||||
@parser.phases[:inHead].processStartTag(name, attributes)
|
||||
end
|
||||
|
||||
def startTagBody(name, attributes)
|
||||
@parser.parseError(_('Unexpected start tag (body).'))
|
||||
parse_error(_('Unexpected start tag (body).'))
|
||||
|
||||
if (@tree.openElements.length == 1 or
|
||||
@tree.openElements[1].name != 'body')
|
||||
assert @parser.innerHTML
|
||||
if (@tree.open_elements.length == 1 || @tree.open_elements[1].name != 'body')
|
||||
assert @parser.inner_html
|
||||
else
|
||||
attributes.each do |attr, value|
|
||||
unless @tree.openElements[1].attributes.has_key?(attr)
|
||||
@tree.openElements[1].attributes[attr] = value
|
||||
unless @tree.open_elements[1].attributes.has_key?(attr)
|
||||
@tree.open_elements[1].attributes[attr] = value
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -106,17 +120,17 @@ module HTML5lib
|
|||
|
||||
def startTagCloseP(name, attributes)
|
||||
endTagP('p') if in_scope?('p')
|
||||
@tree.insertElement(name, attributes)
|
||||
@tree.insert_element(name, attributes)
|
||||
@processSpaceCharactersDropNewline = true if name == 'pre'
|
||||
end
|
||||
|
||||
def startTagForm(name, attributes)
|
||||
if @tree.formPointer
|
||||
@parser.parseError('Unexpected start tag (form). Ignored.')
|
||||
parse_error(_('Unexpected start tag (form). Ignored.'))
|
||||
else
|
||||
endTagP('p') if in_scope?('p')
|
||||
@tree.insertElement(name, attributes)
|
||||
@tree.formPointer = @tree.openElements[-1]
|
||||
@tree.insert_element(name, attributes)
|
||||
@tree.formPointer = @tree.open_elements[-1]
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -125,31 +139,28 @@ module HTML5lib
|
|||
stopNames = {'li' => ['li'], 'dd' => ['dd', 'dt'], 'dt' => ['dd', 'dt']}
|
||||
stopName = stopNames[name]
|
||||
|
||||
@tree.openElements.reverse.each_with_index do |node, i|
|
||||
@tree.open_elements.reverse.each_with_index do |node, i|
|
||||
if stopName.include?(node.name)
|
||||
poppedNodes = (0..i).collect { @tree.openElements.pop }
|
||||
poppedNodes = (0..i).collect { @tree.open_elements.pop }
|
||||
if i >= 1
|
||||
@parser.parseError("Missing end tag%s (%s)" % [
|
||||
(i>1 ? 's' : ''),
|
||||
poppedNodes.reverse.map {|item| item.name}.join(', ')])
|
||||
parse_error(_("Missing end tag%s (%s)" % [(i>1 ? 's' : ''), poppedNodes.reverse.map{|item| item.name}.join(', ')]))
|
||||
end
|
||||
break
|
||||
end
|
||||
|
||||
# Phrasing elements are all non special, non scoping, non
|
||||
# formatting elements
|
||||
break if ((SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name) and
|
||||
not ['address', 'div'].include?(node.name))
|
||||
break if ((SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name) && !%w[address div].include?(node.name))
|
||||
end
|
||||
|
||||
# Always insert an <li> element.
|
||||
@tree.insertElement(name, attributes)
|
||||
@tree.insert_element(name, attributes)
|
||||
end
|
||||
|
||||
def startTagPlaintext(name, attributes)
|
||||
endTagP('p') if in_scope?('p')
|
||||
@tree.insertElement(name, attributes)
|
||||
@parser.tokenizer.contentModelFlag = :PLAINTEXT
|
||||
@tree.insert_element(name, attributes)
|
||||
@parser.tokenizer.content_model_flag = :PLAINTEXT
|
||||
end
|
||||
|
||||
def startTagHeading(name, attributes)
|
||||
|
@ -158,7 +169,7 @@ module HTML5lib
|
|||
# Uncomment the following for IE7 behavior:
|
||||
# HEADING_ELEMENTS.each do |element|
|
||||
# if in_scope?(element)
|
||||
# @parser.parseError(_("Unexpected start tag (#{name})."))
|
||||
# parse_error(_("Unexpected start tag (#{name})."))
|
||||
#
|
||||
# remove_open_elements_until do |element|
|
||||
# HEADING_ELEMENTS.include?(element.name)
|
||||
|
@ -167,14 +178,14 @@ module HTML5lib
|
|||
# break
|
||||
# end
|
||||
# end
|
||||
@tree.insertElement(name, attributes)
|
||||
@tree.insert_element(name, attributes)
|
||||
end
|
||||
|
||||
def startTagA(name, attributes)
|
||||
if afeAElement = @tree.elementInActiveFormattingElements('a')
|
||||
@parser.parseError(_('Unexpected start tag (a) implies end tag (a).'))
|
||||
parse_error(_('Unexpected start tag (a) implies end tag (a).'))
|
||||
endTagFormatting('a')
|
||||
@tree.openElements.delete(afeAElement) if @tree.openElements.include?(afeAElement)
|
||||
@tree.open_elements.delete(afeAElement) if @tree.open_elements.include?(afeAElement)
|
||||
@tree.activeFormattingElements.delete(afeAElement) if @tree.activeFormattingElements.include?(afeAElement)
|
||||
end
|
||||
@tree.reconstructActiveFormattingElements
|
||||
|
@ -188,70 +199,75 @@ module HTML5lib
|
|||
|
||||
def startTagNobr(name, attributes)
|
||||
@tree.reconstructActiveFormattingElements
|
||||
processEndTag('nobr') if in_scope?('nobr')
|
||||
if in_scope?('nobr')
|
||||
parse_error(_('Unexpected start tag (nobr) implies end tag (nobr).'))
|
||||
processEndTag('nobr')
|
||||
# XXX Need tests that trigger the following
|
||||
@tree.reconstructActiveFormattingElements
|
||||
end
|
||||
addFormattingElement(name, attributes)
|
||||
end
|
||||
|
||||
def startTagButton(name, attributes)
|
||||
if in_scope?('button')
|
||||
@parser.parseError(_('Unexpected start tag (button) implied end tag (button).'))
|
||||
parse_error(_('Unexpected start tag (button) implied end tag (button).'))
|
||||
processEndTag('button')
|
||||
@parser.phase.processStartTag(name, attributes)
|
||||
else
|
||||
@tree.reconstructActiveFormattingElements
|
||||
@tree.insertElement(name, attributes)
|
||||
@tree.insert_element(name, attributes)
|
||||
@tree.activeFormattingElements.push(Marker)
|
||||
end
|
||||
end
|
||||
|
||||
def startTagMarqueeObject(name, attributes)
|
||||
@tree.reconstructActiveFormattingElements
|
||||
@tree.insertElement(name, attributes)
|
||||
@tree.insert_element(name, attributes)
|
||||
@tree.activeFormattingElements.push(Marker)
|
||||
end
|
||||
|
||||
def startTagXmp(name, attributes)
|
||||
@tree.reconstructActiveFormattingElements
|
||||
@tree.insertElement(name, attributes)
|
||||
@parser.tokenizer.contentModelFlag = :CDATA
|
||||
@tree.insert_element(name, attributes)
|
||||
@parser.tokenizer.content_model_flag = :CDATA
|
||||
end
|
||||
|
||||
def startTagTable(name, attributes)
|
||||
processEndTag('p') if in_scope?('p')
|
||||
@tree.insertElement(name, attributes)
|
||||
@tree.insert_element(name, attributes)
|
||||
@parser.phase = @parser.phases[:inTable]
|
||||
end
|
||||
|
||||
def startTagVoidFormatting(name, attributes)
|
||||
@tree.reconstructActiveFormattingElements
|
||||
@tree.insertElement(name, attributes)
|
||||
@tree.openElements.pop
|
||||
@tree.insert_element(name, attributes)
|
||||
@tree.open_elements.pop
|
||||
end
|
||||
|
||||
def startTagHr(name, attributes)
|
||||
endTagP('p') if in_scope?('p')
|
||||
@tree.insertElement(name, attributes)
|
||||
@tree.openElements.pop
|
||||
@tree.insert_element(name, attributes)
|
||||
@tree.open_elements.pop
|
||||
end
|
||||
|
||||
def startTagImage(name, attributes)
|
||||
# No really...
|
||||
@parser.parseError(_('Unexpected start tag (image). Treated as img.'))
|
||||
parse_error(_('Unexpected start tag (image). Treated as img.'))
|
||||
processStartTag('img', attributes)
|
||||
end
|
||||
|
||||
def startTagInput(name, attributes)
|
||||
@tree.reconstructActiveFormattingElements
|
||||
@tree.insertElement(name, attributes)
|
||||
@tree.insert_element(name, attributes)
|
||||
if @tree.formPointer
|
||||
# XXX Not exactly sure what to do here
|
||||
# @tree.openElements[-1].form = @tree.formPointer
|
||||
# @tree.open_elements[-1].form = @tree.formPointer
|
||||
end
|
||||
@tree.openElements.pop
|
||||
@tree.open_elements.pop
|
||||
end
|
||||
|
||||
def startTagIsindex(name, attributes)
|
||||
@parser.parseError("Unexpected start tag isindex. Don't use it!")
|
||||
parse_error(_("Unexpected start tag isindex. Don't use it!"))
|
||||
return if @tree.formPointer
|
||||
processStartTag('form', {})
|
||||
processStartTag('hr', {})
|
||||
|
@ -270,20 +286,21 @@ module HTML5lib
|
|||
|
||||
def startTagTextarea(name, attributes)
|
||||
# XXX Form element pointer checking here as well...
|
||||
@tree.insertElement(name, attributes)
|
||||
@parser.tokenizer.contentModelFlag = :RCDATA
|
||||
@tree.insert_element(name, attributes)
|
||||
@parser.tokenizer.content_model_flag = :RCDATA
|
||||
@processSpaceCharactersDropNewline = true
|
||||
alias processSpaceCharacters processSpaceCharactersDropNewline
|
||||
end
|
||||
|
||||
# iframe, noembed noframes, noscript(if scripting enabled)
|
||||
def startTagCdata(name, attributes)
|
||||
@tree.insertElement(name, attributes)
|
||||
@parser.tokenizer.contentModelFlag = :CDATA
|
||||
@tree.insert_element(name, attributes)
|
||||
@parser.tokenizer.content_model_flag = :CDATA
|
||||
end
|
||||
|
||||
def startTagSelect(name, attributes)
|
||||
@tree.reconstructActiveFormattingElements
|
||||
@tree.insertElement(name, attributes)
|
||||
@tree.insert_element(name, attributes)
|
||||
@parser.phase = @parser.phases[:inSelect]
|
||||
end
|
||||
|
||||
|
@ -293,7 +310,7 @@ module HTML5lib
|
|||
# "caption", "col", "colgroup", "frame", "frameset", "head",
|
||||
# "option", "optgroup", "tbody", "td", "tfoot", "th", "thead",
|
||||
# "tr", "noscript"
|
||||
@parser.parseError(_("Unexpected start tag (#{name}). Ignored."))
|
||||
parse_error(_("Unexpected start tag (#{name}). Ignored."))
|
||||
end
|
||||
|
||||
def startTagNew(name, attributes)
|
||||
|
@ -306,33 +323,38 @@ module HTML5lib
|
|||
|
||||
def startTagOther(name, attributes)
|
||||
@tree.reconstructActiveFormattingElements
|
||||
@tree.insertElement(name, attributes)
|
||||
@tree.insert_element(name, attributes)
|
||||
end
|
||||
|
||||
def endTagP(name)
|
||||
@tree.generateImpliedEndTags('p') if in_scope?('p')
|
||||
@parser.parseError('Unexpected end tag (p).') unless @tree.openElements[-1].name == 'p'
|
||||
@tree.openElements.pop while in_scope?('p')
|
||||
parse_error(_('Unexpected end tag (p).')) unless @tree.open_elements.last.name == 'p'
|
||||
if in_scope?('p')
|
||||
@tree.open_elements.pop while in_scope?('p')
|
||||
else
|
||||
startTagCloseP('p', {})
|
||||
endTagP('p')
|
||||
end
|
||||
end
|
||||
|
||||
def endTagBody(name)
|
||||
# XXX Need to take open <p> tags into account here. We shouldn't imply
|
||||
# </p> but we should not throw a parse error either. Specification is
|
||||
# likely to be updated.
|
||||
unless @tree.openElements[1].name == 'body'
|
||||
# innerHTML case
|
||||
@parser.parseError
|
||||
unless @tree.open_elements[1].name == 'body'
|
||||
# inner_html case
|
||||
parse_error
|
||||
return
|
||||
end
|
||||
unless @tree.openElements[-1].name == 'body'
|
||||
@parser.parseError(_("Unexpected end tag (body). Missing end tag (#{@tree.openElements[-1].name})."))
|
||||
unless @tree.open_elements.last.name == 'body'
|
||||
parse_error(_("Unexpected end tag (body). Missing end tag (#{@tree.open_elements[-1].name})."))
|
||||
end
|
||||
@parser.phase = @parser.phases[:afterBody]
|
||||
end
|
||||
|
||||
def endTagHtml(name)
|
||||
endTagBody(name)
|
||||
@parser.phase.processEndTag(name) unless @parser.innerHTML
|
||||
@parser.phase.processEndTag(name) unless @parser.inner_html
|
||||
end
|
||||
|
||||
def endTagBlock(name)
|
||||
|
@ -341,8 +363,8 @@ module HTML5lib
|
|||
|
||||
@tree.generateImpliedEndTags if in_scope?(name)
|
||||
|
||||
unless @tree.openElements[-1].name == name
|
||||
@parser.parseError(("End tag (#{name}) seen too early. Expected other end tag."))
|
||||
unless @tree.open_elements.last.name == name
|
||||
parse_error(_("End tag (#{name}) seen too early. Expected other end tag."))
|
||||
end
|
||||
|
||||
if in_scope?(name)
|
||||
|
@ -351,18 +373,23 @@ module HTML5lib
|
|||
end
|
||||
|
||||
def endTagForm(name)
|
||||
endTagBlock(name)
|
||||
if in_scope?(name)
|
||||
@tree.generateImpliedEndTags
|
||||
end
|
||||
if @tree.open_elements.last.name != name
|
||||
parse_error(_("End tag (form) seen too early. Ignored."))
|
||||
else
|
||||
@tree.open_elements.pop
|
||||
end
|
||||
@tree.formPointer = nil
|
||||
end
|
||||
|
||||
def endTagListItem(name)
|
||||
# AT Could merge this with the Block case
|
||||
if in_scope?(name)
|
||||
@tree.generateImpliedEndTags(name)
|
||||
@tree.generateImpliedEndTags(name) if in_scope?(name)
|
||||
|
||||
unless @tree.openElements[-1].name == name
|
||||
@parser.parseError(("End tag (#{name}) seen too early. Expected other end tag."))
|
||||
end
|
||||
unless @tree.open_elements.last.name == name
|
||||
parse_error(_("End tag (#{name}) seen too early. " + 'Expected other end tag.'))
|
||||
end
|
||||
|
||||
remove_open_elements_until(name) if in_scope?(name)
|
||||
|
@ -376,8 +403,8 @@ module HTML5lib
|
|||
end
|
||||
end
|
||||
|
||||
unless @tree.openElements[-1].name == name
|
||||
@parser.parseError(("Unexpected end tag (#{name}). Expected other end tag."))
|
||||
unless @tree.open_elements.last.name == name
|
||||
parse_error(_("Unexpected end tag (#{name}). Expected other end tag."))
|
||||
end
|
||||
|
||||
HEADING_ELEMENTS.each do |element|
|
||||
|
@ -391,30 +418,30 @@ module HTML5lib
|
|||
# The much-feared adoption agency algorithm
|
||||
def endTagFormatting(name)
|
||||
# http://www.whatwg.org/specs/web-apps/current-work/#adoptionAgency
|
||||
# XXX Better parseError messages appreciated.
|
||||
# XXX Better parse_error messages appreciated.
|
||||
while true
|
||||
# Step 1 paragraph 1
|
||||
afeElement = @tree.elementInActiveFormattingElements(name)
|
||||
if not afeElement or (@tree.openElements.include?(afeElement) and not in_scope?(afeElement.name))
|
||||
@parser.parseError(_("End tag (#{name}) violates step 1, paragraph 1 of the adoption agency algorithm."))
|
||||
if !afeElement or (@tree.open_elements.include?(afeElement) && !in_scope?(afeElement.name))
|
||||
parse_error(_("End tag (#{name}) violates step 1, paragraph 1 of the adoption agency algorithm."))
|
||||
return
|
||||
# Step 1 paragraph 2
|
||||
elsif not @tree.openElements.include?(afeElement)
|
||||
@parser.parseError(_("End tag (#{name}) violates step 1, paragraph 2 of the adoption agency algorithm."))
|
||||
elsif not @tree.open_elements.include?(afeElement)
|
||||
parse_error(_("End tag (#{name}) violates step 1, paragraph 2 of the adoption agency algorithm."))
|
||||
@tree.activeFormattingElements.delete(afeElement)
|
||||
return
|
||||
end
|
||||
|
||||
# Step 1 paragraph 3
|
||||
if afeElement != @tree.openElements[-1]
|
||||
@parser.parseError(_("End tag (#{name}) violates step 1, paragraph 3 of the adoption agency algorithm."))
|
||||
if afeElement != @tree.open_elements.last
|
||||
parse_error(_("End tag (#{name}) violates step 1, paragraph 3 of the adoption agency algorithm."))
|
||||
end
|
||||
|
||||
# Step 2
|
||||
# Start of the adoption agency algorithm proper
|
||||
afeIndex = @tree.openElements.index(afeElement)
|
||||
afeIndex = @tree.open_elements.index(afeElement)
|
||||
furthestBlock = nil
|
||||
@tree.openElements[afeIndex..-1].each do |element|
|
||||
@tree.open_elements[afeIndex..-1].each do |element|
|
||||
if (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(element.name)
|
||||
furthestBlock = element
|
||||
break
|
||||
|
@ -427,7 +454,7 @@ module HTML5lib
|
|||
@tree.activeFormattingElements.delete(element)
|
||||
return
|
||||
end
|
||||
commonAncestor = @tree.openElements[afeIndex - 1]
|
||||
commonAncestor = @tree.open_elements[afeIndex - 1]
|
||||
|
||||
# Step 5
|
||||
furthestBlock.parent.removeChild(furthestBlock) if furthestBlock.parent
|
||||
|
@ -444,11 +471,11 @@ module HTML5lib
|
|||
while true
|
||||
# AT replace this with a function and recursion?
|
||||
# Node is element before node in open elements
|
||||
node = @tree.openElements[@tree.openElements.index(node) - 1]
|
||||
node = @tree.open_elements[@tree.open_elements.index(node) - 1]
|
||||
until @tree.activeFormattingElements.include?(node)
|
||||
tmpNode = node
|
||||
node = @tree.openElements[@tree.openElements.index(node) - 1]
|
||||
@tree.openElements.delete(tmpNode)
|
||||
node = @tree.open_elements[@tree.open_elements.index(node) - 1]
|
||||
@tree.open_elements.delete(tmpNode)
|
||||
end
|
||||
# Step 7.3
|
||||
break if node == afeElement
|
||||
|
@ -465,7 +492,7 @@ module HTML5lib
|
|||
clone = node.cloneNode
|
||||
# Replace node with clone
|
||||
@tree.activeFormattingElements[@tree.activeFormattingElements.index(node)] = clone
|
||||
@tree.openElements[@tree.openElements.index(node)] = clone
|
||||
@tree.open_elements[@tree.open_elements.index(node)] = clone
|
||||
node = clone
|
||||
end
|
||||
# Step 7.6
|
||||
|
@ -495,16 +522,16 @@ module HTML5lib
|
|||
@tree.activeFormattingElements.insert([bookmark,@tree.activeFormattingElements.length].min, clone)
|
||||
|
||||
# Step 13
|
||||
@tree.openElements.delete(afeElement)
|
||||
@tree.openElements.insert(@tree.openElements.index(furthestBlock) + 1, clone)
|
||||
@tree.open_elements.delete(afeElement)
|
||||
@tree.open_elements.insert(@tree.open_elements.index(furthestBlock) + 1, clone)
|
||||
end
|
||||
end
|
||||
|
||||
def endTagButtonMarqueeObject(name)
|
||||
@tree.generateImpliedEndTags if in_scope?(name)
|
||||
|
||||
unless @tree.openElements[-1].name == name
|
||||
@parser.parseError(_("Unexpected end tag (#{name}). Expected other end tag first."))
|
||||
unless @tree.open_elements.last.name == name
|
||||
parse_error(_("Unexpected end tag (#{name}). Expected other end tag first."))
|
||||
end
|
||||
|
||||
if in_scope?(name)
|
||||
|
@ -516,26 +543,26 @@ module HTML5lib
|
|||
|
||||
def endTagMisplaced(name)
|
||||
# This handles elements with end tags in other insertion modes.
|
||||
@parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
|
||||
parse_error(_("Unexpected end tag (#{name}). Ignored."))
|
||||
end
|
||||
|
||||
def endTagBr(name)
|
||||
@parser.parseError(_("Unexpected end tag (br). Treated as br element."))
|
||||
parse_error(_("Unexpected end tag (br). Treated as br element."))
|
||||
@tree.reconstructActiveFormattingElements
|
||||
@tree.insertElement(name, {})
|
||||
@tree.openElements.pop()
|
||||
@tree.insert_element(name, {})
|
||||
@tree.open_elements.pop()
|
||||
end
|
||||
|
||||
def endTagNone(name)
|
||||
# This handles elements with no end tag.
|
||||
@parser.parseError(_("This tag (#{name}) has no end tag"))
|
||||
parse_error(_("This tag (#{name}) has no end tag"))
|
||||
end
|
||||
|
||||
def endTagCdataTextAreaXmp(name)
|
||||
if @tree.openElements[-1].name == name
|
||||
@tree.openElements.pop
|
||||
if @tree.open_elements.last.name == name
|
||||
@tree.open_elements.pop
|
||||
else
|
||||
@parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
|
||||
parse_error(_("Unexpected end tag (#{name}). Ignored."))
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -549,12 +576,12 @@ module HTML5lib
|
|||
|
||||
def endTagOther(name)
|
||||
# XXX This logic should be moved into the treebuilder
|
||||
@tree.openElements.reverse.each do |node|
|
||||
@tree.open_elements.reverse.each do |node|
|
||||
if node.name == name
|
||||
@tree.generateImpliedEndTags
|
||||
|
||||
unless @tree.openElements[-1].name == name
|
||||
@parser.parseError(_("Unexpected end tag (#{name})."))
|
||||
unless @tree.open_elements.last.name == name
|
||||
parse_error(_("Unexpected end tag (#{name})."))
|
||||
end
|
||||
|
||||
remove_open_elements_until {|element| element == node }
|
||||
|
@ -562,7 +589,7 @@ module HTML5lib
|
|||
break
|
||||
else
|
||||
if (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name)
|
||||
@parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
|
||||
parse_error(_("Unexpected end tag (#{name}). Ignored."))
|
||||
break
|
||||
end
|
||||
end
|
||||
|
@ -572,8 +599,8 @@ module HTML5lib
|
|||
protected
|
||||
|
||||
def addFormattingElement(name, attributes)
|
||||
@tree.insertElement(name, attributes)
|
||||
@tree.activeFormattingElements.push(@tree.openElements[-1])
|
||||
@tree.insert_element(name, attributes)
|
||||
@tree.activeFormattingElements.push(@tree.open_elements.last)
|
||||
end
|
||||
|
||||
end
|
|
@ -1,6 +1,6 @@
|
|||
require 'html5lib/html5parser/phase'
|
||||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5lib
|
||||
module HTML5
|
||||
class InCaptionPhase < Phase
|
||||
|
||||
# http://www.whatwg.org/specs/web-apps/current-work/#in-caption
|
||||
|
@ -10,7 +10,7 @@ module HTML5lib
|
|||
handle_end 'caption', 'table', %w( body col colgroup html tbody td tfoot th thead tr ) => 'Ignore'
|
||||
|
||||
def ignoreEndTagCaption
|
||||
not in_scope?('caption', true)
|
||||
!in_scope?('caption', true)
|
||||
end
|
||||
|
||||
def processCharacters(data)
|
||||
|
@ -18,7 +18,7 @@ module HTML5lib
|
|||
end
|
||||
|
||||
def startTagTableElement(name, attributes)
|
||||
@parser.parseError
|
||||
parse_error
|
||||
#XXX Have to duplicate logic here to find out if the tag is ignored
|
||||
ignoreEndTag = ignoreEndTagCaption
|
||||
@parser.phase.processEndTag('caption')
|
||||
|
@ -31,15 +31,15 @@ module HTML5lib
|
|||
|
||||
def endTagCaption(name)
|
||||
if ignoreEndTagCaption
|
||||
# innerHTML case
|
||||
assert @parser.innerHTML
|
||||
@parser.parseError
|
||||
# inner_html case
|
||||
assert @parser.inner_html
|
||||
parse_error
|
||||
else
|
||||
# AT this code is quite similar to endTagTable in "InTable"
|
||||
@tree.generateImpliedEndTags
|
||||
|
||||
unless @tree.openElements[-1].name == 'caption'
|
||||
@parser.parseError(_("Unexpected end tag (caption). Missing end tags."))
|
||||
unless @tree.open_elements[-1].name == 'caption'
|
||||
parse_error(_("Unexpected end tag (caption). Missing end tags."))
|
||||
end
|
||||
|
||||
remove_open_elements_until('caption')
|
||||
|
@ -50,14 +50,14 @@ module HTML5lib
|
|||
end
|
||||
|
||||
def endTagTable(name)
|
||||
@parser.parseError
|
||||
parse_error
|
||||
ignoreEndTag = ignoreEndTagCaption
|
||||
@parser.phase.processEndTag('caption')
|
||||
@parser.phase.processEndTag(name) unless ignoreEndTag
|
||||
end
|
||||
|
||||
def endTagIgnore(name)
|
||||
@parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
|
||||
parse_error(_("Unexpected end tag (#{name}). Ignored."))
|
||||
end
|
||||
|
||||
def endTagOther(name)
|
|
@ -1,6 +1,6 @@
|
|||
require 'html5lib/html5parser/phase'
|
||||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5lib
|
||||
module HTML5
|
||||
class InCellPhase < Phase
|
||||
|
||||
# http://www.whatwg.org/specs/web-apps/current-work/#in-cell
|
||||
|
@ -20,8 +20,8 @@ module HTML5lib
|
|||
closeCell
|
||||
@parser.phase.processStartTag(name, attributes)
|
||||
else
|
||||
# innerHTML case
|
||||
@parser.parseError
|
||||
# inner_html case
|
||||
parse_error
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -32,22 +32,22 @@ module HTML5lib
|
|||
def endTagTableCell(name)
|
||||
if in_scope?(name, true)
|
||||
@tree.generateImpliedEndTags(name)
|
||||
if @tree.openElements[-1].name != name
|
||||
@parser.parseError("Got table cell end tag (#{name}) while required end tags are missing.")
|
||||
if @tree.open_elements.last.name != name
|
||||
parse_error("Got table cell end tag (#{name}) while required end tags are missing.")
|
||||
|
||||
remove_open_elements_until(name)
|
||||
else
|
||||
@tree.openElements.pop
|
||||
@tree.open_elements.pop
|
||||
end
|
||||
@tree.clearActiveFormattingElements
|
||||
@parser.phase = @parser.phases[:inRow]
|
||||
else
|
||||
@parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
|
||||
parse_error(_("Unexpected end tag (#{name}). Ignored."))
|
||||
end
|
||||
end
|
||||
|
||||
def endTagIgnore(name)
|
||||
@parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
|
||||
parse_error(_("Unexpected end tag (#{name}). Ignored."))
|
||||
end
|
||||
|
||||
def endTagImply(name)
|
||||
|
@ -55,8 +55,8 @@ module HTML5lib
|
|||
closeCell
|
||||
@parser.phase.processEndTag(name)
|
||||
else
|
||||
# sometimes innerHTML case
|
||||
@parser.parseError
|
||||
# sometimes inner_html case
|
||||
parse_error
|
||||
end
|
||||
end
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
require 'html5lib/html5parser/phase'
|
||||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5lib
|
||||
module HTML5
|
||||
class InColumnGroupPhase < Phase
|
||||
|
||||
# http://www.whatwg.org/specs/web-apps/current-work/#in-column
|
||||
|
@ -10,7 +10,7 @@ module HTML5lib
|
|||
handle_end 'colgroup', 'col'
|
||||
|
||||
def ignoreEndTagColgroup
|
||||
@tree.openElements[-1].name == 'html'
|
||||
@tree.open_elements[-1].name == 'html'
|
||||
end
|
||||
|
||||
def processCharacters(data)
|
||||
|
@ -20,8 +20,8 @@ module HTML5lib
|
|||
end
|
||||
|
||||
def startTagCol(name, attributes)
|
||||
@tree.insertElement(name, attributes)
|
||||
@tree.openElements.pop
|
||||
@tree.insert_element(name, attributes)
|
||||
@tree.open_elements.pop
|
||||
end
|
||||
|
||||
def startTagOther(name, attributes)
|
||||
|
@ -32,17 +32,17 @@ module HTML5lib
|
|||
|
||||
def endTagColgroup(name)
|
||||
if ignoreEndTagColgroup
|
||||
# innerHTML case
|
||||
assert @parser.innerHTML
|
||||
@parser.parseError
|
||||
# inner_html case
|
||||
assert @parser.inner_html
|
||||
parse_error
|
||||
else
|
||||
@tree.openElements.pop
|
||||
@tree.open_elements.pop
|
||||
@parser.phase = @parser.phases[:inTable]
|
||||
end
|
||||
end
|
||||
|
||||
def endTagCol(name)
|
||||
@parser.parseError(_('Unexpected end tag (col). col has no end tag.'))
|
||||
parse_error(_('Unexpected end tag (col). col has no end tag.'))
|
||||
end
|
||||
|
||||
def endTagOther(name)
|
57
vendor/plugins/HTML5lib/lib/html5/html5parser/in_frameset_phase.rb
vendored
Normal file
57
vendor/plugins/HTML5lib/lib/html5/html5parser/in_frameset_phase.rb
vendored
Normal file
|
@ -0,0 +1,57 @@
|
|||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5
  # Token handlers used while the parser is in its frameset phase.
  #
  # http://www.whatwg.org/specs/web-apps/current-work/#in-frameset
  class InFramesetPhase < Phase

    handle_start 'html', 'frameset', 'frame', 'noframes'

    handle_end 'frameset', 'noframes'

    # Character data is reported as a parse error; nothing is inserted.
    def processCharacters(data)
      message = _('Unexpected characters in the frameset phase. Characters ignored.')
      parse_error(message)
    end

    # A nested <frameset> is simply inserted into the tree.
    def startTagFrameset(name, attributes)
      @tree.insert_element(name, attributes)
    end

    # <frame> is inserted and immediately popped off the stack of open
    # elements again.
    def startTagFrame(name, attributes)
      @tree.insert_element(name, attributes)
      @tree.open_elements.pop
    end

    # <noframes> start tags are handed to the :inBody phase.
    def startTagNoframes(name, attributes)
      body_phase = @parser.phases[:inBody]
      body_phase.processStartTag(name, attributes)
    end

    # Any other start tag is reported as a parse error and otherwise ignored.
    def startTagOther(name, attributes)
      message = _("Unexpected start tag token (#{name}) in the frameset phase. Ignored")
      parse_error(message)
    end

    # Closes the current frameset, then leaves the frameset phase when
    # appropriate.
    def endTagFrameset(name)
      if @tree.open_elements.last.name != 'html'
        @tree.open_elements.pop
      else
        # inner_html case
        parse_error(_("Unexpected end tag token (frameset) in the frameset phase (inner_html)."))
      end

      # If we're not in inner_html mode and the current node is not a
      # "frameset" element (anymore) then switch.
      unless @parser.inner_html || @tree.open_elements.last.name == 'frameset'
        @parser.phase = @parser.phases[:afterFrameset]
      end
    end

    # </noframes> end tags are handed to the :inBody phase.
    def endTagNoframes(name)
      body_phase = @parser.phases[:inBody]
      body_phase.processEndTag(name)
    end

    # Any other end tag is reported as a parse error and otherwise ignored.
    def endTagOther(name)
      message = _("Unexpected end tag token (#{name}) in the frameset phase. Ignored.")
      parse_error(message)
    end

  end
end
|
138
vendor/plugins/HTML5lib/lib/html5/html5parser/in_head_phase.rb
vendored
Normal file
138
vendor/plugins/HTML5lib/lib/html5/html5parser/in_head_phase.rb
vendored
Normal file
|
@ -0,0 +1,138 @@
|
|||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5
  # Token handlers used while the parser is in its :inHead phase.
  #
  # The handle_start / handle_end declarations register which tag names are
  # routed to which startTag* / endTag* method below; every other tag falls
  # through to startTagOther / endTagOther.
  class InHeadPhase < Phase

    handle_start 'html', 'head', 'title', 'style', 'script', 'noscript'
    handle_start %w( base link meta )

    handle_end 'head'
    handle_end %w( html body br p ) => 'ImplyAfterHead'
    handle_end %w( title style script noscript )

    # At end of file: report and close a still-open title/style/script
    # element, leave the head, and let the next phase handle the EOF.
    def process_eof
      name = @tree.open_elements.last.name
      if %w[title style script].include?(name)
        parse_error(_("Unexpected end of file. Expected end tag (#{name})."))
        @tree.open_elements.pop
      end
      anything_else
      @parser.phase.process_eof
    end

    # Text inside title/style/script/noscript is inserted as-is; any other
    # text ends the head and is reprocessed by the next phase.
    def processCharacters(data)
      if %w[title style script noscript].include?(@tree.open_elements.last.name)
        return @tree.insertText(data)
      end

      anything_else
      @parser.phase.processCharacters(data)
    end

    # A second <head> is a parse error and is otherwise ignored.
    def startTagHead(name, attributes)
      message = _('Unexpected start tag head in existing head. Ignored')
      parse_error(message)
    end

    # <title>: attach to the head, make it the current node and switch the
    # tokenizer to the RCDATA content model.
    def startTagTitle(name, attributes)
      node = @tree.createElement(name, attributes)
      appendToHead(node)
      @tree.open_elements.push(node)
      @parser.tokenizer.content_model_flag = :RCDATA
    end

    # <style>: attach to the head when there is a head pointer and the parser
    # is really in the :inHead phase, otherwise to the current node; then
    # make it the current node and switch the tokenizer to CDATA.
    def startTagStyle(name, attributes)
      node = @tree.createElement(name, attributes)
      in_head = @tree.head_pointer != nil && @parser.phase == @parser.phases[:inHead]
      if in_head
        appendToHead(node)
      else
        @tree.open_elements.last.appendChild(node)
      end
      @tree.open_elements.push(node)
      @parser.tokenizer.content_model_flag = :CDATA
    end

    # <noscript>: handled the same way as <style>.
    def startTagNoscript(name, attributes)
      # XXX Need to decide whether to implement the scripting disabled case.
      node = @tree.createElement(name, attributes)
      in_head = @tree.head_pointer != nil && @parser.phase == @parser.phases[:inHead]
      if in_head
        appendToHead(node)
      else
        @tree.open_elements.last.appendChild(node)
      end
      @tree.open_elements.push(node)
      @parser.tokenizer.content_model_flag = :CDATA
    end

    # <script>: like <style>, but the element is additionally flagged as
    # "parser-inserted" before being attached.
    def startTagScript(name, attributes)
      #XXX Inner HTML case may be wrong
      node = @tree.createElement(name, attributes)
      node._flags.push("parser-inserted")
      in_head = @tree.head_pointer != nil && @parser.phase == @parser.phases[:inHead]
      if in_head
        appendToHead(node)
      else
        @tree.open_elements.last.appendChild(node)
      end
      @tree.open_elements.push(node)
      @parser.tokenizer.content_model_flag = :CDATA
    end

    # <base>, <link>, <meta>: attached like <style>, but never pushed onto
    # the stack of open elements.
    def startTagBaseLinkMeta(name, attributes)
      node = @tree.createElement(name, attributes)
      in_head = @tree.head_pointer != nil && @parser.phase == @parser.phases[:inHead]
      if in_head
        appendToHead(node)
      else
        @tree.open_elements.last.appendChild(node)
      end
    end

    # Any other start tag ends the head and is reprocessed by the next phase.
    def startTagOther(name, attributes)
      anything_else
      @parser.phase.processStartTag(name, attributes)
    end

    # </head>: pop the head if it is the current node (parse error
    # otherwise), then always move on to the :afterHead phase.
    def endTagHead(name)
      head_is_current = @tree.open_elements.last.name == 'head'
      if head_is_current
        @tree.open_elements.pop
      else
        parse_error(_("Unexpected end tag (head). Ignored."))
      end
      @parser.phase = @parser.phases[:afterHead]
    end

    # </html>, </body>, </br>, </p>: end the head and reprocess the end tag
    # in the next phase.
    def endTagImplyAfterHead(name)
      anything_else
      @parser.phase.processEndTag(name)
    end

    # </title>, </style>, </script>, </noscript>: pop the element only if it
    # is the current node; otherwise report a parse error.
    def endTagTitleStyleScriptNoscript(name)
      matches_current = @tree.open_elements.last.name == name
      if matches_current
        @tree.open_elements.pop
      else
        parse_error(_("Unexpected end tag (#{name}). Ignored."))
      end
    end

    # Any other end tag is a parse error and is otherwise ignored.
    def endTagOther(name)
      parse_error(_("Unexpected end tag (#{name}). Ignored."))
    end

    # Leaves the head: closes it via endTagHead when <head> is the current
    # node, otherwise just switches to the :afterHead phase.
    def anything_else
      return endTagHead('head') if @tree.open_elements.last.name == 'head'

      @parser.phase = @parser.phases[:afterHead]
    end

    protected

    # Appends +element+ to the head element; when there is no head pointer
    # (asserted to be the inner_html case) it goes to the current node.
    def appendToHead(element)
      unless @tree.head_pointer.nil?
        return @tree.head_pointer.appendChild(element)
      end

      assert @parser.inner_html
      @tree.open_elements.last.appendChild(element)
    end

  end
end
|
|
@ -1,6 +1,6 @@
|
|||
require 'html5lib/html5parser/phase'
|
||||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5lib
|
||||
module HTML5
|
||||
class InRowPhase < Phase
|
||||
|
||||
# http://www.whatwg.org/specs/web-apps/current-work/#in-row
|
||||
|
@ -15,7 +15,7 @@ module HTML5lib
|
|||
|
||||
def startTagTableCell(name, attributes)
|
||||
clearStackToTableRowContext
|
||||
@tree.insertElement(name, attributes)
|
||||
@tree.insert_element(name, attributes)
|
||||
@parser.phase = @parser.phases[:inCell]
|
||||
@tree.activeFormattingElements.push(Marker)
|
||||
end
|
||||
|
@ -23,7 +23,7 @@ module HTML5lib
|
|||
def startTagTableOther(name, attributes)
|
||||
ignoreEndTag = ignoreEndTagTr
|
||||
endTagTr('tr')
|
||||
# XXX how are we sure it's always ignored in the innerHTML case?
|
||||
# XXX how are we sure it's always ignored in the inner_html case?
|
||||
@parser.phase.processStartTag(name, attributes) unless ignoreEndTag
|
||||
end
|
||||
|
||||
|
@ -33,12 +33,12 @@ module HTML5lib
|
|||
|
||||
def endTagTr(name)
|
||||
if ignoreEndTagTr
|
||||
# innerHTML case
|
||||
assert @parser.innerHTML
|
||||
@parser.parseError
|
||||
# inner_html case
|
||||
assert @parser.inner_html
|
||||
parse_error
|
||||
else
|
||||
clearStackToTableRowContext
|
||||
@tree.openElements.pop
|
||||
@tree.open_elements.pop
|
||||
@parser.phase = @parser.phases[:inTableBody]
|
||||
end
|
||||
end
|
||||
|
@ -47,7 +47,7 @@ module HTML5lib
|
|||
ignoreEndTag = ignoreEndTagTr
|
||||
endTagTr('tr')
|
||||
# Reprocess the current tag if the tr end tag was not ignored
|
||||
# XXX how are we sure it's always ignored in the innerHTML case?
|
||||
# XXX how are we sure it's always ignored in the inner_html case?
|
||||
@parser.phase.processEndTag(name) unless ignoreEndTag
|
||||
end
|
||||
|
||||
|
@ -56,13 +56,13 @@ module HTML5lib
|
|||
endTagTr('tr')
|
||||
@parser.phase.processEndTag(name)
|
||||
else
|
||||
# innerHTML case
|
||||
@parser.parseError
|
||||
# inner_html case
|
||||
parse_error
|
||||
end
|
||||
end
|
||||
|
||||
def endTagIgnore(name)
|
||||
@parser.parseError(_("Unexpected end tag (#{name}) in the row phase. Ignored."))
|
||||
parse_error(_("Unexpected end tag (#{name}) in the row phase. Ignored."))
|
||||
end
|
||||
|
||||
def endTagOther(name)
|
||||
|
@ -73,9 +73,9 @@ module HTML5lib
|
|||
|
||||
# XXX unify this with other table helper methods
|
||||
def clearStackToTableRowContext
|
||||
until ['tr', 'html'].include?(name = @tree.openElements[-1].name)
|
||||
@parser.parseError(_("Unexpected implied end tag (#{name}) in the row phase."))
|
||||
@tree.openElements.pop
|
||||
until %w[tr html].include?(name = @tree.open_elements.last.name)
|
||||
parse_error(_("Unexpected implied end tag (#{name}) in the row phase."))
|
||||
@tree.open_elements.pop
|
||||
end
|
||||
end
|
||||
|
84
vendor/plugins/HTML5lib/lib/html5/html5parser/in_select_phase.rb
vendored
Normal file
84
vendor/plugins/HTML5lib/lib/html5/html5parser/in_select_phase.rb
vendored
Normal file
|
@ -0,0 +1,84 @@
|
|||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5
  # Token handlers used while the parser is in its select phase.
  #
  # http://www.whatwg.org/specs/web-apps/current-work/#in-select
  class InSelectPhase < Phase

    handle_start 'html', 'option', 'optgroup', 'select'

    handle_end 'option', 'optgroup', 'select', %w( caption table tbody tfoot thead tr td th ) => 'TableElements'

    # Character data is inserted as text at the current position.
    def processCharacters(data)
      @tree.insertText(data)
    end

    # Inserts a new <option>, first closing an <option> that is still the
    # current node.
    def startTagOption(name, attributes)
      # We need to imply </option> if <option> is the current node.
      @tree.open_elements.pop if @tree.open_elements.last.name == 'option'
      @tree.insert_element(name, attributes)
    end

    # Inserts a new <optgroup>, first closing a current <option> and then a
    # current <optgroup>.
    def startTagOptgroup(name, attributes)
      @tree.open_elements.pop if @tree.open_elements.last.name == 'option'
      @tree.open_elements.pop if @tree.open_elements.last.name == 'optgroup'
      @tree.insert_element(name, attributes)
    end

    # A nested <select> is a parse error and is treated as </select>.
    def startTagSelect(name, attributes)
      parse_error(_('Unexpected start tag (select) in the select phase implies select start tag.'))
      endTagSelect('select')
    end

    # Any other start tag is a parse error and is otherwise ignored.
    def startTagOther(name, attributes)
      # Double-quoted so that the tag name is interpolated into the message
      # (a single-quoted literal would emit the text "#{name}" verbatim).
      parse_error(_("Unexpected start tag token (#{name}) in the select phase. Ignored."))
    end

    # </option>: pop only when <option> is the current node.
    def endTagOption(name)
      if @tree.open_elements.last.name == 'option'
        @tree.open_elements.pop
      else
        parse_error(_('Unexpected end tag (option) in the select phase. Ignored.'))
      end
    end

    # </optgroup>: closes a current <option> sitting directly inside an
    # <optgroup>, then the <optgroup> itself; anything else is a parse error.
    def endTagOptgroup(name)
      # </optgroup> implicitly closes <option>
      if @tree.open_elements.last.name == 'option' and @tree.open_elements[-2].name == 'optgroup'
        @tree.open_elements.pop
      end
      # It also closes </optgroup>
      if @tree.open_elements.last.name == 'optgroup'
        @tree.open_elements.pop
      # But nothing else
      else
        parse_error(_('Unexpected end tag (optgroup) in the select phase. Ignored.'))
      end
    end

    # </select>: when a select is in (table) scope, pop elements up to and
    # including it and reset the insertion mode; otherwise a parse error.
    def endTagSelect(name)
      if in_scope?('select', true)
        remove_open_elements_until('select')

        @parser.reset_insertion_mode
      else
        # inner_html case
        parse_error
      end
    end

    # Table-related end tags are always a parse error; when the named element
    # is in (table) scope the select is closed first and the end tag is then
    # reprocessed by whichever phase is current afterwards.
    def endTagTableElements(name)
      parse_error(_("Unexpected table end tag (#{name}) in the select phase."))

      if in_scope?(name, true)
        endTagSelect('select')
        @parser.phase.processEndTag(name)
      end
    end

    # Any other end tag is a parse error and is otherwise ignored.
    def endTagOther(name)
      parse_error(_("Unexpected end tag token (#{name}) in the select phase. Ignored."))
    end

  end
end
|
|
@ -1,6 +1,6 @@
|
|||
require 'html5lib/html5parser/phase'
|
||||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5lib
|
||||
module HTML5
|
||||
class InTableBodyPhase < Phase
|
||||
|
||||
# http://www.whatwg.org/specs/web-apps/current-work/#in-table0
|
||||
|
@ -15,12 +15,12 @@ module HTML5lib
|
|||
|
||||
def startTagTr(name, attributes)
|
||||
clearStackToTableBodyContext
|
||||
@tree.insertElement(name, attributes)
|
||||
@tree.insert_element(name, attributes)
|
||||
@parser.phase = @parser.phases[:inRow]
|
||||
end
|
||||
|
||||
def startTagTableCell(name, attributes)
|
||||
@parser.parseError(_("Unexpected table cell start tag (#{name}) in the table body phase."))
|
||||
parse_error(_("Unexpected table cell start tag (#{name}) in the table body phase."))
|
||||
startTagTr('tr', {})
|
||||
@parser.phase.processStartTag(name, attributes)
|
||||
end
|
||||
|
@ -29,11 +29,11 @@ module HTML5lib
|
|||
# XXX AT Any ideas on how to share this with endTagTable?
|
||||
if in_scope?('tbody', true) or in_scope?('thead', true) or in_scope?('tfoot', true)
|
||||
clearStackToTableBodyContext
|
||||
endTagTableRowGroup(@tree.openElements[-1].name)
|
||||
endTagTableRowGroup(@tree.open_elements.last.name)
|
||||
@parser.phase.processStartTag(name, attributes)
|
||||
else
|
||||
# innerHTML case
|
||||
@parser.parseError
|
||||
# inner_html case
|
||||
parse_error
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -44,26 +44,26 @@ module HTML5lib
|
|||
def endTagTableRowGroup(name)
|
||||
if in_scope?(name, true)
|
||||
clearStackToTableBodyContext
|
||||
@tree.openElements.pop
|
||||
@tree.open_elements.pop
|
||||
@parser.phase = @parser.phases[:inTable]
|
||||
else
|
||||
@parser.parseError(_("Unexpected end tag (#{name}) in the table body phase. Ignored."))
|
||||
parse_error(_("Unexpected end tag (#{name}) in the table body phase. Ignored."))
|
||||
end
|
||||
end
|
||||
|
||||
def endTagTable(name)
|
||||
if in_scope?('tbody', true) or in_scope?('thead', true) or in_scope?('tfoot', true)
|
||||
clearStackToTableBodyContext
|
||||
endTagTableRowGroup(@tree.openElements[-1].name)
|
||||
endTagTableRowGroup(@tree.open_elements.last.name)
|
||||
@parser.phase.processEndTag(name)
|
||||
else
|
||||
# innerHTML case
|
||||
@parser.parseError
|
||||
# inner_html case
|
||||
parse_error
|
||||
end
|
||||
end
|
||||
|
||||
def endTagIgnore(name)
|
||||
@parser.parseError(_("Unexpected end tag (#{name}) in the table body phase. Ignored."))
|
||||
parse_error(_("Unexpected end tag (#{name}) in the table body phase. Ignored."))
|
||||
end
|
||||
|
||||
def endTagOther(name)
|
||||
|
@ -73,9 +73,9 @@ module HTML5lib
|
|||
protected
|
||||
|
||||
def clearStackToTableBodyContext
|
||||
until ['tbody', 'tfoot', 'thead', 'html'].include?(name = @tree.openElements[-1].name)
|
||||
@parser.parseError(_("Unexpected implied end tag (#{name}) in the table body phase."))
|
||||
@tree.openElements.pop
|
||||
until %w[tbody tfoot thead html].include?(name = @tree.open_elements.last.name)
|
||||
parse_error(_("Unexpected implied end tag (#{name}) in the table body phase."))
|
||||
@tree.open_elements.pop
|
||||
end
|
||||
end
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
require 'html5lib/html5parser/phase'
|
||||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5lib
|
||||
module HTML5
|
||||
class InTablePhase < Phase
|
||||
|
||||
# http://www.whatwg.org/specs/web-apps/current-work/#in-table
|
||||
|
@ -12,24 +12,24 @@ module HTML5lib
|
|||
handle_end 'table', %w( body caption col colgroup html tbody td tfoot th thead tr ) => 'Ignore'
|
||||
|
||||
def processCharacters(data)
|
||||
@parser.parseError(_("Unexpected non-space characters in table context caused voodoo mode."))
|
||||
parse_error(_("Unexpected non-space characters in table context caused voodoo mode."))
|
||||
# Make all the special element rearranging voodoo kick in
|
||||
@tree.insertFromTable = true
|
||||
@tree.insert_from_table = true
|
||||
# Process the character in the "in body" mode
|
||||
@parser.phases[:inBody].processCharacters(data)
|
||||
@tree.insertFromTable = false
|
||||
@tree.insert_from_table = false
|
||||
end
|
||||
|
||||
def startTagCaption(name, attributes)
|
||||
clearStackToTableContext
|
||||
@tree.activeFormattingElements.push(Marker)
|
||||
@tree.insertElement(name, attributes)
|
||||
@tree.insert_element(name, attributes)
|
||||
@parser.phase = @parser.phases[:inCaption]
|
||||
end
|
||||
|
||||
def startTagColgroup(name, attributes)
|
||||
clearStackToTableContext
|
||||
@tree.insertElement(name, attributes)
|
||||
@tree.insert_element(name, attributes)
|
||||
@parser.phase = @parser.phases[:inColumnGroup]
|
||||
end
|
||||
|
||||
|
@ -40,7 +40,7 @@ module HTML5lib
|
|||
|
||||
def startTagRowGroup(name, attributes)
|
||||
clearStackToTableContext
|
||||
@tree.insertElement(name, attributes)
|
||||
@tree.insert_element(name, attributes)
|
||||
@parser.phase = @parser.phases[:inTableBody]
|
||||
end
|
||||
|
||||
|
@ -50,60 +50,60 @@ module HTML5lib
|
|||
end
|
||||
|
||||
def startTagTable(name, attributes)
|
||||
@parser.parseError(_("Unexpected start tag (table) in table phase. Implies end tag (table)."))
|
||||
parse_error(_("Unexpected start tag (table) in table phase. Implies end tag (table)."))
|
||||
@parser.phase.processEndTag('table')
|
||||
@parser.phase.processStartTag(name, attributes) unless @parser.innerHTML
|
||||
@parser.phase.processStartTag(name, attributes) unless @parser.inner_html
|
||||
end
|
||||
|
||||
def startTagOther(name, attributes)
|
||||
@parser.parseError(_("Unexpected start tag (#{name}) in table context caused voodoo mode."))
|
||||
parse_error(_("Unexpected start tag (#{name}) in table context caused voodoo mode."))
|
||||
# Make all the special element rearranging voodoo kick in
|
||||
@tree.insertFromTable = true
|
||||
@tree.insert_from_table = true
|
||||
# Process the start tag in the "in body" mode
|
||||
@parser.phases[:inBody].processStartTag(name, attributes)
|
||||
@tree.insertFromTable = false
|
||||
@tree.insert_from_table = false
|
||||
end
|
||||
|
||||
def endTagTable(name)
|
||||
if in_scope?('table', true)
|
||||
@tree.generateImpliedEndTags
|
||||
|
||||
unless @tree.openElements[-1].name == 'table'
|
||||
@parser.parseError(_("Unexpected end tag (table). Expected end tag (#{@tree.openElements[-1].name})."))
|
||||
unless @tree.open_elements.last.name == 'table'
|
||||
parse_error(_("Unexpected end tag (table). Expected end tag (#{@tree.open_elements.last.name})."))
|
||||
end
|
||||
|
||||
remove_open_elements_until('table')
|
||||
|
||||
@parser.resetInsertionMode
|
||||
@parser.reset_insertion_mode
|
||||
else
|
||||
# innerHTML case
|
||||
assert @parser.innerHTML
|
||||
@parser.parseError
|
||||
# inner_html case
|
||||
assert @parser.inner_html
|
||||
parse_error
|
||||
end
|
||||
end
|
||||
|
||||
def endTagIgnore(name)
|
||||
@parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
|
||||
parse_error(_("Unexpected end tag (#{name}). Ignored."))
|
||||
end
|
||||
|
||||
def endTagOther(name)
|
||||
@parser.parseError(_("Unexpected end tag (#{name}) in table context caused voodoo mode."))
|
||||
parse_error(_("Unexpected end tag (#{name}) in table context caused voodoo mode."))
|
||||
# Make all the special element rearranging voodoo kick in
|
||||
@tree.insertFromTable = true
|
||||
@tree.insert_from_table = true
|
||||
# Process the end tag in the "in body" mode
|
||||
@parser.phases[:inBody].processEndTag(name)
|
||||
@tree.insertFromTable = false
|
||||
@tree.insert_from_table = false
|
||||
end
|
||||
|
||||
protected
|
||||
|
||||
def clearStackToTableContext
|
||||
# "clear the stack back to a table context"
|
||||
until ['table', 'html'].include?(name = @tree.openElements[-1].name)
|
||||
@parser.parseError(_("Unexpected implied end tag (#{name}) in the table phase."))
|
||||
@tree.openElements.pop
|
||||
until %w[table html].include?(name = @tree.open_elements.last.name)
|
||||
parse_error(_("Unexpected implied end tag (#{name}) in the table phase."))
|
||||
@tree.open_elements.pop
|
||||
end
|
||||
# When the current node is <html> it's an innerHTML case
|
||||
# When the current node is <html> it's an inner_html case
|
||||
end
|
||||
|
||||
end
|
|
@ -1,28 +1,28 @@
|
|||
require 'html5lib/html5parser/phase'
|
||||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5lib
|
||||
module HTML5
|
||||
class InitialPhase < Phase
|
||||
|
||||
# This phase deals with error handling as well which is currently not
|
||||
# covered in the specification. The error handling is typically known as
|
||||
# "quirks mode". It is expected that a future version of HTML5 will define this.
|
||||
|
||||
def processEOF
|
||||
@parser.parseError(_('Unexpected End of file. Expected DOCTYPE.'))
|
||||
def process_eof
|
||||
parse_error(_('Unexpected End of file. Expected DOCTYPE.'))
|
||||
@parser.phase = @parser.phases[:rootElement]
|
||||
@parser.phase.processEOF
|
||||
@parser.phase.process_eof
|
||||
end
|
||||
|
||||
def processComment(data)
|
||||
@tree.insertComment(data, @tree.document)
|
||||
@tree.insert_comment(data, @tree.document)
|
||||
end
|
||||
|
||||
def processDoctype(name, publicId, systemId, correct)
|
||||
if name.downcase != 'html' or publicId or systemId
|
||||
@parser.parseError(_('Erroneous DOCTYPE.'))
|
||||
parse_error(_('Erroneous DOCTYPE.'))
|
||||
end
|
||||
# XXX need to update DOCTYPE tokens
|
||||
@tree.insertDoctype(name)
|
||||
@tree.insertDoctype(name, publicId, systemId)
|
||||
|
||||
publicId = publicId.to_s.upcase
|
||||
|
||||
|
@ -110,23 +110,22 @@ module HTML5lib
|
|||
end
|
||||
|
||||
def processSpaceCharacters(data)
|
||||
@tree.insertText(data, @tree.document)
|
||||
end
|
||||
|
||||
def processCharacters(data)
|
||||
@parser.parseError(_('Unexpected non-space characters. Expected DOCTYPE.'))
|
||||
parse_error(_('Unexpected non-space characters. Expected DOCTYPE.'))
|
||||
@parser.phase = @parser.phases[:rootElement]
|
||||
@parser.phase.processCharacters(data)
|
||||
end
|
||||
|
||||
def processStartTag(name, attributes)
|
||||
@parser.parseError(_("Unexpected start tag (#{name}). Expected DOCTYPE."))
|
||||
parse_error(_("Unexpected start tag (#{name}). Expected DOCTYPE."))
|
||||
@parser.phase = @parser.phases[:rootElement]
|
||||
@parser.phase.processStartTag(name, attributes)
|
||||
end
|
||||
|
||||
def processEndTag(name)
|
||||
@parser.parseError(_("Unexpected end tag (#{name}). Expected DOCTYPE."))
|
||||
parse_error(_("Unexpected end tag (#{name}). Expected DOCTYPE."))
|
||||
@parser.phase = @parser.phases[:rootElement]
|
||||
@parser.phase.processEndTag(name)
|
||||
end
|
|
@ -1,4 +1,4 @@
|
|||
module HTML5lib
|
||||
module HTML5
|
||||
# Base class for helper objects that implement each phase of processing.
|
||||
#
|
||||
# Handler methods should be in the following order (they can be omitted):
|
||||
|
@ -15,9 +15,12 @@ module HTML5lib
|
|||
#
|
||||
class Phase
|
||||
|
||||
extend Forwardable
|
||||
def_delegators :@parser, :parse_error
|
||||
|
||||
# The following example call:
|
||||
#
|
||||
# tag_handlers('startTag', 'html', %( base link meta ), %( li dt dd ) => 'ListItem')
|
||||
# tag_handlers('startTag', 'html', %w( base link meta ), %w( li dt dd ) => 'ListItem')
|
||||
#
|
||||
# ...would return a hash equal to this:
|
||||
#
|
||||
|
@ -42,7 +45,7 @@ module HTML5lib
|
|||
handler_method = prefix + names.map {|name| name.capitalize }.join
|
||||
names.each {|name| mapping[name] = handler_method }
|
||||
end
|
||||
return mapping
|
||||
mapping
|
||||
end
|
||||
|
||||
def self.start_tag_handlers
|
||||
|
@ -80,17 +83,17 @@ module HTML5lib
|
|||
@parser, @tree = parser, tree
|
||||
end
|
||||
|
||||
def processEOF
|
||||
def process_eof
|
||||
@tree.generateImpliedEndTags
|
||||
|
||||
if @tree.openElements.length > 2
|
||||
@parser.parseError(_('Unexpected end of file. Missing closing tags.'))
|
||||
elsif @tree.openElements.length == 2 and @tree.openElements[1].name != 'body'
|
||||
if @tree.open_elements.length > 2
|
||||
parse_error(_('Unexpected end of file. Missing closing tags.'))
|
||||
elsif @tree.open_elements.length == 2 and @tree.open_elements[1].name != 'body'
|
||||
# This happens for framesets or something?
|
||||
@parser.parseError(_("Unexpected end of file. Expected end tag (#{@tree.openElements[1].name}) first."))
|
||||
elsif @parser.innerHTML and @tree.openElements.length > 1
|
||||
parse_error(_("Unexpected end of file. Expected end tag (#{@tree.open_elements[1].name}) first."))
|
||||
elsif @parser.inner_html and @tree.open_elements.length > 1
|
||||
# XXX This is not what the specification says. Not sure what to do here.
|
||||
@parser.parseError(_('XXX innerHTML EOF'))
|
||||
parse_error(_('XXX inner_html EOF'))
|
||||
end
|
||||
# Betting ends.
|
||||
end
|
||||
|
@ -98,11 +101,11 @@ module HTML5lib
|
|||
def processComment(data)
|
||||
# For most phases the following is correct. Where it's not it will be
|
||||
# overridden.
|
||||
@tree.insertComment(data, @tree.openElements[-1])
|
||||
@tree.insert_comment(data, @tree.open_elements.last)
|
||||
end
|
||||
|
||||
def processDoctype(name, publicId, systemId, correct)
|
||||
@parser.parseError(_('Unexpected DOCTYPE. Ignored.'))
|
||||
parse_error(_('Unexpected DOCTYPE. Ignored.'))
|
||||
end
|
||||
|
||||
def processSpaceCharacters(data)
|
||||
|
@ -114,17 +117,17 @@ module HTML5lib
|
|||
end
|
||||
|
||||
def startTagHtml(name, attributes)
|
||||
if @parser.firstStartTag == false and name == 'html'
|
||||
@parser.parseError(_('html needs to be the first start tag.'))
|
||||
if @parser.first_start_tag == false and name == 'html'
|
||||
parse_error(_('html needs to be the first start tag.'))
|
||||
end
|
||||
# XXX Need a check here to see if the first start tag token emitted is
|
||||
# this token... If it's not, invoke @parser.parseError.
|
||||
# this token... If it's not, invoke parse_error.
|
||||
attributes.each do |attr, value|
|
||||
unless @tree.openElements[0].attributes.has_key?(attr)
|
||||
@tree.openElements[0].attributes[attr] = value
|
||||
unless @tree.open_elements.first.attributes.has_key?(attr)
|
||||
@tree.open_elements.first.attributes[attr] = value
|
||||
end
|
||||
end
|
||||
@parser.firstStartTag = false
|
||||
@parser.first_start_tag = false
|
||||
end
|
||||
|
||||
def processEndTag(name)
|
||||
|
@ -146,11 +149,10 @@ module HTML5lib
|
|||
def remove_open_elements_until(name=nil)
|
||||
finished = false
|
||||
until finished
|
||||
element = @tree.openElements.pop
|
||||
element = @tree.open_elements.pop
|
||||
finished = name.nil? ? yield(element) : element.name == name
|
||||
end
|
||||
return element
|
||||
end
|
||||
|
||||
end
|
||||
end
|
|
@ -1,40 +1,39 @@
|
|||
require 'html5lib/html5parser/phase'
|
||||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5lib
|
||||
module HTML5
|
||||
class RootElementPhase < Phase
|
||||
|
||||
def processEOF
|
||||
insertHtmlElement
|
||||
@parser.phase.processEOF
|
||||
def process_eof
|
||||
insert_html_element
|
||||
@parser.phase.process_eof
|
||||
end
|
||||
|
||||
def processComment(data)
|
||||
@tree.insertComment(data, @tree.document)
|
||||
@tree.insert_comment(data, @tree.document)
|
||||
end
|
||||
|
||||
def processSpaceCharacters(data)
|
||||
@tree.insertText(data, @tree.document)
|
||||
end
|
||||
|
||||
def processCharacters(data)
|
||||
insertHtmlElement
|
||||
insert_html_element
|
||||
@parser.phase.processCharacters(data)
|
||||
end
|
||||
|
||||
def processStartTag(name, attributes)
|
||||
@parser.firstStartTag = true if name == 'html'
|
||||
insertHtmlElement
|
||||
@parser.first_start_tag = true if name == 'html'
|
||||
insert_html_element
|
||||
@parser.phase.processStartTag(name, attributes)
|
||||
end
|
||||
|
||||
def processEndTag(name)
|
||||
insertHtmlElement
|
||||
insert_html_element
|
||||
@parser.phase.processEndTag(name)
|
||||
end
|
||||
|
||||
def insertHtmlElement
|
||||
def insert_html_element
|
||||
element = @tree.createElement('html', {})
|
||||
@tree.openElements.push(element)
|
||||
@tree.open_elements.push(element)
|
||||
@tree.document.appendChild(element)
|
||||
@parser.phase = @parser.phases[:beforeHead]
|
||||
end
|
35
vendor/plugins/HTML5lib/lib/html5/html5parser/trailing_end_phase.rb
vendored
Normal file
35
vendor/plugins/HTML5lib/lib/html5/html5parser/trailing_end_phase.rb
vendored
Normal file
|
@ -0,0 +1,35 @@
|
|||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5
  # Token handlers used by the parser's trailing-end phase. Any token other
  # than a comment or whitespace is a parse error and sends the parser back
  # to @parser.last_phase, where the token is reprocessed.
  class TrailingEndPhase < Phase

    # Nothing to do at end of file in this phase.
    def process_eof
    end

    # Comments are attached directly to the document node.
    def processComment(data)
      @tree.insert_comment(data, @tree.document)
    end

    # Whitespace is handled by the previous phase without switching to it.
    def processSpaceCharacters(data)
      @parser.last_phase.processSpaceCharacters(data)
    end

    # Non-space text: report, switch back to the previous phase, reprocess.
    def processCharacters(data)
      parse_error(_('Unexpected non-space characters. Expected end of file.'))
      @parser.phase = @parser.last_phase
      @parser.phase.processCharacters(data)
    end

    # Start tag: report, switch back to the previous phase, reprocess.
    def processStartTag(name, attributes)
      # Double-quoted so that the tag name is interpolated into the message
      # (a single-quoted literal would emit the text "#{name}" verbatim).
      parse_error(_("Unexpected start tag (#{name}). Expected end of file."))
      @parser.phase = @parser.last_phase
      @parser.phase.processStartTag(name, attributes)
    end

    # End tag: report, switch back to the previous phase, reprocess.
    def processEndTag(name)
      # Double-quoted for the same reason as in processStartTag.
      parse_error(_("Unexpected end tag (#{name}). Expected end of file."))
      @parser.phase = @parser.last_phase
      @parser.phase.processEndTag(name)
    end
  end
end
|
|
@ -1,7 +1,7 @@
|
|||
require 'stringio'
|
||||
require 'html5lib/constants'
|
||||
require 'html5/constants'
|
||||
|
||||
module HTML5lib
|
||||
module HTML5
|
||||
|
||||
# Provides a unicode stream of characters to the HTMLTokenizer.
|
||||
|
||||
|
@ -10,7 +10,7 @@ module HTML5lib
|
|||
|
||||
class HTMLInputStream
|
||||
|
||||
attr_accessor :queue, :char_encoding
|
||||
attr_accessor :queue, :char_encoding, :errors
|
||||
|
||||
# Initialises the HTMLInputStream.
|
||||
#
|
||||
|
@ -40,25 +40,31 @@ module HTML5lib
|
|||
#Number of bytes to use when looking for a meta element with
|
||||
#encoding information
|
||||
@NUM_BYTES_META = 512
|
||||
#Number of bytes to use when using detecting encoding using chardet
|
||||
@NUM_BYTES_CHARDET = 256
|
||||
#Number of bytes to use when reading content
|
||||
@NUM_BYTES_BUFFER = 1024
|
||||
|
||||
#Encoding to use if no other information can be found
|
||||
@DEFAULT_ENCODING = 'windows-1252'
|
||||
|
||||
#Detect encoding iff no explicit "transport level" encoding is supplied
|
||||
if @encoding.nil? or not HTML5lib.is_valid_encoding(@encoding)
|
||||
if @encoding.nil? or not HTML5.is_valid_encoding(@encoding)
|
||||
@char_encoding = detect_encoding
|
||||
else
|
||||
@char_encoding = @encoding
|
||||
end
|
||||
|
||||
# Read bytes from stream decoding them into Unicode
|
||||
uString = @raw_stream.read
|
||||
@buffer = @raw_stream.read(@NUM_BYTES_BUFFER) || ''
|
||||
if @char_encoding == 'windows-1252'
|
||||
@win1252 = true
|
||||
elsif @char_encoding != 'utf-8'
|
||||
begin
|
||||
require 'iconv'
|
||||
begin
|
||||
uString = Iconv.iconv('utf-8', @char_encoding, uString).first
|
||||
@buffer << @raw_stream.read unless @raw_stream.eof?
|
||||
@buffer = Iconv.iconv('utf-8', @char_encoding, @buffer).first
|
||||
rescue
|
||||
@win1252 = true
|
||||
end
|
||||
|
@ -67,10 +73,8 @@ module HTML5lib
|
|||
end
|
||||
end
|
||||
|
||||
# Convert the unicode string into a list to be used as the data stream
|
||||
@data_stream = uString
|
||||
|
||||
@queue = []
|
||||
@errors = []
|
||||
|
||||
# Reset position in the list to read from
|
||||
@tell = 0
|
||||
|
@ -109,9 +113,22 @@ module HTML5lib
|
|||
begin
|
||||
require 'rubygems'
|
||||
require 'UniversalDetector' # gem install chardet
|
||||
buffer = @raw_stream.read
|
||||
encoding = UniversalDetector::chardet(buffer)['encoding']
|
||||
seek(buffer, 0)
|
||||
buffers = []
|
||||
detector = UniversalDetector::Detector.instance
|
||||
detector.reset
|
||||
until @raw_stream.eof?
|
||||
buffer = @raw_stream.read(@NUM_BYTES_CHARDET)
|
||||
break if !buffer or buffer.empty?
|
||||
buffers << buffer
|
||||
detector.feed(buffer)
|
||||
break if detector.instance_eval {@done}
|
||||
detector.instance_eval {
|
||||
@_mLastChar = @_mLastChar.chr if Fixnum === @_mLastChar
|
||||
}
|
||||
end
|
||||
detector.close
|
||||
encoding = detector.result['encoding']
|
||||
seek(buffers*'', 0)
|
||||
rescue LoadError
|
||||
end
|
||||
end
|
||||
|
@ -242,14 +259,20 @@ module HTML5lib
|
|||
unless @queue.empty?
|
||||
return @queue.shift
|
||||
else
|
||||
c = @data_stream[@tell]
|
||||
if @tell + 3 > @buffer.length and !@raw_stream.eof?
|
||||
# read next block
|
||||
@buffer = @buffer[@tell .. -1] + @raw_stream.read(@NUM_BYTES_BUFFER)
|
||||
@tell = 0
|
||||
end
|
||||
|
||||
c = @buffer[@tell]
|
||||
@tell += 1
|
||||
|
||||
case c
|
||||
when 0x01 .. 0x7F
|
||||
if c == 0x0D
|
||||
# normalize newlines
|
||||
@tell += 1 if @data_stream[@tell] == 0x0A
|
||||
@tell += 1 if @buffer[@tell] == 0x0A
|
||||
c = 0x0A
|
||||
end
|
||||
|
||||
|
@ -274,9 +297,9 @@ module HTML5lib
|
|||
end
|
||||
|
||||
when 0xC0 .. 0xFF
|
||||
if @win1252
|
||||
if instance_variables.include?("@win1252") && @win1252
|
||||
"\xC3" + (c-64).chr # convert to utf-8
|
||||
elsif @data_stream[@tell-1 .. -1] =~ /^
|
||||
elsif @buffer[@tell-1 .. @tell+3] =~ /^
|
||||
( [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
|
||||
| \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
|
||||
| [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte
|
||||
|
@ -292,6 +315,8 @@ module HTML5lib
|
|||
end
|
||||
|
||||
when 0x00
|
||||
@errors.push('null character found in input stream, ' +
|
||||
'replaced with U+FFFD')
|
||||
[0xFFFD].pack('U') # null characters are invalid
|
||||
|
||||
else
|
||||
|
@ -317,6 +342,10 @@ module HTML5lib
|
|||
@queue.insert(0, c) unless c == :EOF
|
||||
return char_stack.join('')
|
||||
end
|
||||
|
||||
# Pushes +characters+ back onto the front of the look-ahead queue, keeping
# their order.
#
# characters - a single character String, an Array of characters, or :EOF.
#              :EOF on its own is ignored.
#
# Kernel#Array is used rather than #to_a because String does not respond
# to #to_a on Ruby 1.9 and later, while Array() wraps a String and passes
# an Array through unchanged on every Ruby version.
def unget(characters)
  @queue.unshift(*Array(characters)) unless characters == :EOF
end
|
||||
end
|
||||
|
||||
# String-like object with an associated position and various extra methods
|
||||
|
@ -433,14 +462,14 @@ module HTML5lib
|
|||
|
||||
if attr[0] == 'charset'
|
||||
tentative_encoding = attr[1]
|
||||
if HTML5lib.is_valid_encoding(tentative_encoding)
|
||||
if HTML5.is_valid_encoding(tentative_encoding)
|
||||
@encoding = tentative_encoding
|
||||
return false
|
||||
end
|
||||
elsif attr[0] == 'content'
|
||||
content_parser = ContentAttrParser.new(EncodingBytes.new(attr[1]))
|
||||
tentative_encoding = content_parser.parse
|
||||
if HTML5lib.is_valid_encoding(tentative_encoding)
|
||||
if HTML5.is_valid_encoding(tentative_encoding)
|
||||
@encoding = tentative_encoding
|
||||
return false
|
||||
end
|
|
@ -11,10 +11,10 @@
|
|||
#
|
||||
# @@TODO:
|
||||
# * Selectively lowercase only XHTML, but not foreign markup
|
||||
require 'html5lib/html5parser'
|
||||
require 'html5lib/constants'
|
||||
require 'html5/html5parser'
|
||||
require 'html5/constants'
|
||||
|
||||
module HTML5lib
|
||||
module HTML5
|
||||
|
||||
# liberal XML parser
|
||||
class XMLParser < HTMLParser
|
||||
|
@ -24,26 +24,36 @@ module HTML5lib
|
|||
@phases[:initial] = XmlRootPhase.new(self, @tree)
|
||||
end
|
||||
|
||||
def normalizeToken(token)
|
||||
if token[:type] == :StartTag or token[:type] == :EmptyTag
|
||||
def normalize_token(token)
|
||||
case token[:type]
|
||||
when :StartTag, :EmptyTag
|
||||
# We need to remove the duplicate attributes and convert attributes
|
||||
# to a dict so that [["x", "y"], ["x", "z"]] becomes {"x": "y"}
|
||||
# to a Hash so that [["x", "y"], ["x", "z"]] becomes {"x": "y"}
|
||||
|
||||
token[:data] = Hash[*token[:data].reverse.flatten]
|
||||
|
||||
# For EmptyTags, process both a Start and an End tag
|
||||
if token[:type] == :EmptyTag
|
||||
save = @tokenizer.content_model_flag
|
||||
@phase.processStartTag(token[:name], token[:data])
|
||||
@tokenizer.content_model_flag = save
|
||||
token[:data] = {}
|
||||
token[:type] = :EndTag
|
||||
end
|
||||
|
||||
elsif token[:type] == :EndTag
|
||||
if token[:data]
|
||||
parseError(_("End tag contains unexpected attributes."))
|
||||
when :Characters
|
||||
# un-escape RCDATA_ELEMENTS (e.g. style, script)
|
||||
if @tokenizer.content_model_flag == :CDATA
|
||||
token[:data] = token[:data].
|
||||
gsub('<','<').gsub('>','>').gsub('&','&')
|
||||
end
|
||||
|
||||
elsif token[:type] == :Comment
|
||||
when :EndTag
|
||||
if token[:data]
|
||||
parse_error(_("End tag contains unexpected attributes."))
|
||||
end
|
||||
|
||||
when :Comment
|
||||
# Rescue CDATA from the comments
|
||||
if token[:data][0..6] == "[CDATA[" and token[:data][-2..-1] == "]]"
|
||||
token[:type] = :Characters
|
||||
|
@ -64,22 +74,22 @@ module HTML5lib
|
|||
@phases[:rootElement] = XhmlRootPhase.new(self, @tree)
|
||||
end
|
||||
|
||||
def normalizeToken(token)
|
||||
def normalize_token(token)
|
||||
super(token)
|
||||
|
||||
# ensure that non-void XHTML elements have content so that separate
|
||||
# open and close tags are emitted
|
||||
if token[:type] == :EndTag
|
||||
if VOID_ELEMENTS.include? token[:name]
|
||||
if @tree.openElements[-1].name != token["name"]:
|
||||
if @tree.open_elements[-1].name != token["name"]:
|
||||
token[:type] = :EmptyTag
|
||||
token["data"] ||= {}
|
||||
end
|
||||
else
|
||||
if token[:name] == @tree.openElements[-1].name and \
|
||||
not @tree.openElements[-1].hasContent
|
||||
if token[:name] == @tree.open_elements[-1].name and \
|
||||
not @tree.open_elements[-1].hasContent
|
||||
@tree.insertText('') unless
|
||||
@tree.openElements.any? {|e|
|
||||
@tree.open_elements.any? {|e|
|
||||
e.attributes.keys.include? 'xmlns' and
|
||||
e.attributes['xmlns'] != 'http://www.w3.org/1999/xhtml'
|
||||
}
|
||||
|
@ -92,9 +102,9 @@ module HTML5lib
|
|||
end
|
||||
|
||||
class XhmlRootPhase < RootElementPhase
|
||||
def insertHtmlElement
|
||||
def insert_html_element
|
||||
element = @tree.createElement("html", {'xmlns' => 'http://www.w3.org/1999/xhtml'})
|
||||
@tree.openElements.push(element)
|
||||
@tree.open_elements.push(element)
|
||||
@tree.document.appendChild(element)
|
||||
@parser.phase = @parser.phases[:beforeHead]
|
||||
end
|
||||
|
@ -105,15 +115,15 @@ module HTML5lib
|
|||
@start_tag_handlers = Hash.new(:startTagOther)
|
||||
@end_tag_handlers = Hash.new(:endTagOther)
|
||||
def startTagOther(name, attributes)
|
||||
@tree.openElements.push(@tree.document)
|
||||
@tree.open_elements.push(@tree.document)
|
||||
element = @tree.createElement(name, attributes)
|
||||
@tree.openElements[-1].appendChild(element)
|
||||
@tree.openElements.push(element)
|
||||
@tree.open_elements[-1].appendChild(element)
|
||||
@tree.open_elements.push(element)
|
||||
@parser.phase = XmlElementPhase.new(@parser,@tree)
|
||||
end
|
||||
def endTagOther(name)
|
||||
super
|
||||
@tree.openElements.pop
|
||||
@tree.open_elements.pop
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -125,17 +135,17 @@ module HTML5lib
|
|||
|
||||
def startTagOther(name, attributes)
|
||||
element = @tree.createElement(name, attributes)
|
||||
@tree.openElements[-1].appendChild(element)
|
||||
@tree.openElements.push(element)
|
||||
@tree.open_elements[-1].appendChild(element)
|
||||
@tree.open_elements.push(element)
|
||||
end
|
||||
|
||||
def endTagOther(name)
|
||||
for node in @tree.openElements.reverse
|
||||
for node in @tree.open_elements.reverse
|
||||
if node.name == name
|
||||
{} while @tree.openElements.pop != node
|
||||
{} while @tree.open_elements.pop != node
|
||||
break
|
||||
else
|
||||
@parser.parseError
|
||||
parse_error
|
||||
end
|
||||
end
|
||||
end
|
|
@ -1,6 +1,7 @@
|
|||
require 'cgi'
|
||||
require 'html5/tokenizer'
|
||||
|
||||
module HTML5lib
|
||||
module HTML5
|
||||
|
||||
# This module provides sanitization of XHTML+MathML+SVG
|
||||
# and of inline style attributes.
|
||||
|
@ -12,7 +13,7 @@ module HTML5lib
|
|||
# or, if you already have a parse tree (in this example, a REXML tree),
|
||||
# at the Serializer stage:
|
||||
#
|
||||
# tokens = TreeWalkers.getTreeWalker('rexml').new(tree)
|
||||
# tokens = TreeWalkers.get_tree_walker('rexml').new(tree)
|
||||
# HTMLSerializer.serialize(tokens, {:encoding=>'utf-8',
|
||||
# :sanitize => true})
|
||||
|
2
vendor/plugins/HTML5lib/lib/html5/serializer.rb
vendored
Normal file
2
vendor/plugins/HTML5lib/lib/html5/serializer.rb
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
require 'html5/serializer/htmlserializer'
|
||||
require 'html5/serializer/xhtmlserializer'
|
|
@ -1,6 +1,6 @@
|
|||
require 'html5lib/constants'
|
||||
require 'html5/constants'
|
||||
|
||||
module HTML5lib
|
||||
module HTML5
|
||||
|
||||
class HTMLSerializer
|
||||
|
||||
|
@ -21,6 +21,7 @@ module HTML5lib
|
|||
@use_trailing_solidus = false
|
||||
@space_before_trailing_solidus = true
|
||||
@escape_lt_in_attrs = false
|
||||
@escape_rcdata = false
|
||||
|
||||
@omit_optional_tags = true
|
||||
@sanitize = false
|
||||
|
@ -43,22 +44,22 @@ module HTML5lib
|
|||
@errors = []
|
||||
|
||||
if encoding and @inject_meta_charset
|
||||
require 'html5lib/filters/inject_meta_charset'
|
||||
require 'html5/filters/inject_meta_charset'
|
||||
treewalker = Filters::InjectMetaCharset.new(treewalker, encoding)
|
||||
end
|
||||
|
||||
if @strip_whitespace
|
||||
require 'html5lib/filters/whitespace'
|
||||
require 'html5/filters/whitespace'
|
||||
treewalker = Filters::WhitespaceFilter.new(treewalker)
|
||||
end
|
||||
|
||||
if @sanitize
|
||||
require 'html5lib/filters/sanitizer'
|
||||
require 'html5/filters/sanitizer'
|
||||
treewalker = Filters::HTMLSanitizeFilter.new(treewalker)
|
||||
end
|
||||
|
||||
if @omit_optional_tags
|
||||
require 'html5lib/filters/optionaltags'
|
||||
require 'html5/filters/optionaltags'
|
||||
treewalker = Filters::OptionalTagFilter.new(treewalker)
|
||||
end
|
||||
|
||||
|
@ -72,7 +73,7 @@ module HTML5lib
|
|||
elsif [:Characters, :SpaceCharacters].include? type
|
||||
if type == :SpaceCharacters or in_cdata
|
||||
if in_cdata and token[:data].include?("</")
|
||||
serializeError(_("Unexpected </ in CDATA"))
|
||||
serialize_error(_("Unexpected </ in CDATA"))
|
||||
end
|
||||
result << token[:data]
|
||||
else
|
||||
|
@ -81,10 +82,10 @@ module HTML5lib
|
|||
|
||||
elsif [:StartTag, :EmptyTag].include? type
|
||||
name = token[:name]
|
||||
if RCDATA_ELEMENTS.include?(name)
|
||||
if RCDATA_ELEMENTS.include?(name) and not @escape_rcdata
|
||||
in_cdata = true
|
||||
elsif in_cdata
|
||||
serializeError(_("Unexpected child element of a CDATA element"))
|
||||
serialize_error(_("Unexpected child element of a CDATA element"))
|
||||
end
|
||||
attributes = []
|
||||
for k,v in attrs = token[:data].to_a.sort
|
||||
|
@ -136,19 +137,19 @@ module HTML5lib
|
|||
if RCDATA_ELEMENTS.include?(name)
|
||||
in_cdata = false
|
||||
elsif in_cdata
|
||||
serializeError(_("Unexpected child element of a CDATA element"))
|
||||
serialize_error(_("Unexpected child element of a CDATA element"))
|
||||
end
|
||||
end_tag = "</#{name}>"
|
||||
result << end_tag
|
||||
|
||||
elsif type == :Comment
|
||||
data = token[:data]
|
||||
serializeError(_("Comment contains --")) if data.index("--")
|
||||
serialize_error(_("Comment contains --")) if data.index("--")
|
||||
comment = "<!--%s-->" % token[:data]
|
||||
result << comment
|
||||
|
||||
else
|
||||
serializeError(token[:data])
|
||||
serialize_error(token[:data])
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -162,13 +163,15 @@ module HTML5lib
|
|||
|
||||
alias :render :serialize
|
||||
|
||||
def serializeError(data="XXX ERROR MESSAGE NEEDED")
|
||||
def serialize_error(data="XXX ERROR MESSAGE NEEDED")
|
||||
# XXX The idea is to make data mandatory.
|
||||
@errors.push(data)
|
||||
if @strict
|
||||
raise SerializeError
|
||||
end
|
||||
end
|
||||
|
||||
# Returns +string+ unchanged; error messages in this class are passed
# through this method before being recorded.
def _(string)
  string
end
|
||||
end
|
||||
|
||||
# Error in serialized tree
|
20
vendor/plugins/HTML5lib/lib/html5/serializer/xhtmlserializer.rb
vendored
Normal file
20
vendor/plugins/HTML5lib/lib/html5/serializer/xhtmlserializer.rb
vendored
Normal file
|
@ -0,0 +1,20 @@
|
|||
require 'html5/serializer/htmlserializer'
|
||||
|
||||
module HTML5

  # HTMLSerializer subclass that differs only in the option values it
  # passes to the parent constructor.
  class XHTMLSerializer < HTMLSerializer
    # Option values used unless the caller supplies the same key.
    DEFAULTS = {
      :quote_attr_values           => true,
      :minimize_boolean_attributes => false,
      :use_trailing_solidus        => true,
      :escape_lt_in_attrs          => true,
      :omit_optional_tags          => false,
      :escape_rcdata               => true
    }

    # options - Hash of serializer options; its entries take precedence
    #           over DEFAULTS. DEFAULTS itself is never modified.
    def initialize(options = {})
      merged = DEFAULTS.merge(options)
      super(merged)
    end
  end

end
|
45
vendor/plugins/HTML5lib/lib/html5/sniffer.rb
vendored
Normal file
45
vendor/plugins/HTML5lib/lib/html5/sniffer.rb
vendored
Normal file
|
@ -0,0 +1,45 @@
|
|||
module HTML5
  module Sniffer
    # 4.7.4
    #
    # Decides whether +str+ looks like an HTML document or a feed by
    # scanning at most its first 512 characters.
    #
    # Leading tabs, spaces, LFs and CRs are skipped, as are "<!-- ... -->"
    # comments, other "<! ... >" declarations and "<? ... ?>" processing
    # instructions. The first other construct decides the result.
    #
    # str - the String to inspect.
    #
    # Returns "application/rss+xml" when the first tag starts with "rss",
    # "application/atom+xml" when it starts with "feed", and "text/html"
    # otherwise.
    # Raises NotImplementedError when the first tag starts with "rdf:RDF".
    def html_or_feed(str)
      s = str[0, 512] # steps 1, 2
      pos = 0

      while pos < s.length
        # s[pos, 1] is a one-character String on every Ruby version.
        # s[pos] is an Integer on Ruby 1.8 but a String from 1.9 on, so
        # comparing it with byte values would never match there.
        case s[pos, 1]
        when "\t", " ", "\n", "\r"
          pos += 1
        when "<"
          pos += 1
          if s[pos, 3] == "!--"
            # comment: skip to just past the closing "-->"
            pos += 3
            pos += 1 until s[pos, 3] == "-->" || pos >= s.length
            pos += 3
          elsif s[pos, 1] == "!"
            # other declaration: skip to just past the next ">"
            pos += 1
            pos += 1 until s[pos, 1] == ">" || pos >= s.length
            pos += 1
          elsif s[pos, 1] == "?"
            # processing instruction: skip to just past the closing "?>"
            pos += 1 until s[pos, 2] == "?>" || pos >= s.length
            pos += 2
          elsif s[pos, 3] == "rss"
            return "application/rss+xml"
          elsif s[pos, 4] == "feed"
            return "application/atom+xml"
          elsif s[pos, 7] == "rdf:RDF"
            raise NotImplementedError
          end
          # Any other tag: the next iteration sees a non-whitespace,
          # non-"<" character and leaves the loop.
        else
          break
        end
      end

      "text/html"
    end
  end
end
|
968
vendor/plugins/HTML5lib/lib/html5/tokenizer.rb
vendored
Normal file
968
vendor/plugins/HTML5lib/lib/html5/tokenizer.rb
vendored
Normal file
|
@ -0,0 +1,968 @@
|
|||
require 'html5/constants'
|
||||
require 'html5/inputstream'
|
||||
|
||||
module HTML5
|
||||
|
||||
# This class takes care of tokenizing HTML.
|
||||
#
|
||||
# * @current_token
|
||||
# Holds the token that is currently being processed.
|
||||
#
|
||||
# * @state
|
||||
# Holds a reference to the method to be invoked... XXX
|
||||
#
|
||||
# * @states
|
||||
# Holds a mapping between states and methods that implement the state.
|
||||
#
|
||||
# * @stream
|
||||
# Points to HTMLInputStream object.
|
||||
|
||||
class HTMLTokenizer
|
||||
attr_accessor :content_model_flag, :current_token
|
||||
attr_reader :stream
|
||||
|
||||
# XXX need to fix documentation
|
||||
|
||||
# Wraps +stream+ in an HTMLInputStream and resets all tokenizer state.
#
# stream  - passed straight to HTMLInputStream.new.
# options - Hash, also passed to HTMLInputStream.new. Two keys are read
#           here: :lowercase_element_name and :lowercase_attr_name, each
#           treated as enabled unless explicitly set to false.
def initialize(stream, options = {})
  @stream = HTMLInputStream.new(stream, options)

  # Initial tokenizer state
  @state = :data_state
  @content_model_flag = :PCDATA
  @escapeFlag = false
  @lastFourChars = []

  # Token under construction, and tokens waiting to be yielded
  @current_token = nil
  @token_queue = []

  @lowercase_element_name = !(options[:lowercase_element_name] == false)
  @lowercase_attr_name = !(options[:lowercase_attr_name] == false)
end
|
||||
|
||||
# This is where the magic happens.
|
||||
#
|
||||
# We do our usual processing through the states and when we have a token
|
||||
# to return we yield the token which pauses processing until the next token
|
||||
# is requested.
|
||||
def each
  @token_queue = []

  # Each state method returns a true value while there is more input and
  # false at EOF, which ends the loop.
  while send(@state)
    # Errors recorded by the input stream go out first, as ParseError tokens.
    until @stream.errors.empty?
      yield({:type => :ParseError, :data => @stream.errors.shift})
    end

    # Then everything the state method queued, in order.
    until @token_queue.empty?
      yield @token_queue.shift
    end
  end
end
|
||||
|
||||
# Below are various helper functions used by the tokenizer states.
|
||||
|
||||
# If the next character is a '>', convert the current_token into
|
||||
# an EmptyTag
|
||||
|
||||
def process_solidus_in_tag
  # Look at the character right after the solidus.
  lookahead = @stream.char
  closes_start_tag = @current_token[:type] == :StartTag && lookahead == ">"

  if closes_start_tag
    @current_token[:type] = :EmptyTag
  else
    @token_queue << {:type => :ParseError, :data => _("Solidus (/) incorrectly placed in tag.")}
  end

  # Only a peek was needed: hand the character back to the stream so the
  # calling state still sees it.
  @stream.unget(lookahead)
end
|
||||
|
||||
# This function returns either U+FFFD or the character based on the
|
||||
# decimal or hexadecimal representation. It also discards ";" if present.
|
||||
# If not present, a ParseError token is pushed onto @token_queue.
|
||||
|
||||
def consume_number_entity(isHex)
  # XXX More need to be done here. For instance, #13 should prolly be
  # converted to #10 so we don't get \r (#13 is \r right?) in the DOM and
  # such. Thoughts on this appreciated.
  allowed, radix = isHex ? [HEX_DIGITS, 16] : [DIGITS, 10]

  # Collect every character that is a digit in the chosen radix; stop at
  # the first one that is not (or at EOF).
  digits = []
  c = @stream.char
  while allowed.include?(c) && c != :EOF
    digits << c
    c = @stream.char
  end

  code_point = digits.join('').to_i(radix)

  case code_point
  when 13
    @token_queue << {:type => :ParseError, :data => _("Incorrect CR newline entity. Replaced with LF.")}
    code_point = 10
  when 128..159
    # Values 128..159 are remapped through the windows-1252 table.
    @token_queue << {:type => :ParseError, :data => _("Entity used with illegal number (windows-1252 reference).")}
    code_point = ENTITIES_WINDOWS1252[code_point - 128]
  end

  in_unicode_range = 0 < code_point && code_point <= 1114111
  surrogate = 55296 <= code_point && code_point <= 57343

  if in_unicode_range && !surrogate
    char = [code_point].pack('U')
  else
    # Zero, out-of-range and surrogate values become U+FFFD.
    char = [0xFFFD].pack('U')
    @token_queue << {:type => :ParseError, :data => _("Numeric entity represents an illegal codepoint.")}
  end

  # A terminating ";" is swallowed. Anything else is reported and handed
  # back to the stream.
  unless c == ";"
    @token_queue << {:type => :ParseError, :data => _("Numeric entity didn't end with ';'.")}
    @stream.unget(c)
  end

  char
end
|
||||
|
||||
# Tries to consume a character reference; the caller has already consumed
# the leading "&".
#
# from_attribute - true when the reference occurs inside an attribute value.
#
# Returns the replacement String, or nil when no reference was recognised
# (every character read has then been handed back to the stream). Parse
# errors are appended to @token_queue.
def consume_entity(from_attribute=false)
  char = nil
  char_stack = [@stream.char]

  if SPACE_CHARACTERS.include?(char_stack[0]) || [:EOF, '<', '&'].include?(char_stack[0])
    # Cannot start a reference: put it back untouched.
    @stream.unget(char_stack)
  elsif char_stack[0] == '#'
    # We might have a number entity here.
    char_stack += [@stream.char, @stream.char]
    if char_stack[0 .. 1].include? :EOF
      # If we reach the end of the file put everything up to :EOF
      # back in the queue
      char_stack = char_stack[0...char_stack.index(:EOF)]
      @stream.unget(char_stack)
      @token_queue << {:type => :ParseError, :data => _("Numeric entity expected. Got end of file instead.")}
    elsif char_stack[1].downcase == "x" && HEX_DIGITS.include?(char_stack[2])
      # Hexadecimal entity detected: "#" and "x" stay consumed.
      @stream.unget(char_stack[2])
      char = consume_number_entity(true)
    elsif DIGITS.include? char_stack[1]
      # Decimal entity detected: only "#" stays consumed.
      @stream.unget(char_stack[1..-1])
      char = consume_number_entity(false)
    else
      # No number entity detected.
      @stream.unget(char_stack)
      @token_queue << {:type => :ParseError, :data => _("Numeric entity expected but none found.")}
    end
  else
    # At this point we might have a named entity. Keep reading characters
    # for as long as what has been read is a prefix of some entity name.
    # e[0, 1] and name[-1, 1] are one-character Strings on every Ruby
    # version (e[0] / name[-1] are Integers on Ruby 1.8).
    candidates = ENTITIES.keys.select {|e| e[0, 1] == char_stack[0]}
    entity_name = nil

    # Try to find the longest entity the string will match, so that an
    # input that continues past a short name can still match a longer one.
    while char_stack.last != :EOF
      name = char_stack.join('')
      break unless candidates.any? {|e| e[0...name.length] == name}

      candidates.reject! {|e| e[0...name.length] != name}
      char_stack.push(@stream.char)

      if ENTITIES.include? name
        entity_name = name
        break if entity_name[-1, 1] == ';'
      end
    end

    if entity_name.nil?
      @token_queue << {:type => :ParseError, :data => _("Named entity expected. Got none.")}
      @stream.unget(char_stack)
    else
      char = ENTITIES[entity_name]
      terminated = entity_name[-1, 1] == ';'
      following = char_stack[entity_name.length]

      unless terminated
        @token_queue << {:type => :ParseError, :data => _("Named entity didn't end with ';'.")}
      end

      # In an attribute value, a name WITHOUT a trailing ";" that is
      # directly followed by a letter or digit is not a reference: the
      # text is left alone and a literal "&" is returned.
      # The test is on the matched name, not on the look-ahead character,
      # so a properly terminated reference followed by a letter or digit
      # is still decoded.
      if !terminated && from_attribute &&
         (ASCII_LETTERS.include?(following) || DIGITS.include?(following))
        @stream.unget(char_stack)
        char = '&'
      else
        # Hand back only what was read beyond the entity name.
        @stream.unget(char_stack[entity_name.length..-1])
      end
    end
  end

  return char
end
|
||||
|
||||
# This method replaces the need for "entityInAttributeValueState".
|
||||
def process_entity_in_attribute
  # consume_entity returns nil when no reference was recognised; the
  # ampersand is then kept as literal text.
  replacement = consume_entity(true) || "&"
  @current_token[:data][-1][1] += replacement
end
|
||||
|
||||
# This method is a generic handler for emitting the tags. It also sets
|
||||
# the state to "data" because that's what's needed after a token has been
|
||||
# emitted.
|
||||
def emit_current_token
  token = @current_token

  # Only tag tokens are queued here; for any other token type this method
  # does nothing.
  return unless [:StartTag, :EndTag, :EmptyTag].include?(token[:type])

  token[:name] = token[:name].downcase if @lowercase_element_name

  # Add token to the queue to be yielded, then go back to the data state.
  @token_queue << token
  @state = :data_state
end
|
||||
|
||||
# Below are the various tokenizer states worked out.
|
||||
|
||||
# XXX AT Perhaps we should have Hixie run some evaluation on billions of
|
||||
# documents to figure out what the order of the various if and elsif
|
||||
# statements should be.
|
||||
def data_state
  data = @stream.char
  raw_text = @content_model_flag == :CDATA || @content_model_flag == :RCDATA

  # In CDATA/RCDATA keep a window of the last four characters read by this
  # state, used to spot "<!--" and "-->".
  if raw_text
    @lastFourChars << data
    @lastFourChars.shift if @lastFourChars.length > 4
  end

  if data == "&" && [:PCDATA, :RCDATA].include?(@content_model_flag) && !@escapeFlag
    @state = :entity_data_state
  elsif data == "-" && raw_text && !@escapeFlag && @lastFourChars.join('') == "<!--"
    # "<!--" just completed: start of an escaped section.
    @escapeFlag = true
    @token_queue << {:type => :Characters, :data => data}
  elsif data == "<" && !@escapeFlag && (@content_model_flag == :PCDATA || raw_text)
    @state = :tag_open_state
  elsif data == ">" && @escapeFlag && raw_text && @lastFourChars[1..-1].join('') == "-->"
    # "-->" just completed: end of the escaped section.
    @escapeFlag = false
    @token_queue << {:type => :Characters, :data => data}
  elsif data == :EOF
    # Tokenization ends.
    return false
  elsif SPACE_CHARACTERS.include?(data)
    # Directly after emitting a token you switch back to the "data
    # state". At that point SPACE_CHARACTERS are important so they are
    # emitted separately.
    # XXX need to check if we don't need a special "spaces" flag on
    # characters.
    spaces = data + @stream.chars_until(SPACE_CHARACTERS, true)
    @token_queue << {:type => :SpaceCharacters, :data => spaces}
  else
    # Ordinary text: take everything up to the next character that may
    # need special handling.
    text = data + @stream.chars_until(%w[& < > -])
    @token_queue << {:type => :Characters, :data => text}
  end

  true
end
|
||||
|
||||
# Emits the result of consume_entity as a Characters token, or a literal
# "&" when consume_entity returns nil, then switches back to the data
# state. Always returns true.
def entity_data_state
  text = consume_entity || "&"
  @token_queue << {:type => :Characters, :data => text}
  @state = :data_state
  true
end
|
||||
|
||||
# Handles the character read after a "<". Always returns true.
def tag_open_state
  data = @stream.char

  if @content_model_flag != :PCDATA
    # We know the content model flag is set to either RCDATA or CDATA
    # now because this state can never be entered with the PLAINTEXT
    # flag. Only "</" is of interest; anything else makes the "<" text.
    if data == "/"
      @state = :close_tag_open_state
    else
      @token_queue << {:type => :Characters, :data => "<"}
      @stream.unget(data)
      @state = :data_state
    end
    return true
  end

  case data
  when "!"
    @state = :markup_declaration_open_state
  when "/"
    @state = :close_tag_open_state
  else
    if data != :EOF && ASCII_LETTERS.include?(data)
      # First letter of a start tag name.
      @current_token = {:type => :StartTag, :name => data, :data => []}
      @state = :tag_name_state
    elsif data == ">"
      # XXX In theory it could be something besides a tag name. But
      # do we really care?
      @token_queue << {:type => :ParseError, :data => _("Expected tag name. Got '>' instead.")}
      @token_queue << {:type => :Characters, :data => "<>"}
      @state = :data_state
    elsif data == "?"
      # XXX In theory it could be something besides a tag name. But
      # do we really care?
      @token_queue.push({:type => :ParseError, :data => _("Expected tag name. Got '?' instead (HTML doesn't " +
        "support processing instructions).")})
      @stream.unget(data)
      @state = :bogus_comment_state
    else
      # XXX
      @token_queue << {:type => :ParseError, :data => _("Expected tag name. Got something else instead")}
      @token_queue << {:type => :Characters, :data => "<"}
      @stream.unget(data)
      @state = :data_state
    end
  end

  true
end
|
||||
|
||||
# Handles the input after "</". Always returns true.
def close_tag_open_state
  if @content_model_flag == :RCDATA || @content_model_flag == :CDATA
    closes_current = false

    if @current_token
      # So far we know that "</" has been consumed. We now need to know
      # whether the next few characters match the name of last emitted
      # start tag which also happens to be the current_token. We also need
      # the character directly after the characters that could match the
      # start tag name.
      lookahead = []
      (@current_token[:name].length + 1).times do
        lookahead << @stream.char
        # Make sure we don't get hit by :EOF
        break if lookahead.last == :EOF
      end

      # This was only a peek, so everything goes back to the stream.
      @stream.unget(lookahead)

      closes_current =
        @current_token[:name].downcase == lookahead[0...-1].join('').downcase &&
        (SPACE_CHARACTERS + [">", "/", "<", :EOF]).include?(lookahead[-1])
    end

    unless closes_current
      # Not the matching end tag: "</" is plain text.
      @token_queue << {:type => :Characters, :data => "</"}
      @state = :data_state
      return true
    end

    # Because the characters are correct we can safely switch to
    # PCDATA mode now. This also means we don't have to do it when
    # emitting the end tag token.
    @content_model_flag = :PCDATA
  end

  data = @stream.char

  case
  when data == :EOF
    @token_queue << {:type => :ParseError, :data => _("Expected closing tag. Unexpected end of file.")}
    @token_queue << {:type => :Characters, :data => "</"}
    @state = :data_state
  when ASCII_LETTERS.include?(data)
    @current_token = {:type => :EndTag, :name => data, :data => []}
    @state = :tag_name_state
  when data == ">"
    @token_queue << {:type => :ParseError, :data => _("Expected closing tag. Got '>' instead. Ignoring '</>'.")}
    @state = :data_state
  else
    # XXX data can be _'_...
    @token_queue << {:type => :ParseError, :data => _("Expected closing tag. Unexpected character '#{data}' found.")}
    @stream.unget(data)
    @state = :bogus_comment_state
  end

  true
end
|
||||
|
||||
# Accumulates the name of the tag in @current_token. Always returns true.
def tag_name_state
  data = @stream.char

  case
  when SPACE_CHARACTERS.include?(data)
    @state = :before_attribute_name_state
  when data == :EOF
    @token_queue << {:type => :ParseError, :data => _("Unexpected end of file in the tag name.")}
    emit_current_token
  when ASCII_LETTERS.include?(data)
    # Take the whole run of letters in one go.
    letters = @stream.chars_until(ASCII_LETTERS, true)
    @current_token[:name] = @current_token[:name] + data + letters
  when data == ">"
    emit_current_token
  when data == "/"
    process_solidus_in_tag
    @state = :before_attribute_name_state
  else
    # Any other character simply becomes part of the name.
    @current_token[:name] = @current_token[:name] + data
  end

  true
end
|
||||
|
||||
# Handles the input between a tag name (or a previous attribute) and the
# next attribute name. Always returns true.
def before_attribute_name_state
  data = @stream.char

  # Opens a new [name, value] pair whose name starts with the character
  # just read.
  begin_attribute = lambda do
    @current_token[:data] << [data, ""]
    @state = :attribute_name_state
  end

  case
  when SPACE_CHARACTERS.include?(data)
    # Swallow the rest of the whitespace run.
    @stream.chars_until(SPACE_CHARACTERS, true)
  when data == :EOF
    @token_queue << {:type => :ParseError, :data => _("Unexpected end of file. Expected attribute name instead.")}
    emit_current_token
  when ASCII_LETTERS.include?(data)
    begin_attribute.call
  when data == ">"
    emit_current_token
  when data == "/"
    process_solidus_in_tag
  else
    # Non-letters start an attribute name as well.
    begin_attribute.call
  end

  true
end
|
||||
|
||||
# Accumulates the name of the attribute most recently added to
# @current_token. When the name is complete it is optionally lowercased
# and checked against the earlier attributes of the same tag.
# Always returns true.
def attribute_name_state
  data = @stream.char
  name_complete = true
  emit_after_checks = false

  case
  when data == "="
    @state = :before_attribute_value_state
  when data == :EOF
    @token_queue << {:type => :ParseError, :data => _("Unexpected end of file in attribute name.")}
    @state = :data_state
    emit_after_checks = true
  when ASCII_LETTERS.include?(data)
    # Take the whole run of letters in one go.
    letters = @stream.chars_until(ASCII_LETTERS, true)
    @current_token[:data][-1][0] = @current_token[:data][-1][0] + data + letters
    name_complete = false
  when data == ">"
    # XXX If we emit here the attributes are converted to a dict
    # without being checked and when the code below runs we error
    # because data is a dict not a list
    emit_after_checks = true
  when SPACE_CHARACTERS.include?(data)
    @state = :after_attribute_name_state
  when data == "/"
    process_solidus_in_tag
    @state = :before_attribute_name_state
  else
    @current_token[:data][-1][0] = @current_token[:data][-1][0] + data
    name_complete = false
  end

  return true unless name_complete

  # Attributes are not dropped at this stage. That happens when the
  # start tag token is emitted so values can still be safely appended
  # to attributes, but we do want to report the parse error in time.
  attributes = @current_token[:data]
  attributes[-1][0] = attributes.last.first.downcase if @lowercase_attr_name

  # Report a duplicate at most once, however many earlier attributes
  # share the name.
  if attributes[0...-1].any? {|earlier_name, _value| attributes.last.first == earlier_name}
    @token_queue << {:type => :ParseError, :data =>_("Dropped duplicate attribute on tag.")}
  end

  # XXX Fix for above XXX
  emit_current_token if emit_after_checks
  true
end
|
||||
|
||||
# Tokenizer state entered after an attribute name has been terminated by
# whitespace.
#
# Reads one character from @stream:
# * whitespace -> consumes the rest of the whitespace run, state unchanged
# * "="        -> :before_attribute_value_state
# * ">"        -> emits the current token
# * :EOF       -> queues a ParseError and emits the current token
# * "/"        -> process_solidus_in_tag, then :before_attribute_name_state
# * otherwise  -> starts a new [name, value] pair and switches to
#                 :attribute_name_state
#
# Always returns true.
def after_attribute_name_state
  data = @stream.char

  if SPACE_CHARACTERS.include?(data)
    @stream.chars_until(SPACE_CHARACTERS, true)
  elsif data == "="
    @state = :before_attribute_value_state
  elsif data == ">"
    emit_current_token
  elsif data == :EOF
    @token_queue << {:type => :ParseError, :data => _("Unexpected end of file. Expected = or end of tag.")}
    emit_current_token
  elsif data == "/"
    process_solidus_in_tag
    @state = :before_attribute_name_state
  else
    # ASCII letters and every remaining character begin the next attribute
    # name identically, so they share this branch.
    @current_token[:data].push([data, ""])
    @state = :attribute_name_state
  end

  true
end
|
||||
|
||||
# Tokenizer state entered after the "=" of an attribute, before its value.
#
# Reads one character from @stream:
# * whitespace -> consumes the rest of the whitespace run, state unchanged
# * '"' / "'"  -> the matching quoted-value state
# * "&"        -> pushed back onto the stream so that the unquoted-value
#                 state sees it; switches to that state
# * ">"        -> emits the current token
# * :EOF       -> queues a ParseError and emits the current token
# * otherwise  -> appended to the value of the last attribute; switches to
#                 :attribute_value_unquoted_state
#
# Always returns true.
def before_attribute_value_state
  data = @stream.char

  if SPACE_CHARACTERS.include?(data)
    @stream.chars_until(SPACE_CHARACTERS, true)
    return true
  end

  case data
  when "\""
    @state = :attribute_value_double_quoted_state
  when "&"
    @state = :attribute_value_unquoted_state
    @stream.unget(data)
  when "'"
    @state = :attribute_value_single_quoted_state
  when ">"
    emit_current_token
  when :EOF
    @token_queue << {:type => :ParseError, :data => _("Unexpected end of file. Expected attribute value.")}
    emit_current_token
  else
    @current_token[:data][-1][1] += data
    @state = :attribute_value_unquoted_state
  end

  true
end
|
||||
|
||||
# Tokenizer state for the inside of a double-quoted attribute value.
#
# Reads one character from @stream: the closing quote returns to
# :before_attribute_name_state, "&" is handed to process_entity_in_attribute,
# :EOF queues a ParseError and emits the current token. Any other character
# is appended to the value of the last attribute together with everything up
# to (not including) the next '"' or "&".
#
# Always returns true.
def attribute_value_double_quoted_state
  data = @stream.char

  case data
  when "\""
    @state = :before_attribute_name_state
  when "&"
    process_entity_in_attribute
  when :EOF
    @token_queue << {:type => :ParseError, :data => _("Unexpected end of file in attribute value (\").")}
    emit_current_token
  else
    @current_token[:data][-1][1] += data + @stream.chars_until(["\"", "&"])
  end

  true
end
|
||||
|
||||
# Tokenizer state for the inside of a single-quoted attribute value.
#
# Reads one character from @stream: the closing quote returns to
# :before_attribute_name_state, "&" is handed to process_entity_in_attribute,
# :EOF queues a ParseError and emits the current token. Any other character
# is appended to the value of the last attribute together with everything up
# to (not including) the next "'" or "&".
#
# Always returns true.
def attribute_value_single_quoted_state
  data = @stream.char

  case data
  when "'"
    @state = :before_attribute_name_state
  when "&"
    process_entity_in_attribute
  when :EOF
    @token_queue << {:type => :ParseError, :data => _("Unexpected end of file in attribute value (').")}
    emit_current_token
  else
    @current_token[:data][-1][1] += data + @stream.chars_until(["'", "&"])
  end

  true
end
|
||||
|
||||
# Tokenizer state for an attribute value written without quotes.
#
# Reads one character from @stream: whitespace ends the value and returns to
# :before_attribute_name_state, "&" is handed to process_entity_in_attribute,
# ">" emits the current token, :EOF queues a ParseError and emits the current
# token. Any other character is appended to the value of the last attribute
# together with everything up to the next "&", ">", "<" or whitespace.
#
# Always returns true.
def attribute_value_unquoted_state
  data = @stream.char

  if SPACE_CHARACTERS.include?(data)
    @state = :before_attribute_name_state
    return true
  end

  case data
  when "&"
    process_entity_in_attribute
  when ">"
    emit_current_token
  when :EOF
    @token_queue << {:type => :ParseError, :data => _("Unexpected end of file in attribute value.")}
    emit_current_token
  else
    @current_token[:data][-1][1] += data + @stream.chars_until(["&", ">", "<"] + SPACE_CHARACTERS)
  end

  true
end
|
||||
|
||||
# Tokenizer state that turns malformed markup into a comment token.
#
# Queues a :Comment token whose data is everything in @stream up to (not
# including) the first ">" (per the original note, chars_until also stops
# at :EOF), then consumes the terminating character and switches to
# :data_state.
#
# Always returns true.
def bogus_comment_state
  comment_text = @stream.chars_until(">")
  @token_queue << {:type => :Comment, :data => comment_text}

  # Eat the character directly after the bogus comment, which is either a
  # ">" or an :EOF.
  @stream.char
  @state = :data_state

  true
end
|
||||
|
||||
# Tokenizer state entered after "<!".
#
# Reads two characters from @stream. If they are "--", a new :Comment token
# is started and the state becomes :comment_start_state. Otherwise five more
# characters are read; if the seven together spell "DOCTYPE" (compared
# case-insensitively), a new :Doctype token is started and the state becomes
# :doctype_state. Anything else queues a ParseError, pushes all characters
# read back onto the stream and switches to :bogus_comment_state.
#
# Always returns true.
def markup_declaration_open_state
  char_stack = [@stream.char, @stream.char]

  if char_stack == ["-", "-"]
    @current_token = {:type => :Comment, :data => ""}
    @state = :comment_start_state
    return true
  end

  5.times { char_stack.push(@stream.char) }

  # An explicit :EOF check guards the string comparison.
  if !char_stack.include?(:EOF) && char_stack.join("").upcase == "DOCTYPE"
    @current_token = {:type => :Doctype, :name => "", :publicId => nil, :systemId => nil, :correct => true}
    @state = :doctype_state
  else
    @token_queue << {:type => :ParseError, :data => _("Expected '--' or 'DOCTYPE'. Not found.")}
    @stream.unget(char_stack)
    @state = :bogus_comment_state
  end

  true
end
|
||||
|
||||
# Tokenizer state entered directly after "<!--".
#
# Reads one character from @stream:
# * "-"       -> :comment_start_dash_state
# * ">"       -> queues a ParseError and the current comment token, then
#                :data_state
# * :EOF      -> queues a ParseError and the current comment token, then
#                :data_state
# * otherwise -> appends the character and everything up to the next "-" to
#                the comment data, then :comment_state
#
# Always returns true.
def comment_start_state
  data = @stream.char

  case data
  when "-"
    @state = :comment_start_dash_state
  when ">"
    @token_queue << {:type => :ParseError, :data => _("Incorrect comment.")}
    @token_queue << @current_token
    @state = :data_state
  when :EOF
    @token_queue << {:type => :ParseError, :data => _("Unexpected end of file in comment.")}
    @token_queue << @current_token
    @state = :data_state
  else
    @current_token[:data] += data + @stream.chars_until("-")
    @state = :comment_state
  end

  true
end
|
||||
|
||||
# Tokenizer state entered after "<!--" followed by a single "-".
#
# Reads one character from @stream:
# * "-"       -> :comment_end_state
# * ">"       -> queues a ParseError and the current comment token, then
#                :data_state
# * :EOF      -> queues a ParseError and the current comment token, then
#                :data_state
# * otherwise -> appends the pending "-", the character and everything up to
#                the next "-" to the comment data, then :comment_state
#
# Always returns true.
def comment_start_dash_state
  data = @stream.char

  case data
  when "-"
    @state = :comment_end_state
  when ">"
    @token_queue << {:type => :ParseError, :data => _("Incorrect comment.")}
    @token_queue << @current_token
    @state = :data_state
  when :EOF
    @token_queue << {:type => :ParseError, :data => _("Unexpected end of file in comment.")}
    @token_queue << @current_token
    @state = :data_state
  else
    @current_token[:data] += '-' + data + @stream.chars_until("-")
    @state = :comment_state
  end

  true
end
|
||||
|
||||
# Tokenizer state for the body of a comment.
#
# Reads one character from @stream: "-" switches to :comment_end_dash_state;
# :EOF queues a ParseError and the current comment token and switches to
# :data_state; any other character is appended to the comment data together
# with everything up to the next "-".
#
# Always returns true.
def comment_state
  data = @stream.char

  case data
  when "-"
    @state = :comment_end_dash_state
  when :EOF
    @token_queue << {:type => :ParseError, :data => _("Unexpected end of file in comment.")}
    @token_queue << @current_token
    @state = :data_state
  else
    @current_token[:data] += data + @stream.chars_until("-")
  end

  true
end
|
||||
|
||||
# Tokenizer state entered after a single "-" inside a comment body.
#
# Reads one character from @stream: a second "-" switches to
# :comment_end_state; :EOF queues a ParseError and the current comment token
# and switches to :data_state. Any other character means the dash was
# ordinary text: the dash, the character and everything up to the next "-"
# are appended to the comment data, and one further character is consumed
# while the state stays unchanged.
#
# Always returns true.
def comment_end_dash_state
  data = @stream.char

  case data
  when "-"
    @state = :comment_end_state
  when :EOF
    @token_queue << {:type => :ParseError, :data => _("Unexpected end of file in comment (-)")}
    @token_queue << @current_token
    @state = :data_state
  else
    @current_token[:data] += "-" + data + @stream.chars_until("-")
    # Consume the next character which is either a "-" or an :EOF as
    # well so if there's a "-" directly after the "-" we go nicely to
    # the "comment end state" without emitting a ParseError there.
    @stream.char
  end

  true
end
|
||||
|
||||
# Tokenizer state entered after "--" inside a comment.
#
# Reads one character from @stream:
# * ">"       -> queues the current comment token, then :data_state
# * "-"       -> queues a ParseError and appends the dash to the comment
#                data; state unchanged
# * :EOF      -> queues a ParseError and the current comment token, then
#                :data_state
# * otherwise -> queues a ParseError, appends "--" plus the character to the
#                comment data and returns to :comment_state
#
# Always returns true.
def comment_end_state
  data = @stream.char

  case data
  when ">"
    @token_queue << @current_token
    @state = :data_state
  when "-"
    @token_queue << {:type => :ParseError, :data => _("Unexpected '-' after '--' found in comment.")}
    @current_token[:data] += data
  when :EOF
    @token_queue << {:type => :ParseError, :data => _("Unexpected end of file in comment (--).")}
    @token_queue << @current_token
    @state = :data_state
  else
    # XXX
    @token_queue << {:type => :ParseError, :data => _("Unexpected character in comment found.")}
    @current_token[:data] += "--" + data
    @state = :comment_state
  end

  true
end
|
||||
|
||||
# Tokenizer state entered directly after the literal "DOCTYPE".
#
# Reads one character from @stream. If it is not whitespace, a ParseError is
# queued and the character is pushed back onto the stream. Either way the
# state becomes :before_doctype_name_state.
#
# Always returns true.
def doctype_state
  data = @stream.char

  unless SPACE_CHARACTERS.include?(data)
    @token_queue << {:type => :ParseError, :data => _("No space after literal string 'DOCTYPE'.")}
    @stream.unget(data)
  end

  @state = :before_doctype_name_state
  true
end
|
||||
|
||||
# Tokenizer state in which the DOCTYPE name is expected.
#
# Reads one character from @stream:
# * whitespace -> ignored, state unchanged
# * ">"        -> queues a ParseError, marks the doctype token incorrect,
#                 queues it and switches to :data_state
# * :EOF       -> same handling as ">" with an end-of-file message
# * otherwise  -> the character becomes the start of the doctype name and
#                 the state becomes :doctype_name_state
#
# Always returns true.
def before_doctype_name_state
  data = @stream.char
  return true if SPACE_CHARACTERS.include?(data)

  case data
  when ">"
    @token_queue << {:type => :ParseError, :data => _("Unexpected > character. Expected DOCTYPE name.")}
    @current_token[:correct] = false
    @token_queue << @current_token
    @state = :data_state
  when :EOF
    @token_queue << {:type => :ParseError, :data => _("Unexpected end of file. Expected DOCTYPE name.")}
    @current_token[:correct] = false
    @token_queue << @current_token
    @state = :data_state
  else
    @current_token[:name] = data
    @state = :doctype_name_state
  end

  true
end
|
||||
|
||||
# Tokenizer state for the characters of the DOCTYPE name.
#
# Reads one character from @stream:
# * whitespace -> :after_doctype_name_state
# * ">"        -> queues the doctype token, then :data_state
# * :EOF       -> queues a ParseError, marks the token incorrect, queues it
#                 and switches to :data_state
# * otherwise  -> appended to the doctype name, state unchanged
#
# Always returns true.
def doctype_name_state
  data = @stream.char

  if SPACE_CHARACTERS.include?(data)
    @state = :after_doctype_name_state
    return true
  end

  case data
  when ">"
    @token_queue << @current_token
    @state = :data_state
  when :EOF
    @token_queue << {:type => :ParseError, :data => _("Unexpected end of file in DOCTYPE name.")}
    @current_token[:correct] = false
    @token_queue << @current_token
    @state = :data_state
  else
    @current_token[:name] += data
  end

  true
end
|
||||
|
||||
# Tokenizer state entered after the DOCTYPE name.
#
# Reads one character from @stream:
# * whitespace -> ignored, state unchanged
# * ">"        -> queues the doctype token, then :data_state
# * :EOF       -> marks the token incorrect, pushes :EOF back onto the
#                 stream, queues a ParseError and the token, then :data_state
# * otherwise  -> reads five more characters and compares the six,
#                 lower-cased, with "public" / "system" to choose the
#                 matching identifier state. On a mismatch the characters
#                 are pushed back, a ParseError is queued and the state
#                 becomes :bogus_doctype_state.
#
# Always returns true.
def after_doctype_name_state
  data = @stream.char

  if SPACE_CHARACTERS.include?(data)
    # whitespace after the name is skipped
  elsif data == ">"
    @token_queue << @current_token
    @state = :data_state
  elsif data == :EOF
    @current_token[:correct] = false
    @stream.unget(data)
    @token_queue << {:type => :ParseError, :data => _("Unexpected end of file in DOCTYPE.")}
    @token_queue << @current_token
    @state = :data_state
  else
    char_stack = [data]
    # Read through @stream like every other state does; the previous bare
    # `stream` call depended on a reader method being defined elsewhere.
    5.times { char_stack << @stream.char }
    token = char_stack.join('').tr(ASCII_UPPERCASE, ASCII_LOWERCASE)

    if token == "public" && !char_stack.include?(:EOF)
      @state = :before_doctype_public_identifier_state
    elsif token == "system" && !char_stack.include?(:EOF)
      @state = :before_doctype_system_identifier_state
    else
      @stream.unget(char_stack)
      @token_queue << {:type => :ParseError, :data => _("Expected 'public' or 'system'. Got '#{token}'")}
      @state = :bogus_doctype_state
    end
  end

  true
end
|
||||
|
||||
# Tokenizer state entered after the PUBLIC keyword of a DOCTYPE.
#
# Reads one character from @stream:
# * whitespace -> ignored, state unchanged
# * '"' / "'"  -> resets :publicId to "" and enters the matching quoted
#                 public-identifier state
# * ">"        -> queues a ParseError, marks the token incorrect, queues it
#                 and switches to :data_state
# * :EOF       -> same handling as ">" with an end-of-file message
# * otherwise  -> queues a ParseError and switches to :bogus_doctype_state
#
# Always returns true.
def before_doctype_public_identifier_state
  data = @stream.char
  return true if SPACE_CHARACTERS.include?(data)

  case data
  when "\""
    @current_token[:publicId] = ""
    @state = :doctype_public_identifier_double_quoted_state
  when "'"
    @current_token[:publicId] = ""
    @state = :doctype_public_identifier_single_quoted_state
  when ">"
    @token_queue << {:type => :ParseError, :data => _("Unexpected end of DOCTYPE.")}
    @current_token[:correct] = false
    @token_queue << @current_token
    @state = :data_state
  when :EOF
    @token_queue << {:type => :ParseError, :data => _("Unexpected end of file in DOCTYPE.")}
    @current_token[:correct] = false
    @token_queue << @current_token
    @state = :data_state
  else
    @token_queue << {:type => :ParseError, :data => _("Unexpected character in DOCTYPE.")}
    @state = :bogus_doctype_state
  end

  true
end
|
||||
|
||||
# Tokenizer state for the inside of a double-quoted DOCTYPE public
# identifier.
#
# Reads one character from @stream: the closing quote switches to
# :after_doctype_public_identifier_state; :EOF queues a ParseError, marks the
# token incorrect, queues it and switches to :data_state; any other character
# is appended to :publicId.
#
# Always returns true.
def doctype_public_identifier_double_quoted_state
  data = @stream.char

  case data
  when "\""
    @state = :after_doctype_public_identifier_state
  when :EOF
    @token_queue << {:type => :ParseError, :data => _("Unexpected end of file in DOCTYPE.")}
    @current_token[:correct] = false
    @token_queue << @current_token
    @state = :data_state
  else
    @current_token[:publicId] += data
  end

  true
end
|
||||
|
||||
# Tokenizer state for the inside of a single-quoted DOCTYPE public
# identifier.
#
# Reads one character from @stream: the closing quote switches to
# :after_doctype_public_identifier_state; :EOF queues a ParseError, marks the
# token incorrect, queues it and switches to :data_state; any other character
# is appended to :publicId.
#
# Always returns true.
def doctype_public_identifier_single_quoted_state
  data = @stream.char

  case data
  when "'"
    @state = :after_doctype_public_identifier_state
  when :EOF
    @token_queue << {:type => :ParseError, :data => _("Unexpected end of file in DOCTYPE.")}
    @current_token[:correct] = false
    @token_queue << @current_token
    @state = :data_state
  else
    @current_token[:publicId] += data
  end

  true
end
|
||||
|
||||
# Tokenizer state entered after the closing quote of a DOCTYPE public
# identifier.
#
# Reads one character from @stream:
# * whitespace -> ignored, state unchanged
# * '"' / "'"  -> resets :systemId to "" and enters the matching quoted
#                 system-identifier state
# * ">"        -> queues the doctype token, then :data_state
# * :EOF       -> queues a ParseError, marks the token incorrect, queues it
#                 and switches to :data_state
# * otherwise  -> queues a ParseError and switches to :bogus_doctype_state
#
# Always returns true.
def after_doctype_public_identifier_state
  data = @stream.char
  return true if SPACE_CHARACTERS.include?(data)

  case data
  when "\""
    @current_token[:systemId] = ""
    @state = :doctype_system_identifier_double_quoted_state
  when "'"
    @current_token[:systemId] = ""
    @state = :doctype_system_identifier_single_quoted_state
  when ">"
    @token_queue << @current_token
    @state = :data_state
  when :EOF
    @token_queue << {:type => :ParseError, :data => _("Unexpected end of file in DOCTYPE.")}
    @current_token[:correct] = false
    @token_queue << @current_token
    @state = :data_state
  else
    @token_queue << {:type => :ParseError, :data => _("Unexpected character in DOCTYPE.")}
    @state = :bogus_doctype_state
  end

  true
end
|
||||
|
||||
# Tokenizer state entered after the SYSTEM keyword of a DOCTYPE.
#
# Reads one character from @stream:
# * whitespace -> ignored, state unchanged
# * '"' / "'"  -> resets :systemId to "" and enters the matching quoted
#                 system-identifier state
# * ">"        -> queues a ParseError, marks the token incorrect, queues it
#                 and switches to :data_state
# * :EOF       -> same handling as ">" with an end-of-file message
# * otherwise  -> queues a ParseError and switches to :bogus_doctype_state
#
# Always returns true.
def before_doctype_system_identifier_state
  data = @stream.char
  return true if SPACE_CHARACTERS.include?(data)

  case data
  when "\""
    @current_token[:systemId] = ""
    @state = :doctype_system_identifier_double_quoted_state
  when "'"
    @current_token[:systemId] = ""
    @state = :doctype_system_identifier_single_quoted_state
  when ">"
    @token_queue << {:type => :ParseError, :data => _("Unexpected character in DOCTYPE.")}
    @current_token[:correct] = false
    @token_queue << @current_token
    @state = :data_state
  when :EOF
    @token_queue << {:type => :ParseError, :data => _("Unexpected end of file in DOCTYPE.")}
    @current_token[:correct] = false
    @token_queue << @current_token
    @state = :data_state
  else
    @token_queue << {:type => :ParseError, :data => _("Unexpected character in DOCTYPE.")}
    @state = :bogus_doctype_state
  end

  true
end
|
||||
|
||||
# Tokenizer state for the inside of a double-quoted DOCTYPE system
# identifier.
#
# Reads one character from @stream: the closing quote switches to
# :after_doctype_system_identifier_state; :EOF queues a ParseError, marks the
# token incorrect, queues it and switches to :data_state; any other character
# is appended to :systemId.
#
# Always returns true.
def doctype_system_identifier_double_quoted_state
  data = @stream.char

  case data
  when "\""
    @state = :after_doctype_system_identifier_state
  when :EOF
    @token_queue << {:type => :ParseError, :data => _("Unexpected end of file in DOCTYPE.")}
    @current_token[:correct] = false
    @token_queue << @current_token
    @state = :data_state
  else
    @current_token[:systemId] += data
  end

  true
end
|
||||
|
||||
# Tokenizer state for the inside of a single-quoted DOCTYPE system
# identifier.
#
# Reads one character from @stream: the closing quote switches to
# :after_doctype_system_identifier_state; :EOF queues a ParseError, marks the
# token incorrect, queues it and switches to :data_state; any other character
# is appended to :systemId.
#
# Always returns true.
def doctype_system_identifier_single_quoted_state
  data = @stream.char

  case data
  when "'"
    @state = :after_doctype_system_identifier_state
  when :EOF
    @token_queue << {:type => :ParseError, :data => _("Unexpected end of file in DOCTYPE.")}
    @current_token[:correct] = false
    @token_queue << @current_token
    @state = :data_state
  else
    @current_token[:systemId] += data
  end

  true
end
|
||||
|
||||
# Tokenizer state entered after the closing quote of a DOCTYPE system
# identifier.
#
# Reads one character from @stream:
# * whitespace -> ignored, state unchanged
# * ">"        -> queues the doctype token, then :data_state
# * :EOF       -> queues a ParseError, marks the token incorrect, queues it
#                 and switches to :data_state
# * otherwise  -> queues a ParseError and switches to :bogus_doctype_state
#
# Always returns true.
def after_doctype_system_identifier_state
  data = @stream.char
  return true if SPACE_CHARACTERS.include?(data)

  case data
  when ">"
    @token_queue << @current_token
    @state = :data_state
  when :EOF
    @token_queue << {:type => :ParseError, :data => _("Unexpected end of file in DOCTYPE.")}
    @current_token[:correct] = false
    @token_queue << @current_token
    @state = :data_state
  else
    @token_queue << {:type => :ParseError, :data => _("Unexpected character in DOCTYPE.")}
    @state = :bogus_doctype_state
  end

  true
end
|
||||
|
||||
# Tokenizer state that skips over a malformed DOCTYPE.
#
# Reads one character from @stream and marks the current doctype token as
# incorrect. ">" queues the token and switches to :data_state. :EOF is
# pushed back onto the stream, a ParseError and the token are queued, and
# the state becomes :data_state. Any other character is discarded with the
# state unchanged.
#
# Always returns true.
def bogus_doctype_state
  data = @stream.char
  @current_token[:correct] = false

  case data
  when ">"
    @token_queue << @current_token
    @state = :data_state
  when :EOF
    # XXX EMIT
    @stream.unget(data)
    @token_queue << {:type => :ParseError, :data => _("Unexpected end of file in bogus doctype.")}
    @token_queue << @current_token
    @state = :data_state
  end

  true
end
|
||||
|
||||
# Wrapper applied to every parse-error message in this file; it hands the
# given string back unchanged.
def _(string)
  string
end
|
||||
end
|
||||
|
||||
end
|
|
@ -1,24 +1,24 @@
|
|||
module HTML5
  # Lookup table for the available tree builder implementations.
  #
  # NOTE(review): this span showed both sides of a rename (HTML5lib -> HTML5,
  # 'html5lib/...' -> 'html5/...') interleaved; the text below keeps the
  # post-rename lines only - confirm against the repository.
  module TreeBuilders

    class << self
      # Returns the tree builder class registered under +name+.
      #
      # +name+ is converted with to_s and compared case-insensitively. The
      # implementation file is required lazily, so a backend is only loaded
      # when it is actually requested.
      #
      # Raises RuntimeError ("Unknown TreeBuilder <name>") for any other
      # name.
      def [](name)
        case name.to_s.downcase
        when 'simpletree'
          require 'html5/treebuilders/simpletree'
          SimpleTree::TreeBuilder
        when 'rexml'
          require 'html5/treebuilders/rexml'
          REXML::TreeBuilder
        when 'hpricot'
          require 'html5/treebuilders/hpricot'
          Hpricot::TreeBuilder
        else
          raise "Unknown TreeBuilder #{name}"
        end
      end

      # Both spellings appeared in this span; the camelCase one is kept so
      # that callers using it keep working.
      alias :getTreeBuilder :[]
      alias :get_tree_builder :[]
    end
  end
end
|
|
@ -1,8 +1,8 @@
|
|||
require 'html5lib/constants'
|
||||
require 'html5/constants'
|
||||
|
||||
#XXX - TODO; make the default interface more ElementTree-like rather than DOM-like
|
||||
|
||||
module HTML5lib
|
||||
module HTML5
|
||||
|
||||
# The scope markers are inserted when entering buttons, object elements,
|
||||
# marquees, table cells, and table captions, and are used to prevent formatting
|
||||
|
@ -76,13 +76,13 @@ module HTML5lib
|
|||
# Base treebuilder implementation
|
||||
class TreeBuilder
|
||||
|
||||
attr_accessor :openElements
|
||||
attr_accessor :open_elements
|
||||
|
||||
attr_accessor :activeFormattingElements
|
||||
|
||||
attr_accessor :document
|
||||
|
||||
attr_accessor :headPointer
|
||||
attr_accessor :head_pointer
|
||||
|
||||
attr_accessor :formPointer
|
||||
|
||||
|
@ -106,25 +106,25 @@ module HTML5lib
|
|||
end
|
||||
|
||||
def reset
|
||||
@openElements = []
|
||||
@open_elements = []
|
||||
@activeFormattingElements = []
|
||||
|
||||
#XXX - rename these to headElement, formElement
|
||||
@headPointer = nil
|
||||
@head_pointer = nil
|
||||
@formPointer = nil
|
||||
|
||||
self.insertFromTable = false
|
||||
self.insert_from_table = false
|
||||
|
||||
@document = @documentClass.new
|
||||
end
|
||||
|
||||
def elementInScope(target, tableVariant=false)
|
||||
# Exit early when possible.
|
||||
return true if @openElements[-1].name == target
|
||||
return true if @open_elements[-1].name == target
|
||||
|
||||
# AT How about while true and simply set node to [-1] and set it to
|
||||
# [-2] at the end...
|
||||
@openElements.reverse.each do |element|
|
||||
@open_elements.reverse.each do |element|
|
||||
if element.name == target
|
||||
return true
|
||||
elsif element.name == 'table'
|
||||
|
@ -149,10 +149,10 @@ module HTML5lib
|
|||
# Step 2 and step 3: we start with the last element. So i is -1.
|
||||
i = -1
|
||||
entry = @activeFormattingElements[i]
|
||||
return if entry == Marker or @openElements.include?(entry)
|
||||
return if entry == Marker or @open_elements.include?(entry)
|
||||
|
||||
# Step 6
|
||||
until entry == Marker or @openElements.include?(entry)
|
||||
until entry == Marker or @open_elements.include?(entry)
|
||||
# Step 5: let entry be one earlier in the list.
|
||||
i -= 1
|
||||
begin
|
||||
|
@ -171,7 +171,7 @@ module HTML5lib
|
|||
clone = @activeFormattingElements[i].cloneNode
|
||||
|
||||
# Step 9
|
||||
element = insertElement(clone.name, clone.attributes)
|
||||
element = insert_element(clone.name, clone.attributes)
|
||||
|
||||
# Step 10
|
||||
@activeFormattingElements[i] = element
|
||||
|
@ -198,12 +198,15 @@ module HTML5lib
|
|||
return false
|
||||
end
|
||||
|
||||
def insertDoctype(name)
|
||||
@document.appendChild(@doctypeClass.new(name))
|
||||
def insertDoctype(name, public_id, system_id)
|
||||
doctype = @doctypeClass.new(name)
|
||||
doctype.public_id = public_id
|
||||
doctype.system_id = system_id
|
||||
@document.appendChild(doctype)
|
||||
end
|
||||
|
||||
def insertComment(data, parent=nil)
|
||||
parent = @openElements[-1] if parent.nil?
|
||||
def insert_comment(data, parent=nil)
|
||||
parent = @open_elements[-1] if parent.nil?
|
||||
parent.appendChild(@commentClass.new(data))
|
||||
end
|
||||
|
||||
|
@ -216,28 +219,28 @@ module HTML5lib
|
|||
|
||||
# Switch the function used to insert an element from the
|
||||
# normal one to the misnested table one and back again
|
||||
def insertFromTable=(value)
|
||||
@insertFromTable = value
|
||||
@insertElement = value ? :insertElementTable : :insertElementNormal
|
||||
def insert_from_table=(value)
|
||||
@insert_from_table = value
|
||||
@insert_element = value ? :insert_elementTable : :insert_elementNormal
|
||||
end
|
||||
|
||||
def insertElement(name, attributes)
|
||||
send(@insertElement, name, attributes)
|
||||
def insert_element(name, attributes)
|
||||
send(@insert_element, name, attributes)
|
||||
end
|
||||
|
||||
def insertElementNormal(name, attributes)
|
||||
def insert_elementNormal(name, attributes)
|
||||
element = @elementClass.new(name)
|
||||
element.attributes = attributes
|
||||
@openElements[-1].appendChild(element)
|
||||
@openElements.push(element)
|
||||
@open_elements.last.appendChild(element)
|
||||
@open_elements.push(element)
|
||||
return element
|
||||
end
|
||||
|
||||
# Create an element and insert it into the tree
|
||||
def insertElementTable(name, attributes)
|
||||
def insert_elementTable(name, attributes)
|
||||
element = @elementClass.new(name)
|
||||
element.attributes = attributes
|
||||
if TABLE_INSERT_MODE_ELEMENTS.include?(@openElements[-1].name)
|
||||
if TABLE_INSERT_MODE_ELEMENTS.include?(@open_elements.last.name)
|
||||
#We should be in the InTable mode. This means we want to do
|
||||
#special magic element rearranging
|
||||
parent, insertBefore = getTableMisnestedNodePosition
|
||||
|
@ -246,17 +249,17 @@ module HTML5lib
|
|||
else
|
||||
parent.insertBefore(element, insertBefore)
|
||||
end
|
||||
@openElements.push(element)
|
||||
@open_elements.push(element)
|
||||
else
|
||||
return insertElementNormal(name, attributes)
|
||||
return insert_elementNormal(name, attributes)
|
||||
end
|
||||
return element
|
||||
end
|
||||
|
||||
def insertText(data, parent=nil)
|
||||
parent = @openElements[-1] if parent.nil?
|
||||
parent = @open_elements[-1] if parent.nil?
|
||||
|
||||
if (not(@insertFromTable) or (@insertFromTable and not TABLE_INSERT_MODE_ELEMENTS.include?(@openElements[-1].name)))
|
||||
if (not(@insert_from_table) or (@insert_from_table and not TABLE_INSERT_MODE_ELEMENTS.include?(@open_elements[-1].name)))
|
||||
parent.insertText(data)
|
||||
else
|
||||
#We should be in the InTable mode. This means we want to do
|
||||
|
@ -275,7 +278,7 @@ module HTML5lib
|
|||
lastTable = nil
|
||||
fosterParent = nil
|
||||
insertBefore = nil
|
||||
@openElements.reverse.each do |element|
|
||||
@open_elements.reverse.each do |element|
|
||||
if element.name == "table"
|
||||
lastTable = element
|
||||
break
|
||||
|
@ -288,33 +291,34 @@ module HTML5lib
|
|||
fosterParent = lastTable.parent
|
||||
insertBefore = lastTable
|
||||
else
|
||||
fosterParent = @openElements[@openElements.index(lastTable) - 1]
|
||||
fosterParent = @open_elements[@open_elements.index(lastTable) - 1]
|
||||
end
|
||||
else
|
||||
fosterParent = @openElements[0]
|
||||
fosterParent = @open_elements[0]
|
||||
end
|
||||
return fosterParent, insertBefore
|
||||
end
|
||||
|
||||
def generateImpliedEndTags(exclude=nil)
|
||||
name = @openElements[-1].name
|
||||
name = @open_elements[-1].name
|
||||
|
||||
if (['dd', 'dt', 'li', 'p', 'td', 'th', 'tr'].include?(name) and name != exclude)
|
||||
@openElements.pop
|
||||
# XXX td, th and tr are not actually needed
|
||||
if (%w[dd dt li p td th tr].include?(name) and name != exclude)
|
||||
@open_elements.pop
|
||||
# XXX This is not entirely what the specification says. We should
|
||||
# investigate it more closely.
|
||||
generateImpliedEndTags(exclude)
|
||||
end
|
||||
end
|
||||
|
||||
def getDocument
|
||||
def get_document
|
||||
@document
|
||||
end
|
||||
|
||||
def getFragment
|
||||
#assert @innerHTML
|
||||
def get_fragment
|
||||
#assert @inner_html
|
||||
fragment = @fragmentClass.new
|
||||
@openElements[0].reparentChildren(fragment)
|
||||
@open_elements[0].reparentChildren(fragment)
|
||||
return fragment
|
||||
end
|
||||
|
|
@ -1,14 +1,13 @@
|
|||
require 'html5lib/treebuilders/base'
|
||||
require 'html5/treebuilders/base'
|
||||
require 'rubygems'
|
||||
require 'hpricot'
|
||||
require 'forwardable'
|
||||
|
||||
module HTML5lib
|
||||
module HTML5
|
||||
module TreeBuilders
|
||||
module Hpricot
|
||||
|
||||
class Node < Base::Node
|
||||
|
||||
extend Forwardable
|
||||
|
||||
def_delegators :@hpricot, :name
|
||||
|
@ -22,7 +21,7 @@ module HTML5lib
|
|||
|
||||
def appendChild(node)
|
||||
if node.kind_of?(TextNode) and childNodes.any? and childNodes.last.kind_of?(TextNode)
|
||||
childNodes[-1].hpricot.content = childNodes[-1].hpricot.to_s + node.hpricot.to_s
|
||||
childNodes.last.hpricot.content = childNodes.last.hpricot.content + node.hpricot.content
|
||||
else
|
||||
childNodes << node
|
||||
hpricot.children << node.hpricot
|
||||
|
@ -145,21 +144,27 @@ module HTML5lib
|
|||
end
|
||||
|
||||
class DocumentType < Node
|
||||
def_delegators :@hpricot, :public_id, :system_id
|
||||
|
||||
def self.hpricot_class
|
||||
::Hpricot::DocType
|
||||
end
|
||||
|
||||
def initialize(name)
|
||||
def initialize(name, public_id, system_id)
|
||||
begin
|
||||
super(name)
|
||||
rescue ArgumentError # needs 3...
|
||||
end
|
||||
|
||||
@hpricot = ::Hpricot::DocType.new(name, nil, nil)
|
||||
@hpricot = ::Hpricot::DocType.new(name, public_id, system_id)
|
||||
end
|
||||
|
||||
def printTree(indent=0)
|
||||
if hpricot.target and hpricot.target.any?
|
||||
"\n|#{' ' * indent}<!DOCTYPE #{hpricot.target}>"
|
||||
else
|
||||
"\n|#{' ' * indent}<!DOCTYPE >"
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -202,15 +207,20 @@ module HTML5lib
|
|||
@fragmentClass = DocumentFragment
|
||||
end
|
||||
|
||||
def insertDoctype(name, public_id, system_id)
|
||||
doctype = @doctypeClass.new(name, public_id, system_id)
|
||||
@document.appendChild(doctype)
|
||||
end
|
||||
|
||||
def testSerializer(node)
|
||||
node.printTree
|
||||
end
|
||||
|
||||
def getDocument
|
||||
def get_document
|
||||
@document.hpricot
|
||||
end
|
||||
|
||||
def getFragment
|
||||
def get_fragment
|
||||
@document = super
|
||||
return @document.hpricot.children
|
||||
end
|
|
@ -1,8 +1,8 @@
|
|||
require 'html5lib/treebuilders/base'
|
||||
require 'html5/treebuilders/base'
|
||||
require 'rexml/document'
|
||||
require 'forwardable'
|
||||
|
||||
module HTML5lib
|
||||
module HTML5
|
||||
module TreeBuilders
|
||||
module REXML
|
||||
|
||||
|
@ -17,11 +17,9 @@ module HTML5lib
|
|||
end
|
||||
|
||||
def appendChild node
|
||||
if node.kind_of? TextNode and
|
||||
childNodes.length>0 and childNodes[-1].kind_of? TextNode
|
||||
childNodes[-1].rxobj.value =
|
||||
childNodes[-1].rxobj.to_s + node.rxobj.to_s
|
||||
childNodes[-1].rxobj.raw = true
|
||||
if node.kind_of?(TextNode) && childNodes.length > 0 && childNodes.last.kind_of?(TextNode)
|
||||
childNodes.last.rxobj.value = childNodes.last.rxobj.to_s + node.rxobj.to_s
|
||||
childNodes.last.rxobj.raw = true
|
||||
else
|
||||
childNodes.push node
|
||||
rxobj.add node.rxobj
|
||||
|
@ -45,10 +43,8 @@ module HTML5lib
|
|||
|
||||
def insertBefore node, refNode
|
||||
index = childNodes.index(refNode)
|
||||
if node.kind_of? TextNode and index>0 and
|
||||
childNodes[index-1].kind_of? TextNode
|
||||
childNodes[index-1].rxobj.value =
|
||||
childNodes[index-1].rxobj.to_s + node.rxobj.to_s
|
||||
if node.kind_of?(TextNode) and index > 0 && childNodes[index-1].kind_of?(TextNode)
|
||||
childNodes[index-1].rxobj.value = childNodes[index-1].rxobj.to_s + node.rxobj.to_s
|
||||
childNodes[index-1].rxobj.raw = true
|
||||
else
|
||||
childNodes.insert index, node
|
||||
|
@ -57,7 +53,7 @@ module HTML5lib
|
|||
end
|
||||
|
||||
def hasContent
|
||||
return (childNodes.length > 0)
|
||||
(childNodes.length > 0)
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -90,7 +86,7 @@ module HTML5lib
|
|||
for child in childNodes
|
||||
tree += child.printTree(indent)
|
||||
end
|
||||
return tree
|
||||
tree
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -120,10 +116,25 @@ module HTML5lib
|
|||
end
|
||||
|
||||
class DocumentType < Node
|
||||
def_delegator :@rxobj, :public, :public_id
|
||||
|
||||
def_delegator :@rxobj, :system, :system_id
|
||||
|
||||
def self.rxclass
|
||||
::REXML::DocType
|
||||
end
|
||||
|
||||
def initialize name, public_id, system_id
|
||||
super(name)
|
||||
if public_id
|
||||
@rxobj = ::REXML::DocType.new [name, ::REXML::DocType::PUBLIC, public_id, system_id]
|
||||
elsif system_id
|
||||
@rxobj = ::REXML::DocType.new [name, ::REXML::DocType::SYSTEM, nil, system_id]
|
||||
else
|
||||
@rxobj = ::REXML::DocType.new name
|
||||
end
|
||||
end
|
||||
|
||||
def printTree indent=0
|
||||
"\n|#{' ' * indent}<!DOCTYPE #{name}>"
|
||||
end
|
||||
|
@ -173,15 +184,20 @@ module HTML5lib
|
|||
@fragmentClass = DocumentFragment
|
||||
end
|
||||
|
||||
def testSerializer node
|
||||
node.printTree()
|
||||
def insertDoctype(name, public_id, system_id)
|
||||
doctype = @doctypeClass.new(name, public_id, system_id)
|
||||
@document.appendChild(doctype)
|
||||
end
|
||||
|
||||
def getDocument
|
||||
def testSerializer node
|
||||
node.printTree
|
||||
end
|
||||
|
||||
def get_document
|
||||
@document.rxobj
|
||||
end
|
||||
|
||||
def getFragment
|
||||
def get_fragment
|
||||
@document = super
|
||||
return @document.rxobj.children
|
||||
end
|
|
@ -1,6 +1,6 @@
|
|||
require 'html5lib/treebuilders/base'
|
||||
require 'html5/treebuilders/base'
|
||||
|
||||
module HTML5lib
|
||||
module HTML5
|
||||
module TreeBuilders
|
||||
module SimpleTree
|
||||
|
||||
|
@ -25,10 +25,10 @@ module HTML5lib
|
|||
|
||||
def appendChild node
|
||||
if node.kind_of? TextNode and
|
||||
childNodes.length>0 and childNodes[-1].kind_of? TextNode
|
||||
childNodes[-1].value += node.value
|
||||
childNodes.length > 0 and childNodes.last.kind_of? TextNode
|
||||
childNodes.last.value += node.value
|
||||
else
|
||||
childNodes.push node
|
||||
childNodes << node
|
||||
end
|
||||
node.parent = self
|
||||
end
|
||||
|
@ -55,8 +55,7 @@ module HTML5lib
|
|||
|
||||
def insertBefore node, refNode
|
||||
index = childNodes.index(refNode)
|
||||
if node.kind_of? TextNode and index>0 and
|
||||
childNodes[index-1].kind_of? TextNode
|
||||
if node.kind_of?(TextNode) && index > 0 && childNodes[index-1].kind_of?(TextNode)
|
||||
childNodes[index-1].value += node.value
|
||||
else
|
||||
childNodes.insert index, node
|
||||
|
@ -72,7 +71,7 @@ module HTML5lib
|
|||
end
|
||||
|
||||
def hasContent
|
||||
return (childNodes.length > 0)
|
||||
childNodes.length > 0
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -90,7 +89,7 @@ module HTML5lib
|
|||
for child in childNodes
|
||||
tree += child.printTree(indent)
|
||||
end
|
||||
return tree
|
||||
tree
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -108,13 +107,21 @@ module HTML5lib
|
|||
for child in childNodes
|
||||
tree += child.printTree(indent + 2)
|
||||
end
|
||||
return tree
|
||||
tree
|
||||
end
|
||||
end
|
||||
|
||||
class DocumentType < Node
|
||||
attr_accessor :public_id, :system_id
|
||||
|
||||
def to_s
|
||||
"<!DOCTYPE %s>" % name
|
||||
"<!DOCTYPE #{name}>"
|
||||
end
|
||||
|
||||
def initialize name
|
||||
super name
|
||||
@public_id = nil
|
||||
@system_id = nil
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -164,12 +171,12 @@ module HTML5lib
|
|||
end
|
||||
|
||||
def testSerializer node
|
||||
node.printTree()
|
||||
node.printTree
|
||||
end
|
||||
|
||||
def getFragment
|
||||
def get_fragment
|
||||
@document = super
|
||||
return @document.childNodes
|
||||
@document.childNodes
|
||||
end
|
||||
end
|
||||
|
26
vendor/plugins/HTML5lib/lib/html5/treewalkers.rb
vendored
Normal file
26
vendor/plugins/HTML5lib/lib/html5/treewalkers.rb
vendored
Normal file
|
@ -0,0 +1,26 @@
|
|||
require 'html5/treewalkers/base'
|
||||
|
||||
module HTML5
  module TreeWalkers

    class << self
      # Looks up a tree walker class by name (case-insensitive; symbols and
      # strings are both accepted because the name goes through to_s).
      # The implementation file is required lazily, only for the walker
      # that was asked for. Raises a RuntimeError for any other name.
      def [](name)
        key = name.to_s.downcase

        if key == 'simpletree'
          require 'html5/treewalkers/simpletree'
          SimpleTree::TreeWalker
        elsif key == 'rexml'
          require 'html5/treewalkers/rexml'
          REXML::TreeWalker
        elsif key == 'hpricot'
          require 'html5/treewalkers/hpricot'
          Hpricot::TreeWalker
        else
          raise "Unknown TreeWalker #{name}"
        end
      end

      # Method-style spelling of the same lookup.
      alias_method :get_tree_walker, :[]
    end
  end
end
|
154
vendor/plugins/HTML5lib/lib/html5/treewalkers/base.rb
vendored
Normal file
154
vendor/plugins/HTML5lib/lib/html5/treewalkers/base.rb
vendored
Normal file
|
@ -0,0 +1,154 @@
|
|||
require 'html5/constants'
|
||||
module HTML5
|
||||
module TreeWalkers
|
||||
|
||||
# Factory methods for the token hashes emitted by tree walkers. Every token
# is a Hash with a :type entry; tag tokens additionally carry :name and
# :data (the attributes after normalize_attrs).
module TokenConstructor
  # Builds an error token whose :data is +msg+.
  def error(msg)
    {:type => "SerializeError", :data => msg}
  end

  # Converts +attrs+ with to_a (for a Hash this gives [name, value] pairs).
  def normalize_attrs(attrs)
    attrs.to_a
  end

  # Builds the :EmptyTag token for a void element.
  #
  # When +has_children+ is true the element is malformed. Previously the
  # corresponding error token was constructed and silently thrown away;
  # it is now yielded to the caller's block when one is supplied, so the
  # problem can actually be reported. Without a block the behaviour is
  # unchanged: only the :EmptyTag token is returned.
  def empty_tag(name, attrs, has_children=false)
    if has_children && block_given?
      yield error(_("Void element has children"))
    end
    {:type => :EmptyTag, :name => name, :data => normalize_attrs(attrs)}
  end

  # Builds a :StartTag token.
  def start_tag(name, attrs)
    {:type => :StartTag, :name => name, :data => normalize_attrs(attrs)}
  end

  # Builds an :EndTag token; end tags never carry attributes.
  def end_tag(name)
    {:type => :EndTag, :name => name, :data => []}
  end

  # Splits +data+ into up to three tokens and yields them in order:
  # leading whitespace (:SpaceCharacters), the remaining text (:Characters),
  # and trailing whitespace (:SpaceCharacters). Text consisting only of
  # whitespace produces a single :SpaceCharacters token.
  def text(data)
    space_run = "[#{SPACE_CHARACTERS.join('')}]+"

    if data =~ /\A(#{space_run})/m
      # Copy the capture into a local so later matches cannot disturb it.
      leading = $1
      yield({:type => :SpaceCharacters, :data => leading})
      data = data[leading.length .. -1]
      return if data.empty?
    end

    if data =~ /(#{space_run})\Z/m
      trailing = $1
      yield({:type => :Characters, :data => data[0 ... -trailing.length]})
      yield({:type => :SpaceCharacters, :data => trailing})
    else
      yield({:type => :Characters, :data => data})
    end
  end

  # Builds a :Comment token.
  def comment(data)
    {:type => :Comment, :data => data}
  end

  # Builds a :Doctype token.
  def doctype(name, public_id, system_id, correct=nil)
    {:type => :Doctype, :name => name, :public_id => public_id, :system_id => system_id, :correct => correct}
  end

  # Builds an error token reporting a node type the walker cannot handle.
  def unknown(nodeType)
    error(_("Unknown node type: ") + nodeType.to_s)
  end

  # Message hook; currently returns +str+ unchanged.
  def _(str)
    str
  end
end
|
||||
|
||||
# Common superclass for tree walkers. Stores the tree to be walked in @tree
# and mixes in the token constructors; subclasses supply #each.
class Base
  include TokenConstructor

  def initialize(tree)
    @tree = tree
  end

  # Abstract: always raises NotImplementedError in this class.
  def each
    raise NotImplementedError
  end

  # Same as #each. This is a real method rather than `alias walk each`
  # because an alias is bound to Base#each when the class body runs: a
  # subclass that overrides only #each would still get the raising stub
  # through #walk. Dispatching at call time reaches the override.
  def walk(&block)
    each(&block)
  end
end
|
||||
|
||||
class NonRecursiveTreeWalker < TreeWalkers::Base
|
||||
# Abstract hook; this default always raises NotImplementedError.
# #each treats the return value as an Array whose first element is the node
# kind (:DOCTYPE, :TEXT, :ELEMENT, :COMMENT, :DOCUMENT, :DOCUMENT_FRAGMENT,
# or nil for nodes to ignore), followed by the details for that kind.
def node_details(node)
  raise(NotImplementedError)
end
|
||||
|
||||
# Abstract hook; this default always raises NotImplementedError.
# #each calls it for nodes reported as having children and treats a nil
# result as "no child".
def first_child(node)
  raise(NotImplementedError)
end
|
||||
|
||||
# Abstract hook; this default always raises NotImplementedError.
# #each treats a nil result as "no further sibling" and then moves up to
# the parent.
def next_sibling(node)
  raise(NotImplementedError)
end
|
||||
|
||||
# Abstract hook; this default always raises NotImplementedError.
# #each calls it to climb back up once a node has no further sibling.
def parent(node)
  raise(NotImplementedError)
end
|
||||
|
||||
# Walks the tree rooted at @tree without recursion, yielding one token per
# event in document order. Navigation relies entirely on the node_details,
# first_child, next_sibling and parent hooks.
def each
  node = @tree

  while node != nil
    info = node_details(node)
    descend = false

    # Emit the "opening" token(s) for this node.
    case info.shift
    when :DOCTYPE
      yield doctype(*info)
    when :TEXT
      text(*info) { |tok| yield tok }
    when :ELEMENT
      tag, attrs, descend = info
      if VOID_ELEMENTS.include?(tag)
        yield empty_tag(tag, attrs.to_a, descend)
        # Void elements are never descended into.
        descend = false
      else
        yield start_tag(tag, attrs.to_a)
      end
    when :COMMENT
      yield comment(info[0])
    when :DOCUMENT, :DOCUMENT_FRAGMENT
      descend = true
    when nil
      # ignore (REXML::XMLDecl is an example)
    else
      yield unknown(info[0])
    end

    child = descend ? first_child(node) : nil
    if child != nil
      node = child
      next
    end

    # No child to visit: close elements while climbing until a sibling is
    # found or the root has been closed.
    while node != nil
      info = node_details(node)
      if info.shift == :ELEMENT
        tag = info[0]
        yield end_tag(tag) unless VOID_ELEMENTS.include?(tag)
      end

      if @tree == node
        node = nil
      else
        sibling = next_sibling(node)
        if sibling != nil
          node = sibling
          break
        end

        node = parent(node)
      end
    end
  end
end
|
||||
end
|
||||
|
||||
end
|
||||
end
|
|
@ -1,10 +1,10 @@
|
|||
require 'html5lib/treewalkers/base'
|
||||
require 'html5/treewalkers/base'
|
||||
require 'rexml/document'
|
||||
|
||||
module HTML5lib
|
||||
module HTML5
|
||||
module TreeWalkers
|
||||
module Hpricot
|
||||
class TreeWalker < HTML5lib::TreeWalkers::NonRecursiveTreeWalker
|
||||
class TreeWalker < HTML5::TreeWalkers::NonRecursiveTreeWalker
|
||||
|
||||
def node_details(node)
|
||||
case node
|
||||
|
@ -17,13 +17,13 @@ module HTML5lib
|
|||
!node.empty?]
|
||||
end
|
||||
when ::Hpricot::Text
|
||||
[:TEXT, node.to_plain_text]
|
||||
[:TEXT, node.content]
|
||||
when ::Hpricot::Comment
|
||||
[:COMMENT, node.content]
|
||||
when ::Hpricot::Doc
|
||||
[:DOCUMENT]
|
||||
when ::Hpricot::DocType
|
||||
[:DOCTYPE, node.target]
|
||||
[:DOCTYPE, node.target, node.public_id, node.system_id]
|
||||
when ::Hpricot::XMLDecl
|
||||
[nil]
|
||||
else
|
|
@ -1,10 +1,10 @@
|
|||
require 'html5lib/treewalkers/base'
|
||||
require 'html5/treewalkers/base'
|
||||
require 'rexml/document'
|
||||
|
||||
module HTML5lib
|
||||
module HTML5
|
||||
module TreeWalkers
|
||||
module REXML
|
||||
class TreeWalker < HTML5lib::TreeWalkers::NonRecursiveTreeWalker
|
||||
class TreeWalker < HTML5::TreeWalkers::NonRecursiveTreeWalker
|
||||
|
||||
def node_details(node)
|
||||
case node
|
||||
|
@ -23,7 +23,7 @@ module HTML5lib
|
|||
when ::REXML::Comment
|
||||
[:COMMENT, node.string]
|
||||
when ::REXML::DocType
|
||||
[:DOCTYPE, node.name]
|
||||
[:DOCTYPE, node.name, node.public, node.system]
|
||||
when ::REXML::XMLDecl
|
||||
[nil]
|
||||
else
|
|
@ -1,10 +1,10 @@
|
|||
require 'html5lib/treewalkers/base'
|
||||
require 'html5/treewalkers/base'
|
||||
|
||||
module HTML5lib
|
||||
module HTML5
|
||||
module TreeWalkers
|
||||
module SimpleTree
|
||||
class TreeWalker < HTML5lib::TreeWalkers::Base
|
||||
include HTML5lib::TreeBuilders::SimpleTree
|
||||
class TreeWalker < HTML5::TreeWalkers::Base
|
||||
include HTML5::TreeBuilders::SimpleTree
|
||||
|
||||
def walk(node)
|
||||
case node
|
||||
|
@ -12,20 +12,20 @@ module HTML5lib
|
|||
return
|
||||
|
||||
when DocumentType
|
||||
yield doctype(node.name)
|
||||
yield doctype(node.name, node.public_id, node.system_id)
|
||||
|
||||
when TextNode
|
||||
text(node.value) {|token| yield token}
|
||||
|
||||
when Element
|
||||
if VOID_ELEMENTS.include?(node.name)
|
||||
yield emptyTag(node.name, node.attributes, node.hasContent())
|
||||
yield empty_tag(node.name, node.attributes, node.hasContent())
|
||||
else
|
||||
yield startTag(node.name, node.attributes)
|
||||
yield start_tag(node.name, node.attributes)
|
||||
for child in node.childNodes
|
||||
walk(child) {|token| yield token}
|
||||
end
|
||||
yield endTag(node.name)
|
||||
yield end_tag(node.name)
|
||||
end
|
||||
|
||||
when CommentNode
|
3
vendor/plugins/HTML5lib/lib/html5/version.rb
vendored
Normal file
3
vendor/plugins/HTML5lib/lib/html5/version.rb
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
module HTML5
  # Library version string. Frozen so the shared constant cannot be
  # mutated in place by accident.
  VERSION = '0.1.0'.freeze
end
|
11
vendor/plugins/HTML5lib/lib/html5lib.rb
vendored
11
vendor/plugins/HTML5lib/lib/html5lib.rb
vendored
|
@ -1,11 +0,0 @@
|
|||
require 'html5lib/html5parser'
|
||||
|
||||
# Module-level convenience entry points that forward to HTMLParser.
module HTML5lib
  # Forwards to HTMLParser.parse with the same arguments.
  def self.parse(stream, options={})
    HTMLParser.parse(stream, options)
  end

  # Forwards to HTMLParser.parseFragment with the same arguments.
  # This used to call HTMLParser.parse, which made it behave exactly like
  # HTML5lib.parse instead of parsing a fragment.
  # NOTE(review): assumes HTMLParser.parseFragment(stream, options) exists
  # in html5lib/html5parser -- confirm before merging.
  def self.parseFragment(stream, options={})
    HTMLParser.parseFragment(stream, options)
  end
end
|
708
vendor/plugins/HTML5lib/lib/html5lib/constants.rb
vendored
708
vendor/plugins/HTML5lib/lib/html5lib/constants.rb
vendored
|
@ -1,708 +0,0 @@
|
|||
module HTML5lib
|
||||
|
||||
class EOF < Exception; end
|
||||
|
||||
CONTENT_MODEL_FLAGS = [
|
||||
:PCDATA,
|
||||
:RCDATA,
|
||||
:CDATA,
|
||||
:PLAINTEXT
|
||||
]
|
||||
|
||||
SCOPING_ELEMENTS = %w[
|
||||
button
|
||||
caption
|
||||
html
|
||||
marquee
|
||||
object
|
||||
table
|
||||
td
|
||||
th
|
||||
]
|
||||
|
||||
FORMATTING_ELEMENTS = %w[
|
||||
a
|
||||
b
|
||||
big
|
||||
em
|
||||
font
|
||||
i
|
||||
nobr
|
||||
s
|
||||
small
|
||||
strike
|
||||
strong
|
||||
tt
|
||||
u
|
||||
]
|
||||
|
||||
SPECIAL_ELEMENTS = %w[
|
||||
address
|
||||
area
|
||||
base
|
||||
basefont
|
||||
bgsound
|
||||
blockquote
|
||||
body
|
||||
br
|
||||
center
|
||||
col
|
||||
colgroup
|
||||
dd
|
||||
dir
|
||||
div
|
||||
dl
|
||||
dt
|
||||
embed
|
||||
fieldset
|
||||
form
|
||||
frame
|
||||
frameset
|
||||
h1
|
||||
h2
|
||||
h3
|
||||
h4
|
||||
h5
|
||||
h6
|
||||
head
|
||||
hr
|
||||
iframe
|
||||
image
|
||||
img
|
||||
input
|
||||
isindex
|
||||
li
|
||||
link
|
||||
listing
|
||||
menu
|
||||
meta
|
||||
noembed
|
||||
noframes
|
||||
noscript
|
||||
ol
|
||||
optgroup
|
||||
option
|
||||
p
|
||||
param
|
||||
plaintext
|
||||
pre
|
||||
script
|
||||
select
|
||||
spacer
|
||||
style
|
||||
tbody
|
||||
textarea
|
||||
tfoot
|
||||
thead
|
||||
title
|
||||
tr
|
||||
ul
|
||||
wbr
|
||||
]
|
||||
|
||||
SPACE_CHARACTERS = %W[
|
||||
\t
|
||||
\n
|
||||
\x0B
|
||||
\x0C
|
||||
\x20
|
||||
\r
|
||||
]
|
||||
|
||||
TABLE_INSERT_MODE_ELEMENTS = %w[
|
||||
table
|
||||
tbody
|
||||
tfoot
|
||||
thead
|
||||
tr
|
||||
]
|
||||
|
||||
ASCII_LOWERCASE = ('a'..'z').to_a.join('')
|
||||
ASCII_UPPERCASE = ('A'..'Z').to_a.join('')
|
||||
ASCII_LETTERS = ASCII_LOWERCASE + ASCII_UPPERCASE
|
||||
DIGITS = '0'..'9'
|
||||
HEX_DIGITS = DIGITS.to_a + ('a'..'f').to_a + ('A'..'F').to_a
|
||||
|
||||
# Heading elements need to be ordered
|
||||
HEADING_ELEMENTS = %w[
|
||||
h1
|
||||
h2
|
||||
h3
|
||||
h4
|
||||
h5
|
||||
h6
|
||||
]
|
||||
|
||||
# XXX What about event-source and command?
|
||||
VOID_ELEMENTS = %w[
|
||||
base
|
||||
link
|
||||
meta
|
||||
hr
|
||||
br
|
||||
img
|
||||
embed
|
||||
param
|
||||
area
|
||||
col
|
||||
input
|
||||
]
|
||||
|
||||
CDATA_ELEMENTS = %w[title textarea]
|
||||
|
||||
RCDATA_ELEMENTS = %w[
|
||||
style
|
||||
script
|
||||
xmp
|
||||
iframe
|
||||
noembed
|
||||
noframes
|
||||
noscript
|
||||
]
|
||||
|
||||
BOOLEAN_ATTRIBUTES = {
|
||||
:global => %w[irrelevant],
|
||||
'style' => %w[scoped],
|
||||
'img' => %w[ismap],
|
||||
'audio' => %w[autoplay controls],
|
||||
'video' => %w[autoplay controls],
|
||||
'script' => %w[defer async],
|
||||
'details' => %w[open],
|
||||
'datagrid' => %w[multiple disabled],
|
||||
'command' => %w[hidden disabled checked default],
|
||||
'menu' => %w[autosubmit],
|
||||
'fieldset' => %w[disabled readonly],
|
||||
'option' => %w[disabled readonly selected],
|
||||
'optgroup' => %w[disabled readonly],
|
||||
'button' => %w[disabled autofocus],
|
||||
'input' => %w[disabled readonly required autofocus checked ismap],
|
||||
'select' => %w[disabled readonly autofocus multiple],
|
||||
'output' => %w[disabled readonly]
|
||||
}
|
||||
|
||||
# entitiesWindows1252 has to be _ordered_ and needs to have an index.
|
||||
ENTITIES_WINDOWS1252 = [
|
||||
8364, # 0x80 0x20AC EURO SIGN
|
||||
65533, # 0x81 UNDEFINED
|
||||
8218, # 0x82 0x201A SINGLE LOW-9 QUOTATION MARK
|
||||
402, # 0x83 0x0192 LATIN SMALL LETTER F WITH HOOK
|
||||
8222, # 0x84 0x201E DOUBLE LOW-9 QUOTATION MARK
|
||||
8230, # 0x85 0x2026 HORIZONTAL ELLIPSIS
|
||||
8224, # 0x86 0x2020 DAGGER
|
||||
8225, # 0x87 0x2021 DOUBLE DAGGER
|
||||
710, # 0x88 0x02C6 MODIFIER LETTER CIRCUMFLEX ACCENT
|
||||
8240, # 0x89 0x2030 PER MILLE SIGN
|
||||
352, # 0x8A 0x0160 LATIN CAPITAL LETTER S WITH CARON
|
||||
8249, # 0x8B 0x2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||
338, # 0x8C 0x0152 LATIN CAPITAL LIGATURE OE
|
||||
65533, # 0x8D UNDEFINED
|
||||
381, # 0x8E 0x017D LATIN CAPITAL LETTER Z WITH CARON
|
||||
65533, # 0x8F UNDEFINED
|
||||
65533, # 0x90 UNDEFINED
|
||||
8216, # 0x91 0x2018 LEFT SINGLE QUOTATION MARK
|
||||
8217, # 0x92 0x2019 RIGHT SINGLE QUOTATION MARK
|
||||
8220, # 0x93 0x201C LEFT DOUBLE QUOTATION MARK
|
||||
8221, # 0x94 0x201D RIGHT DOUBLE QUOTATION MARK
|
||||
8226, # 0x95 0x2022 BULLET
|
||||
8211, # 0x96 0x2013 EN DASH
|
||||
8212, # 0x97 0x2014 EM DASH
|
||||
732, # 0x98 0x02DC SMALL TILDE
|
||||
8482, # 0x99 0x2122 TRADE MARK SIGN
|
||||
353, # 0x9A 0x0161 LATIN SMALL LETTER S WITH CARON
|
||||
8250, # 0x9B 0x203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||
339, # 0x9C 0x0153 LATIN SMALL LIGATURE OE
|
||||
65533, # 0x9D UNDEFINED
|
||||
382, # 0x9E 0x017E LATIN SMALL LETTER Z WITH CARON
|
||||
376 # 0x9F 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||
]
|
||||
|
||||
private
|
||||
|
||||
# Returns the one-character UTF-8 string for the code point +n+.
def self.U(n)
  codepoints = [n]
  codepoints.pack('U')
end
|
||||
|
||||
public
|
||||
|
||||
ENTITIES = {
|
||||
"AElig" => U(0xC6),
|
||||
"Aacute" => U(0xC1),
|
||||
"Acirc" => U(0xC2),
|
||||
"Agrave" => U(0xC0),
|
||||
"Alpha" => U(0x0391),
|
||||
"Aring" => U(0xC5),
|
||||
"Atilde" => U(0xC3),
|
||||
"Auml" => U(0xC4),
|
||||
"Beta" => U(0x0392),
|
||||
"Ccedil" => U(0xC7),
|
||||
"Chi" => U(0x03A7),
|
||||
"Dagger" => U(0x2021),
|
||||
"Delta" => U(0x0394),
|
||||
"ETH" => U(0xD0),
|
||||
"Eacute" => U(0xC9),
|
||||
"Ecirc" => U(0xCA),
|
||||
"Egrave" => U(0xC8),
|
||||
"Epsilon" => U(0x0395),
|
||||
"Eta" => U(0x0397),
|
||||
"Euml" => U(0xCB),
|
||||
"Gamma" => U(0x0393),
|
||||
"Iacute" => U(0xCD),
|
||||
"Icirc" => U(0xCE),
|
||||
"Igrave" => U(0xCC),
|
||||
"Iota" => U(0x0399),
|
||||
"Iuml" => U(0xCF),
|
||||
"Kappa" => U(0x039A),
|
||||
"Lambda" => U(0x039B),
|
||||
"Mu" => U(0x039C),
|
||||
"Ntilde" => U(0xD1),
|
||||
"Nu" => U(0x039D),
|
||||
"OElig" => U(0x0152),
|
||||
"Oacute" => U(0xD3),
|
||||
"Ocirc" => U(0xD4),
|
||||
"Ograve" => U(0xD2),
|
||||
"Omega" => U(0x03A9),
|
||||
"Omicron" => U(0x039F),
|
||||
"Oslash" => U(0xD8),
|
||||
"Otilde" => U(0xD5),
|
||||
"Ouml" => U(0xD6),
|
||||
"Phi" => U(0x03A6),
|
||||
"Pi" => U(0x03A0),
|
||||
"Prime" => U(0x2033),
|
||||
"Psi" => U(0x03A8),
|
||||
"Rho" => U(0x03A1),
|
||||
"Scaron" => U(0x0160),
|
||||
"Sigma" => U(0x03A3),
|
||||
"THORN" => U(0xDE),
|
||||
"Tau" => U(0x03A4),
|
||||
"Theta" => U(0x0398),
|
||||
"Uacute" => U(0xDA),
|
||||
"Ucirc" => U(0xDB),
|
||||
"Ugrave" => U(0xD9),
|
||||
"Upsilon" => U(0x03A5),
|
||||
"Uuml" => U(0xDC),
|
||||
"Xi" => U(0x039E),
|
||||
"Yacute" => U(0xDD),
|
||||
"Yuml" => U(0x0178),
|
||||
"Zeta" => U(0x0396),
|
||||
"aacute" => U(0xE1),
|
||||
"acirc" => U(0xE2),
|
||||
"acute" => U(0xB4),
|
||||
"aelig" => U(0xE6),
|
||||
"agrave" => U(0xE0),
|
||||
"alefsym" => U(0x2135),
|
||||
"alpha" => U(0x03B1),
|
||||
"amp" => U(0x26),
|
||||
"AMP" => U(0x26),
|
||||
"and" => U(0x2227),
|
||||
"ang" => U(0x2220),
|
||||
"apos" => U(0x27),
|
||||
"aring" => U(0xE5),
|
||||
"asymp" => U(0x2248),
|
||||
"atilde" => U(0xE3),
|
||||
"auml" => U(0xE4),
|
||||
"bdquo" => U(0x201E),
|
||||
"beta" => U(0x03B2),
|
||||
"brvbar" => U(0xA6),
|
||||
"bull" => U(0x2022),
|
||||
"cap" => U(0x2229),
|
||||
"ccedil" => U(0xE7),
|
||||
"cedil" => U(0xB8),
|
||||
"cent" => U(0xA2),
|
||||
"chi" => U(0x03C7),
|
||||
"circ" => U(0x02C6),
|
||||
"clubs" => U(0x2663),
|
||||
"cong" => U(0x2245),
|
||||
"copy" => U(0xA9),
|
||||
"COPY" => U(0xA9),
|
||||
"crarr" => U(0x21B5),
|
||||
"cup" => U(0x222A),
|
||||
"curren" => U(0xA4),
|
||||
"dArr" => U(0x21D3),
|
||||
"dagger" => U(0x2020),
|
||||
"darr" => U(0x2193),
|
||||
"deg" => U(0xB0),
|
||||
"delta" => U(0x03B4),
|
||||
"diams" => U(0x2666),
|
||||
"divide" => U(0xF7),
|
||||
"eacute" => U(0xE9),
|
||||
"ecirc" => U(0xEA),
|
||||
"egrave" => U(0xE8),
|
||||
"empty" => U(0x2205),
|
||||
"emsp" => U(0x2003),
|
||||
"ensp" => U(0x2002),
|
||||
"epsilon" => U(0x03B5),
|
||||
"equiv" => U(0x2261),
|
||||
"eta" => U(0x03B7),
|
||||
"eth" => U(0xF0),
|
||||
"euml" => U(0xEB),
|
||||
"euro" => U(0x20AC),
|
||||
"exist" => U(0x2203),
|
||||
"fnof" => U(0x0192),
|
||||
"forall" => U(0x2200),
|
||||
"frac12" => U(0xBD),
|
||||
"frac14" => U(0xBC),
|
||||
"frac34" => U(0xBE),
|
||||
"frasl" => U(0x2044),
|
||||
"gamma" => U(0x03B3),
|
||||
"ge" => U(0x2265),
|
||||
"gt" => U(0x3E),
|
||||
"GT" => U(0x3E),
|
||||
"hArr" => U(0x21D4),
|
||||
"harr" => U(0x2194),
|
||||
"hearts" => U(0x2665),
|
||||
"hellip" => U(0x2026),
|
||||
"iacute" => U(0xED),
|
||||
"icirc" => U(0xEE),
|
||||
"iexcl" => U(0xA1),
|
||||
"igrave" => U(0xEC),
|
||||
"image" => U(0x2111),
|
||||
"infin" => U(0x221E),
|
||||
"int" => U(0x222B),
|
||||
"iota" => U(0x03B9),
|
||||
"iquest" => U(0xBF),
|
||||
"isin" => U(0x2208),
|
||||
"iuml" => U(0xEF),
|
||||
"kappa" => U(0x03BA),
|
||||
"lArr" => U(0x21D0),
|
||||
"lambda" => U(0x03BB),
|
||||
"lang" => U(0x2329),
|
||||
"laquo" => U(0xAB),
|
||||
"larr" => U(0x2190),
|
||||
"lceil" => U(0x2308),
|
||||
"ldquo" => U(0x201C),
|
||||
"le" => U(0x2264),
|
||||
"lfloor" => U(0x230A),
|
||||
"lowast" => U(0x2217),
|
||||
"loz" => U(0x25CA),
|
||||
"lrm" => U(0x200E),
|
||||
"lsaquo" => U(0x2039),
|
||||
"lsquo" => U(0x2018),
|
||||
"lt" => U(0x3C),
|
||||
"LT" => U(0x3C),
|
||||
"macr" => U(0xAF),
|
||||
"mdash" => U(0x2014),
|
||||
"micro" => U(0xB5),
|
||||
"middot" => U(0xB7),
|
||||
"minus" => U(0x2212),
|
||||
"mu" => U(0x03BC),
|
||||
"nabla" => U(0x2207),
|
||||
"nbsp" => U(0xA0),
|
||||
"ndash" => U(0x2013),
|
||||
"ne" => U(0x2260),
|
||||
"ni" => U(0x220B),
|
||||
"not" => U(0xAC),
|
||||
"notin" => U(0x2209),
|
||||
"nsub" => U(0x2284),
|
||||
"ntilde" => U(0xF1),
|
||||
"nu" => U(0x03BD),
|
||||
"oacute" => U(0xF3),
|
||||
"ocirc" => U(0xF4),
|
||||
"oelig" => U(0x0153),
|
||||
"ograve" => U(0xF2),
|
||||
"oline" => U(0x203E),
|
||||
"omega" => U(0x03C9),
|
||||
"omicron" => U(0x03BF),
|
||||
"oplus" => U(0x2295),
|
||||
"or" => U(0x2228),
|
||||
"ordf" => U(0xAA),
|
||||
"ordm" => U(0xBA),
|
||||
"oslash" => U(0xF8),
|
||||
"otilde" => U(0xF5),
|
||||
"otimes" => U(0x2297),
|
||||
"ouml" => U(0xF6),
|
||||
"para" => U(0xB6),
|
||||
"part" => U(0x2202),
|
||||
"permil" => U(0x2030),
|
||||
"perp" => U(0x22A5),
|
||||
"phi" => U(0x03C6),
|
||||
"pi" => U(0x03C0),
|
||||
"piv" => U(0x03D6),
|
||||
"plusmn" => U(0xB1),
|
||||
"pound" => U(0xA3),
|
||||
"prime" => U(0x2032),
|
||||
"prod" => U(0x220F),
|
||||
"prop" => U(0x221D),
|
||||
"psi" => U(0x03C8),
|
||||
"quot" => U(0x22),
|
||||
"QUOT" => U(0x22),
|
||||
"rArr" => U(0x21D2),
|
||||
"radic" => U(0x221A),
|
||||
"rang" => U(0x232A),
|
||||
"raquo" => U(0xBB),
|
||||
"rarr" => U(0x2192),
|
||||
"rceil" => U(0x2309),
|
||||
"rdquo" => U(0x201D),
|
||||
"real" => U(0x211C),
|
||||
"reg" => U(0xAE),
|
||||
"REG" => U(0xAE),
|
||||
"rfloor" => U(0x230B),
|
||||
"rho" => U(0x03C1),
|
||||
"rlm" => U(0x200F),
|
||||
"rsaquo" => U(0x203A),
|
||||
"rsquo" => U(0x2019),
|
||||
"sbquo" => U(0x201A),
|
||||
"scaron" => U(0x0161),
|
||||
"sdot" => U(0x22C5),
|
||||
"sect" => U(0xA7),
|
||||
"shy" => U(0xAD),
|
||||
"sigma" => U(0x03C3),
|
||||
"sigmaf" => U(0x03C2),
|
||||
"sim" => U(0x223C),
|
||||
"spades" => U(0x2660),
|
||||
"sub" => U(0x2282),
|
||||
"sube" => U(0x2286),
|
||||
"sum" => U(0x2211),
|
||||
"sup" => U(0x2283),
|
||||
"sup1" => U(0xB9),
|
||||
"sup2" => U(0xB2),
|
||||
"sup3" => U(0xB3),
|
||||
"supe" => U(0x2287),
|
||||
"szlig" => U(0xDF),
|
||||
"tau" => U(0x03C4),
|
||||
"there4" => U(0x2234),
|
||||
"theta" => U(0x03B8),
|
||||
"thetasym" => U(0x03D1),
|
||||
"thinsp" => U(0x2009),
|
||||
"thorn" => U(0xFE),
|
||||
"tilde" => U(0x02DC),
|
||||
"times" => U(0xD7),
|
||||
"trade" => U(0x2122),
|
||||
"uArr" => U(0x21D1),
|
||||
"uacute" => U(0xFA),
|
||||
"uarr" => U(0x2191),
|
||||
"ucirc" => U(0xFB),
|
||||
"ugrave" => U(0xF9),
|
||||
"uml" => U(0xA8),
|
||||
"upsih" => U(0x03D2),
|
||||
"upsilon" => U(0x03C5),
|
||||
"uuml" => U(0xFC),
|
||||
"weierp" => U(0x2118),
|
||||
"xi" => U(0x03BE),
|
||||
"yacute" => U(0xFD),
|
||||
"yen" => U(0xA5),
|
||||
"yuml" => U(0xFF),
|
||||
"zeta" => U(0x03B6),
|
||||
"zwj" => U(0x200D),
|
||||
"zwnj" => U(0x200C)
|
||||
}
|
||||
|
||||
ENCODINGS = %w[
|
||||
ansi_x3.4-1968
|
||||
iso-ir-6
|
||||
ansi_x3.4-1986
|
||||
iso_646.irv:1991
|
||||
ascii
|
||||
iso646-us
|
||||
us-ascii
|
||||
us
|
||||
ibm367
|
||||
cp367
|
||||
csascii
|
||||
ks_c_5601-1987
|
||||
korean
|
||||
iso-2022-kr
|
||||
csiso2022kr
|
||||
euc-kr
|
||||
iso-2022-jp
|
||||
csiso2022jp
|
||||
iso-2022-jp-2
|
||||
iso-ir-58
|
||||
chinese
|
||||
csiso58gb231280
|
||||
iso_8859-1:1987
|
||||
iso-ir-100
|
||||
iso_8859-1
|
||||
iso-8859-1
|
||||
latin1
|
||||
l1
|
||||
ibm819
|
||||
cp819
|
||||
csisolatin1
|
||||
iso_8859-2:1987
|
||||
iso-ir-101
|
||||
iso_8859-2
|
||||
iso-8859-2
|
||||
latin2
|
||||
l2
|
||||
csisolatin2
|
||||
iso_8859-3:1988
|
||||
iso-ir-109
|
||||
iso_8859-3
|
||||
iso-8859-3
|
||||
latin3
|
||||
l3
|
||||
csisolatin3
|
||||
iso_8859-4:1988
|
||||
iso-ir-110
|
||||
iso_8859-4
|
||||
iso-8859-4
|
||||
latin4
|
||||
l4
|
||||
csisolatin4
|
||||
iso_8859-6:1987
|
||||
iso-ir-127
|
||||
iso_8859-6
|
||||
iso-8859-6
|
||||
ecma-114
|
||||
asmo-708
|
||||
arabic
|
||||
csisolatinarabic
|
||||
iso_8859-7:1987
|
||||
iso-ir-126
|
||||
iso_8859-7
|
||||
iso-8859-7
|
||||
elot_928
|
||||
ecma-118
|
||||
greek
|
||||
greek8
|
||||
csisolatingreek
|
||||
iso_8859-8:1988
|
||||
iso-ir-138
|
||||
iso_8859-8
|
||||
iso-8859-8
|
||||
hebrew
|
||||
csisolatinhebrew
|
||||
iso_8859-5:1988
|
||||
iso-ir-144
|
||||
iso_8859-5
|
||||
iso-8859-5
|
||||
cyrillic
|
||||
csisolatincyrillic
|
||||
iso_8859-9:1989
|
||||
iso-ir-148
|
||||
iso_8859-9
|
||||
iso-8859-9
|
||||
latin5
|
||||
l5
|
||||
csisolatin5
|
||||
iso-8859-10
|
||||
iso-ir-157
|
||||
l6
|
||||
iso_8859-10:1992
|
||||
csisolatin6
|
||||
latin6
|
||||
hp-roman8
|
||||
roman8
|
||||
r8
|
||||
ibm037
|
||||
cp037
|
||||
csibm037
|
||||
ibm424
|
||||
cp424
|
||||
csibm424
|
||||
ibm437
|
||||
cp437
|
||||
437
|
||||
cspc8codepage437
|
||||
ibm500
|
||||
cp500
|
||||
csibm500
|
||||
ibm775
|
||||
cp775
|
||||
cspc775baltic
|
||||
ibm850
|
||||
cp850
|
||||
850
|
||||
cspc850multilingual
|
||||
ibm852
|
||||
cp852
|
||||
852
|
||||
cspcp852
|
||||
ibm855
|
||||
cp855
|
||||
855
|
||||
csibm855
|
||||
ibm857
|
||||
cp857
|
||||
857
|
||||
csibm857
|
||||
ibm860
|
||||
cp860
|
||||
860
|
||||
csibm860
|
||||
ibm861
|
||||
cp861
|
||||
861
|
||||
cp-is
|
||||
csibm861
|
||||
ibm862
|
||||
cp862
|
||||
862
|
||||
cspc862latinhebrew
|
||||
ibm863
|
||||
cp863
|
||||
863
|
||||
csibm863
|
||||
ibm864
|
||||
cp864
|
||||
csibm864
|
||||
ibm865
|
||||
cp865
|
||||
865
|
||||
csibm865
|
||||
ibm866
|
||||
cp866
|
||||
866
|
||||
csibm866
|
||||
ibm869
|
||||
cp869
|
||||
869
|
||||
cp-gr
|
||||
csibm869
|
||||
ibm1026
|
||||
cp1026
|
||||
csibm1026
|
||||
koi8-r
|
||||
cskoi8r
|
||||
koi8-u
|
||||
big5-hkscs
|
||||
ptcp154
|
||||
csptcp154
|
||||
pt154
|
||||
cp154
|
||||
utf-7
|
||||
utf-16be
|
||||
utf-16le
|
||||
utf-16
|
||||
utf-8
|
||||
iso-8859-13
|
||||
iso-8859-14
|
||||
iso-ir-199
|
||||
iso_8859-14:1998
|
||||
iso_8859-14
|
||||
latin8
|
||||
iso-celtic
|
||||
l8
|
||||
iso-8859-15
|
||||
iso_8859-15
|
||||
iso-8859-16
|
||||
iso-ir-226
|
||||
iso_8859-16:2001
|
||||
iso_8859-16
|
||||
latin10
|
||||
l10
|
||||
gbk
|
||||
cp936
|
||||
ms936
|
||||
gb18030
|
||||
shift_jis
|
||||
ms_kanji
|
||||
csshiftjis
|
||||
euc-jp
|
||||
gb2312
|
||||
big5
|
||||
csbig5
|
||||
windows-1250
|
||||
windows-1251
|
||||
windows-1252
|
||||
windows-1253
|
||||
windows-1254
|
||||
windows-1255
|
||||
windows-1256
|
||||
windows-1257
|
||||
windows-1258
|
||||
tis-620
|
||||
hz-gb-2312
|
||||
]
|
||||
|
||||
end
|
|
@ -1 +0,0 @@
|
|||
require 'html5lib/filters/optionaltags'
|
|
@ -1,57 +0,0 @@
|
|||
require 'html5lib/html5parser/phase'
|
||||
|
||||
module HTML5lib
  # Parser phase for tokens seen inside a frameset.
  # http://www.whatwg.org/specs/web-apps/current-work/#in-frameset
  class InFramesetPhase < Phase

    handle_start 'html', 'frameset', 'frame', 'noframes'

    handle_end 'frameset', 'noframes'

    # Character data is not allowed here; it is reported and dropped.
    def processCharacters(data)
      @parser.parseError(_('Unexpected characters in the frameset phase. Characters ignored.'))
    end

    # A nested frameset is inserted and stays open.
    def startTagFrameset(name, attributes)
      @tree.insertElement(name, attributes)
    end

    # A frame is inserted and immediately removed from the open elements.
    def startTagFrame(name, attributes)
      @tree.insertElement(name, attributes)
      @tree.openElements.pop
    end

    # noframes is handled by the :inBody phase.
    def startTagNoframes(name, attributes)
      body_phase = @parser.phases[:inBody]
      body_phase.processStartTag(name, attributes)
    end

    def startTagOther(name, attributes)
      @parser.parseError(_("Unexpected start tag token (#{name}) in the frameset phase. Ignored"))
    end

    def endTagFrameset(name)
      if @tree.openElements[-1].name == 'html'
        # innerHTML case
        @parser.parseError(_("Unexpected end tag token (frameset) in the frameset phase (innerHTML)."))
      else
        @tree.openElements.pop
      end

      # If we're not in innerHTML mode and the current node is not a
      # "frameset" element (anymore) then switch.
      unless @parser.innerHTML || @tree.openElements[-1].name == 'frameset'
        @parser.phase = @parser.phases[:afterFrameset]
      end
    end

    # noframes is handled by the :inBody phase.
    def endTagNoframes(name)
      body_phase = @parser.phases[:inBody]
      body_phase.processEndTag(name)
    end

    def endTagOther(name)
      @parser.parseError(_("Unexpected end tag token (#{name}) in the frameset phase. Ignored."))
    end

  end
end
|
|
@ -1,126 +0,0 @@
|
|||
require 'html5lib/html5parser/phase'
|
||||
|
||||
module HTML5lib
  # Parser phase for tokens seen while the head element is being built.
  class InHeadPhase < Phase

    handle_start 'html', 'head', 'title', 'style', 'script', %w( base link meta )

    handle_end 'head'
    handle_end %w( html body br ) => 'ImplyAfterHead'
    handle_end %w( title style script )

    # At end of file an unfinished title/style/script is reported and
    # closed, then the head is left and the new phase handles the EOF.
    def processEOF
      current = @tree.openElements[-1].name
      if %w( title style script ).include?(current)
        @parser.parseError(_("Unexpected end of file. Expected end tag (#{current})."))
        @tree.openElements.pop
      end
      anythingElse
      @parser.phase.processEOF
    end

    # Text belongs to an open title/style/script; any other text ends the
    # head and is reprocessed by the following phase.
    def processCharacters(data)
      return @tree.insertText(data) if %w( title style script ).include?(@tree.openElements[-1].name)

      anythingElse
      @parser.phase.processCharacters(data)
    end

    def startTagHead(name, attributes)
      @parser.parseError(_('Unexpected start tag head in existing head. Ignored'))
    end

    def startTagTitle(name, attributes)
      title = @tree.createElement(name, attributes)
      appendToHead(title)
      @tree.openElements.push(title)
      @parser.tokenizer.contentModelFlag = :RCDATA
    end

    def startTagStyle(name, attributes)
      style = @tree.createElement(name, attributes)
      appendToHeadOrCurrentNode(style)
      @tree.openElements.push(style)
      @parser.tokenizer.contentModelFlag = :CDATA
    end

    def startTagScript(name, attributes)
      #XXX Inner HTML case may be wrong
      script = @tree.createElement(name, attributes)
      script._flags.push("parser-inserted")
      appendToHeadOrCurrentNode(script)
      @tree.openElements.push(script)
      @parser.tokenizer.contentModelFlag = :CDATA
    end

    def startTagBaseLinkMeta(name, attributes)
      appendToHeadOrCurrentNode(@tree.createElement(name, attributes))
    end

    # Any other start tag ends the head and is reprocessed.
    def startTagOther(name, attributes)
      anythingElse
      @parser.phase.processStartTag(name, attributes)
    end

    # Closes the head when it is the current node (otherwise reports the
    # stray end tag) and switches to the :afterHead phase either way.
    def endTagHead(name)
      if @tree.openElements[-1].name != 'head'
        @parser.parseError(_("Unexpected end tag (head). Ignored."))
      else
        @tree.openElements.pop
      end
      @parser.phase = @parser.phases[:afterHead]
    end

    def endTagImplyAfterHead(name)
      anythingElse
      @parser.phase.processEndTag(name)
    end

    def endTagTitleStyleScript(name)
      if @tree.openElements[-1].name != name
        @parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
      else
        @tree.openElements.pop
      end
    end

    def endTagOther(name)
      @parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
    end

    # Leaves the head: closes it if it is the current node, otherwise just
    # switches to the :afterHead phase.
    def anythingElse
      if @tree.openElements[-1].name == 'head'
        endTagHead('head')
      else
        @parser.phase = @parser.phases[:afterHead]
      end
    end

    protected

    # Appends +element+ to the head element, or to the current node when
    # no head pointer is set.
    def appendToHead(element)
      if @tree.headPointer.nil?
        assert @parser.innerHTML
        @tree.openElements[-1].appendChild(element)
      else
        @tree.headPointer.appendChild(element)
      end
    end

    # Appends +element+ via appendToHead when a head pointer is set and the
    # parser is currently in the :inHead phase; otherwise appends it to the
    # current node.
    def appendToHeadOrCurrentNode(element)
      if @tree.headPointer != nil && @parser.phase == @parser.phases[:inHead]
        appendToHead(element)
      else
        @tree.openElements[-1].appendChild(element)
      end
    end

  end
end
|
|
@ -1,84 +0,0 @@
|
|||
require 'html5lib/html5parser/phase'
|
||||
|
||||
module HTML5lib
  # Parser phase for tokens seen inside a select element.
  # http://www.whatwg.org/specs/web-apps/current-work/#in-select
  class InSelectPhase < Phase

    handle_start 'html', 'option', 'optgroup', 'select'

    handle_end 'option', 'optgroup', 'select', %w( caption table tbody tfoot thead tr td th ) => 'TableElements'

    def processCharacters(data)
      @tree.insertText(data)
    end

    def startTagOption(name, attributes)
      # We need to imply </option> if <option> is the current node.
      @tree.openElements.pop if @tree.openElements[-1].name == 'option'
      @tree.insertElement(name, attributes)
    end

    def startTagOptgroup(name, attributes)
      # An open option, and then an open optgroup, are implicitly closed.
      @tree.openElements.pop if @tree.openElements[-1].name == 'option'
      @tree.openElements.pop if @tree.openElements[-1].name == 'optgroup'
      @tree.insertElement(name, attributes)
    end

    # A nested <select> is reported and treated as </select>.
    def startTagSelect(name, attributes)
      @parser.parseError(_('Unexpected start tag (select) in the select phase implies select start tag.'))
      endTagSelect('select')
    end

    def startTagOther(name, attributes)
      # Double quotes are required here: with single quotes the message
      # contained the literal text "#{name}" instead of the tag name.
      @parser.parseError(_("Unexpected start tag token (#{name}) in the select phase. Ignored."))
    end

    def endTagOption(name)
      if @tree.openElements[-1].name == 'option'
        @tree.openElements.pop
      else
        @parser.parseError(_('Unexpected end tag (option) in the select phase. Ignored.'))
      end
    end

    def endTagOptgroup(name)
      # </optgroup> implicitly closes <option>
      if @tree.openElements[-1].name == 'option' and @tree.openElements[-2].name == 'optgroup'
        @tree.openElements.pop
      end
      # It also closes </optgroup>
      if @tree.openElements[-1].name == 'optgroup'
        @tree.openElements.pop
      # But nothing else
      else
        @parser.parseError(_('Unexpected end tag (optgroup) in the select phase. Ignored.'))
      end
    end

    def endTagSelect(name)
      if in_scope?('select', true)
        remove_open_elements_until('select')

        @parser.resetInsertionMode
      else
        # innerHTML case
        # NOTE(review): reported without a message, unlike every other
        # parseError call in this phase.
        @parser.parseError
      end
    end

    # A table end tag is always reported; if that element is in scope the
    # select is closed first and the tag is reprocessed by the new phase.
    def endTagTableElements(name)
      @parser.parseError(_("Unexpected table end tag (#{name}) in the select phase."))

      if in_scope?(name, true)
        endTagSelect('select')
        @parser.phase.processEndTag(name)
      end
    end

    def endTagOther(name)
      @parser.parseError(_("Unexpected end tag token (#{name}) in the select phase. Ignored."))
    end

  end
end
|
|
@ -1,36 +0,0 @@
|
|||
require 'html5lib/html5parser/phase'
|
||||
|
||||
module HTML5lib
  # Parser phase that expects end of file. Comments are attached to the
  # document and whitespace is forwarded to the previous phase; any other
  # token is a parse error that switches the parser back to
  # @parser.lastPhase, which then reprocesses the token.
  class TrailingEndPhase < Phase

    # End of file is what this phase expects; nothing to do.
    def processEOF
    end

    # Comments are inserted as children of the document node.
    def processComment(data)
      @tree.insertComment(data, @tree.document)
    end

    # Whitespace is delegated to the last phase without switching phases.
    def processSpaceCharacters(data)
      @parser.lastPhase.processSpaceCharacters(data)
    end

    # Non-space characters: report, revert to the last phase, reprocess.
    def processCharacters(data)
      @parser.parseError(_('Unexpected non-space characters. Expected end of file.'))
      @parser.phase = @parser.lastPhase
      @parser.phase.processCharacters(data)
    end

    # Start tag: report, revert to the last phase, reprocess.
    def processStartTag(name, attributes)
      # Double-quoted so #{name} is interpolated (it was single-quoted and
      # therefore emitted the placeholder literally).
      @parser.parseError(_("Unexpected start tag (#{name}). Expected end of file."))
      @parser.phase = @parser.lastPhase
      @parser.phase.processStartTag(name, attributes)
    end

    # End tag: report, revert to the last phase, reprocess.
    def processEndTag(name)
      # Double-quoted so #{name} is interpolated (see processStartTag).
      @parser.parseError(_("Unexpected end tag (#{name}). Expected end of file."))
      @parser.phase = @parser.lastPhase
      @parser.phase.processEndTag(name)
    end

  end
end
|
|
@ -1,2 +0,0 @@
|
|||
require 'html5lib/serializer/htmlserializer'
|
||||
require 'html5lib/serializer/xhtmlserializer'
|
|
@ -1,19 +0,0 @@
|
|||
require 'html5lib/serializer/htmlserializer'
|
||||
|
||||
module HTML5lib

  # HTMLSerializer subclass whose constructor layers the caller's options
  # on top of the DEFAULTS below before handing them to the superclass.
  class XHTMLSerializer < HTMLSerializer
    # Option values applied unless the caller overrides them.
    DEFAULTS = {
      :quote_attr_values => true,
      :minimize_boolean_attributes => false,
      :use_trailing_solidus => true,
      :escape_lt_in_attrs => true,
      :omit_optional_tags => false
    }

    # options:: Hash of serializer options; entries here win over DEFAULTS.
    def initialize(options={})
      settings = DEFAULTS.merge(options)
      super(settings)
    end
  end

end
|
1044
vendor/plugins/HTML5lib/lib/html5lib/tokenizer.rb
vendored
1044
vendor/plugins/HTML5lib/lib/html5lib/tokenizer.rb
vendored
File diff suppressed because it is too large
Load diff
|
@ -1,26 +0,0 @@
|
|||
require 'html5lib/treewalkers/base'
|
||||
|
||||
module HTML5lib
  module TreeWalkers

    class << self
      # Looks up a tree walker class by name (case-insensitive; anything
      # responding to to_s is accepted). The implementing file is required
      # only when its walker is requested. Raises a RuntimeError for an
      # unrecognised name.
      def [](name)
        key = name.to_s.downcase
        if key == 'simpletree'
          require 'html5lib/treewalkers/simpletree'
          SimpleTree::TreeWalker
        elsif key == 'rexml'
          require 'html5lib/treewalkers/rexml'
          REXML::TreeWalker
        elsif key == 'hpricot'
          require 'html5lib/treewalkers/hpricot'
          Hpricot::TreeWalker
        else
          raise "Unknown TreeWalker #{name}"
        end
      end

      # Named alternative to the [] lookup.
      alias :getTreeWalker :[]
    end
  end
end
|
|
@ -1,156 +0,0 @@
|
|||
require 'html5lib/constants'
|
||||
module HTML5lib
  module TreeWalkers

    # Builders for the token hashes emitted while walking a tree.
    module TokenConstructor
      # Builds a "SerializeError" token carrying +msg+.
      def error(msg)
        return {:type => "SerializeError", :data => msg}
      end

      # Converts an attribute collection to an array via to_a.
      def normalizeAttrs(attrs)
        attrs.to_a
      end

      # Builds an :EmptyTag token for a void element.
      #
      # NOTE(review): when +hasChildren+ is true an error token is built but
      # its value is discarded, so callers never see it. Left unchanged
      # because surfacing it would change what this method returns.
      def emptyTag(name, attrs, hasChildren=false)
        error(_("Void element has children")) if hasChildren
        return({:type => :EmptyTag, :name => name, \
                :data => normalizeAttrs(attrs)})
      end

      # Builds a :StartTag token.
      def startTag(name, attrs)
        return {:type => :StartTag, :name => name, \
                :data => normalizeAttrs(attrs)}
      end

      # Builds an :EndTag token (always with empty :data).
      def endTag(name)
        return {:type => :EndTag, :name => name, :data => []}
      end

      # Splits +data+ into up to three tokens and yields each in order:
      # leading whitespace (:SpaceCharacters), the middle (:Characters) and
      # trailing whitespace (:SpaceCharacters). Whitespace is whatever
      # SPACE_CHARACTERS lists.
      def text(data)
        if data =~ /\A([#{SPACE_CHARACTERS.join('')}]+)/m
          yield({:type => :SpaceCharacters, :data => $1})
          data = data[$1.length .. -1]
          # The whole string was whitespace; nothing left to emit.
          return if data.empty?
        end

        if data =~ /([#{SPACE_CHARACTERS.join('')}]+)\Z/m
          yield({:type => :Characters, :data => data[0 ... -$1.length]})
          yield({:type => :SpaceCharacters, :data => $1})
        else
          yield({:type => :Characters, :data => data})
        end
      end

      # Builds a :Comment token.
      def comment(data)
        return {:type => :Comment, :data => data}
      end

      # Builds a :Doctype token; :data is true when the name is "HTML"
      # ignoring case.
      def doctype(name)
        return {:type => :Doctype, :name => name, :data => name.upcase() == "HTML"}
      end

      # Builds an error token describing an unrecognised node type.
      def unknown(nodeType)
        return error(_("Unknown node type: ") + nodeType.to_s)
      end

      # Message hook; returns +str+ unchanged.
      def _(str)
        str
      end
    end

    # Abstract tree walker: stores the tree and leaves #each to subclasses.
    class Base
      include TokenConstructor

      def initialize(tree)
        @tree = tree
      end

      # Subclasses must override this to yield tokens for @tree.
      def each
        raise NotImplementedError
      end

      # Same as #each. Defined as a real method rather than `alias walk each`:
      # an alias is bound to Base#each when it is declared, so walk kept
      # raising NotImplementedError on subclasses that override #each.
      def walk(&block)
        each(&block)
      end
    end

    # Walks a tree iteratively (no recursion) using four navigation hooks
    # that subclasses must provide: node_details, first_child, next_sibling
    # and parent.
    class NonRecursiveTreeWalker < TreeWalkers::Base
      # Must return an array whose first item is the node kind
      # (:DOCTYPE, :TEXT, :ELEMENT, :COMMENT, :DOCUMENT, :DOCUMENT_FRAGMENT,
      # nil, or anything else for "unknown") followed by kind-specific details.
      def node_details(node)
        raise NotImplementedError
      end

      def first_child(node)
        raise NotImplementedError
      end

      def next_sibling(node)
        raise NotImplementedError
      end

      def parent(node)
        raise NotImplementedError
      end

      # Yields the token stream for @tree in document order.
      def each
        currentNode = @tree
        while currentNode != nil
          details = node_details(currentNode)
          hasChildren = false

          # details.shift removes the kind, leaving only the kind-specific
          # items in +details+ for the branches below.
          case details.shift
          when :DOCTYPE
            yield doctype(*details)

          when :TEXT
            text(*details) {|token| yield token}

          when :ELEMENT
            name, attributes, hasChildren = details
            if VOID_ELEMENTS.include?(name)
              yield emptyTag(name, attributes.to_a, hasChildren)
              # Never descend into a void element.
              hasChildren = false
            else
              yield startTag(name, attributes.to_a)
            end

          when :COMMENT
            yield comment(details[0])

          when :DOCUMENT, :DOCUMENT_FRAGMENT
            hasChildren = true

          when nil
            # ignore (REXML::XMLDecl is an example)

          else
            yield unknown(details[0])
          end

          firstChild = hasChildren ? first_child(currentNode) : nil
          if firstChild != nil
            currentNode = firstChild
          else
            # No children to visit: close this node, then move to the next
            # sibling, climbing (and closing ancestors) until one is found or
            # the root is reached.
            while currentNode != nil
              details = node_details(currentNode)
              if details.shift == :ELEMENT
                name, attributes, hasChildren = details
                yield endTag(name) if !VOID_ELEMENTS.include?(name)
              end

              if @tree == currentNode
                # Back at the root: the walk is complete.
                currentNode = nil
              else
                nextSibling = next_sibling(currentNode)
                if nextSibling != nil
                  currentNode = nextSibling
                  break
                end

                currentNode = parent(currentNode)
              end
            end
          end
        end
      end
    end

  end
end
|
24
vendor/plugins/HTML5lib/parse.rb
vendored
24
vendor/plugins/HTML5lib/parse.rb
vendored
|
@ -26,15 +26,15 @@ def parse(opts, args)
|
|||
exit(1)
|
||||
end
|
||||
|
||||
require 'html5lib/treebuilders'
|
||||
treebuilder = HTML5lib::TreeBuilders[opts.treebuilder]
|
||||
require 'html5/treebuilders'
|
||||
treebuilder = HTML5::TreeBuilders[opts.treebuilder]
|
||||
|
||||
if opts.output == :xml
|
||||
require 'html5lib/liberalxmlparser'
|
||||
p = HTML5lib::XHTMLParser.new(:tree=>treebuilder)
|
||||
require 'html5/liberalxmlparser'
|
||||
p = HTML5::XHTMLParser.new(:tree=>treebuilder)
|
||||
else
|
||||
require 'html5lib/html5parser'
|
||||
p = HTML5lib::HTMLParser.new(:tree=>treebuilder)
|
||||
require 'html5/html5parser'
|
||||
p = HTML5::HTMLParser.new(:tree=>treebuilder)
|
||||
end
|
||||
|
||||
if opts.parsemethod == :parse
|
||||
|
@ -70,10 +70,10 @@ def printOutput(parser, document, opts)
|
|||
when :xml
|
||||
print document
|
||||
when :html
|
||||
require 'html5lib/treewalkers'
|
||||
tokens = HTML5lib::TreeWalkers[opts.treebuilder].new(document)
|
||||
require 'html5lib/serializer'
|
||||
puts HTML5lib::HTMLSerializer.serialize(tokens, opts.serializer)
|
||||
require 'html5/treewalkers'
|
||||
tokens = HTML5::TreeWalkers[opts.treebuilder].new(document)
|
||||
require 'html5/serializer'
|
||||
puts HTML5::HTMLSerializer.serialize(tokens, opts.serializer)
|
||||
when :hilite
|
||||
print document.hilite
|
||||
when :tree
|
||||
|
@ -188,6 +188,10 @@ opts = OptionParser.new do |opts|
|
|||
options.serializer[:escape_lt_in_attrs] = lt
|
||||
end
|
||||
|
||||
opts.on("--[no-]escape-rcdata", "Escape rcdata element values") do |rcdata|
|
||||
options.serializer[:escape_rcdata] = rcdata
|
||||
end
|
||||
|
||||
opts.separator ""
|
||||
opts.separator "Other Options:"
|
||||
|
||||
|
|
|
@ -33,7 +33,6 @@ EUC-jp
|
|||
#encoding
|
||||
EUC-jp
|
||||
|
||||
|
||||
#data
|
||||
<!-- -->
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
|
||||
|
|
|
@ -92,7 +92,8 @@
|
|||
|
||||
{"description": "rcdata",
|
||||
"input": [["StartTag", "script", {}], ["Characters", "a<b>c&d"]],
|
||||
"expected": ["<script>a<b>c&d"]
|
||||
"expected": ["<script>a<b>c&d"],
|
||||
"xhtml": ["<script>a<b>c&d"]
|
||||
},
|
||||
|
||||
{"description": "doctype",
|
||||
|
|
|
@ -49,6 +49,12 @@
|
|||
"options": {"escape_lt_in_attrs": true},
|
||||
"input": [["StartTag", "a", {"title": "a<b>c&d"}]],
|
||||
"expected": ["<a title=\"a<b>c&d\">"]
|
||||
},
|
||||
|
||||
{"description": "rcdata",
|
||||
"options": {"escape_rcdata": true},
|
||||
"input": [["StartTag", "script", {}], ["Characters", "a<b>c&d"]],
|
||||
"expected": ["<script>a<b>c&d"]
|
||||
}
|
||||
|
||||
]}
|
||||
|
|
43
vendor/plugins/HTML5lib/testdata/sniffer/htmlOrFeed.json
vendored
Normal file
43
vendor/plugins/HTML5lib/testdata/sniffer/htmlOrFeed.json
vendored
Normal file
|
@ -0,0 +1,43 @@
|
|||
[
|
||||
{"type": "text/html", "input": ""},
|
||||
{"type": "text/html", "input": "<!---->"},
|
||||
{"type": "text/html", "input": "<!--asdfaslkjdf;laksjdf as;dkfjsd-->"},
|
||||
{"type": "text/html", "input": "<!"},
|
||||
{"type": "text/html", "input": "\t"},
|
||||
{"type": "text/html", "input": "<!>"},
|
||||
{"type": "text/html", "input": "<?"},
|
||||
{"type": "text/html", "input": "<??>"},
|
||||
{"type": "application/rss+xml", "input": "<rss"},
|
||||
{"type": "application/atom+xml", "input": "<feed"},
|
||||
{"type": "text/html", "input": "<html"},
|
||||
{"type": "text/html", "input": "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\n<html><head>\n<title>302 Found</title>\n</head><body>\n<h1>Found</h1>\n<p>The document has moved <a href=\"http://feeds.feedburner.com/gofug\">here</a>.</p>\n</body></html>\n"},
|
||||
{"type": "text/html", "input": "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">\r\n<HTML><HEAD>\r\n <link rel=\"stylesheet\" type=\"text/css\" href=\"http://cache.blogads.com/289619328/feed.css\" /><link rel=\"stylesheet\" type=\"text/css\" href=\"http://cache.blogads.com/431602649/feed.css\" />\r\n<link rel=\"stylesheet\" type=\"text/css\" href=\"http://cache.blogads.com/382549546/feed.css\" />\r\n<link rel=\"stylesheet\" type=\"text/css\" href=\"http://cache.blogads.com/314618017/feed.css\" /><META http-equiv=\"expires\" content="},
|
||||
{"type": "text/html", "input": "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\r\n<html>\r\n<head>\r\n<title>Xiaxue - Chicken pie blogger.</title><meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\"><style type=\"text/css\">\r\n<style type=\"text/css\">\r\n<!--\r\nbody {\r\n background-color: #FFF2F2;\r\n}\r\n.style1 {font-family: Georgia, \"Times New Roman\", Times, serif}\r\n.style2 {\r\n color: #8a567c;\r\n font-size: 14px;\r\n font-family: Georgia, \"Times New Roman\", Times, serif;\r\n}\r"},
|
||||
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" lang=\"en\">\r\n<head> \r\n<title>Google Operating System</title>\r\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\r\n<meta name=\"Description\" content=\"Unofficial news and tips about Google. A blog that watches Google's latest developments and the attempts to move your operating system online.\" />\r\n<meta name=\"generator\" c"},
|
||||
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" lang=\"en\">\r\n<head>\r\n <title>Assimilated Press</title> <meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\r\n<meta name=\"MSSmartTagsPreventParsing\" content=\"true\" />\r\n<meta name=\"generator\" content=\"Blogger\" />\r\n<link rel=\"alternate\" type=\"application/atom+xml\" title=\"Assimilated Press - Atom\" href=\"http://assimila"},
|
||||
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" lang=\"en\">\r\n<head>\r\n <title>PostSecret</title>\r\n<META name=\"keywords\" Content=\"secrets, postcard, secret, postcards, postsecret, postsecrets,online confessional, post secret, post secrets, artomatic, post a secret\"><META name=\"discription\" Content=\"See a Secret...Share a Secret\"> <meta http-equiv=\"Content-Type\" content=\"te"},
|
||||
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n<html xmlns='http://www.w3.org/1999/xhtml' xmlns:b='http://www.google.com/2005/gml/b' xmlns:data='http://www.google.com/2005/gml/data' xmlns:expr='http://www.google.com/2005/gml/expr'>\n <head>\n \n <meta content='text/html; charset=UTF-8' http-equiv='Content-Type'/>\n <meta content='true' name='MSSmartTagsPreventParsing'/>\n <meta content='blogger' name='generator'/>\n <link rel=\"alternate\" typ"},
|
||||
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" dir=\"ltr\" lang=\"ja\">\n<head profile=\"http://gmpg.org/xfn/11\"> \n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" /> \n<title> CMS Lever</title><link rel=\"stylesheet\" type=\"text/css\" media=\"screen\" href=\"http://s.wordpress.com/wp-content/themes/pub/twenty-eight/2813.css\"/>\n<link rel=\"alternate\" type=\"application/rss+xml\" title=\"RSS 2.0\" h"},
|
||||
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" dir=\"ltr\" lang=\"en\"><head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n<title> Park Avenue Peerage</title>\t<meta name=\"generator\" content=\"WordPress.com\" />\t<!-- feeds -->\n\t<link rel=\"alternate\" type=\"application/rss+xml\" title=\"RSS 2.0\" href=\"http://parkavenuepeerage.wordpress.com/feed/\" />\t<link rel=\"pingback\" href="},
|
||||
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" dir=\"ltr\" lang=\"ja\"><head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n<title> \u884c\u96f2\u6d41\u6c34 -like a floating clouds and running water-</title>\t<meta name=\"generator\" content=\"WordPress.com\" />\t<!-- feeds -->\n\t<link rel=\"alternate\" type=\"application/rss+xml\" title=\"RSS 2.0\" href=\"http://shw4.wordpress.com/feed/\" />\t<li"},
|
||||
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\"><html xmlns=\"http://www.w3.org/1999/xhtml\">\n<head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />\n<meta name=\"generator\" content=\"http://www.typepad.com/\" />\n<title>Go Fug Yourself</title><link rel=\"stylesheet\" href=\"http://gofugyourself.typepad.com/go_fug_yourself/styles.css\" type=\"text/css\" />\n<link rel=\"alternate\" type=\"application/atom+xml\" title=\"Atom\" "},
|
||||
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" dir=\"ltr\" lang=\"en\"><head profile=\"http://gmpg.org/xfn/11\">\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" /><title> Ladies…</title><meta name=\"generator\" content=\"WordPress.com\" /> <!-- leave this for stats --><link rel=\"stylesheet\" href=\"http://s.wordpress.com/wp-content/themes/default/style.css?1\" type=\"tex"},
|
||||
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\r\n<html xmlns=\"http://www.w3.org/1999/xhtml\">\r\n<head>\r\n <title>The Sartorialist</title> <meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\r\n<meta name=\"MSSmartTagsPreventParsing\" content=\"true\" />\r\n<meta name=\"generator\" content=\"Blogger\" />\r\n<link rel=\"alternate\" type=\"application/atom+xml\" title=\"The Sartorialist - Atom\" href=\"http://thesartorialist.blogspot"},
|
||||
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\"><html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"en\">\n<head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-1\" />\n<meta name=\"generator\" content=\"http://www.typepad.com/\" />\n<title>Creating Passionate Users</title><link rel=\"stylesheet\" href=\"http://headrush.typepad.com/creating_passionate_users/styles.css\" type=\"text/css\" />\n<link rel=\"alternate\" type"},
|
||||
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n\t\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" id=\"sixapart-standard\">\n<head>\n\t<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />\n\t<meta name=\"generator\" content=\"http://www.typepad.com/\" />\n\t\n\t\n <meta name=\"keywords\" content=\"marketing, blog, seth, ideas, respect, permission\" />\n <meta name=\"description\" content=\"Seth Godin's riffs on marketing, respect, and the "},
|
||||
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n\t\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" id=\"sixapart-standard\">\n<head>\n\t<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />\n\t<meta name=\"generator\" content=\"http://www.typepad.com/\" />\n\t\n\t\n \n <meta name=\"description\" content=\" Western Civilization hangs in the balance. This blog is part of the solution,the cure. Get your heads out of the sand and Fight the G"},
|
||||
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" dir=\"ltr\" lang=\"en\">\n<head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=pahrefhttpwwwfeedburnercomtarget_blankimgsrchttpwwwfeedburnercomfbimagespubpowered_by_fbgifaltPoweredbyFeedBurnerstyleborder0ap\" />\n<title> From Under the Rotunda</title>\n<link rel=\"stylesheet\" href=\"http://s.wordpress.com/wp-content/themes/pub/andreas04/style.css\" type=\"text/css\""},
|
||||
{"type": "application/atom+xml", "input": "<?xml version='1.0' encoding='UTF-8'?><?xml-stylesheet href=\"http://www.blogger.com/styles/atom.css\" type=\"text/css\"?><feed xmlns='http://www.w3.org/2005/Atom' xmlns:openSearch='http://a9.com/-/spec/opensearchrss/1.0/'><id>tag:blogger.com,1999:blog-10861780</id><updated>2007-07-27T12:38:50.888-07:00</updated><title type='text'>Official Google Blog</title><link rel='alternate' type='text/html' href='http://googleblog.blogspot.com/'/><link rel='next' type='application/atom+xml' href='http://googleblog.blogs"},
|
||||
{"type": "application/rss+xml", "input": "<?xml version='1.0' encoding='UTF-8'?><rss xmlns:atom='http://www.w3.org/2005/Atom' xmlns:openSearch='http://a9.com/-/spec/opensearchrss/1.0/' version='2.0'><channel><atom:id>tag:blogger.com,1999:blog-10861780</atom:id><lastBuildDate>Fri, 27 Jul 2007 19:38:50 +0000</lastBuildDate><title>Official Google Blog</title><description/><link>http://googleblog.blogspot.com/</link><managingEditor>Eric Case</managingEditor><generator>Blogger</generator><openSearch:totalResults>729</openSearch:totalResults><openSearc"},
|
||||
{"type": "application/rss+xml", "input": "<?xml version=\"1.0\" encoding=\"pahrefhttpwwwfeedburnercomtarget_blankimgsrchttpwwwfeedburnercomfbimagespubpowered_by_fbgifaltPoweredbyFeedBurnerstyleborder0ap\"?>\n<!-- generator=\"wordpress/MU\" -->\n<rss version=\"2.0\"\n\txmlns:content=\"http://purl.org/rss/1.0/modules/content/\"\n\txmlns:wfw=\"http://wellformedweb.org/CommentAPI/\"\n\txmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n\t><channel>\n\t<title>From Under the Rotunda</title>\n\t<link>http://dannybernardi.wordpress.com</link>\n\t<description>The Monographs of Danny Ber"},
|
||||
{"type": "application/rss+xml", "input": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!-- generator=\"wordpress/MU\" -->\n<rss version=\"2.0\"\n\txmlns:content=\"http://purl.org/rss/1.0/modules/content/\"\n\txmlns:wfw=\"http://wellformedweb.org/CommentAPI/\"\n\txmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n\t><channel>\n\t<title>CMS Lever</title>\n\t<link>http://kanaguri.wordpress.com</link>\n\t<description>CMS\u306e\u6c17\u306b\u306a\u3063\u305f\u3053\u3068</description>\n\t<pubDate>Wed, 18 Jul 2007 21:26:22 +0000</pubDate>\n\t<generator>http://wordpress.org/?v=MU</generator>\n\t<language>ja</languag"},
|
||||
{"type": "application/atom+xml", "input": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<feed xmlns=\"http://www.w3.org/2005/Atom\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:thr=\"http://purl.org/syndication/thread/1.0\">\n <title>Atlas Shrugs</title>\n <link rel=\"self\" type=\"application/atom+xml\" href=\"http://atlasshrugs2000.typepad.com/atlas_shrugs/atom.xml\" />\n <link rel=\"alternate\" type=\"text/html\" href=\"http://atlasshrugs2000.typepad.com/atlas_shrugs/\" />\n <id>tag:typepad.com,2003:weblog-132946</id>\n <updated>2007-08-15T16:07:34-04"},
|
||||
{"type": "application/atom+xml", "input": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/atom10full.xsl\" type=\"text/xsl\" media=\"screen\"?><?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/itemcontent.css\" type=\"text/css\" media=\"screen\"?><feed xmlns=\"http://www.w3.org/2005/Atom\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:thr=\"http://purl.org/syndication/thread/1.0\" xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\">\r\n <title>Creating Passionate Users</title>\r\n "},
|
||||
{"type": "application/atom+xml", "input": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/atom10full.xsl\" type=\"text/xsl\" media=\"screen\"?><?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/itemcontent.css\" type=\"text/css\" media=\"screen\"?><feed xmlns=\"http://www.w3.org/2005/Atom\" xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\">\r\n <title>Seth's Blog</title>\r\n <link rel=\"alternate\" type=\"text/html\" href=\"http://sethgodin.typepad.com/seths_blog/\" />\r\n <link rel=\"s"},
|
||||
{"type": "application/atom+xml", "input": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/atom10full.xsl\" type=\"text/xsl\" media=\"screen\"?><?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/itemcontent.css\" type=\"text/css\" media=\"screen\"?><feed xmlns=\"http://www.w3.org/2005/Atom\" xmlns:openSearch=\"http://a9.com/-/spec/opensearchrss/1.0/\" xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\"><id>tag:blogger.com,1999:blog-32454861</id><updated>2007-07-31T21:44:09.867+02:00</upd"},
|
||||
{"type": "application/atom+xml", "input": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/atomfull.xsl\" type=\"text/xsl\" media=\"screen\"?><?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/itemcontent.css\" type=\"text/css\" media=\"screen\"?><feed xmlns=\"http://purl.org/atom/ns#\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\" version=\"0.3\">\r\n <title>Go Fug Yourself</title>\r\n <link rel=\"alternate\" type=\"text/html\" href=\"http://go"},
|
||||
{"type": "application/rss+xml", "input": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/rss2full.xsl\" type=\"text/xsl\" media=\"screen\"?><?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/itemcontent.css\" type=\"text/css\" media=\"screen\"?><rss xmlns:creativeCommons=\"http://backend.userland.com/creativeCommonsRssModule\" xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\" version=\"2.0\"><channel><title>Google Operating System</title><link>http://googlesystem.blogspot.com/</link>"},
|
||||
{"type": "application/rss+xml", "input": "<?xml version=\"1.0\" encoding=\"\"?>\n<!-- generator=\"wordpress/MU\" -->\n<rss version=\"2.0\"\n\txmlns:content=\"http://purl.org/rss/1.0/modules/content/\"\n\txmlns:wfw=\"http://wellformedweb.org/CommentAPI/\"\n\txmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n\t><channel>\n\t<title>Nunublog</title>\n\t<link>http://nunubh.wordpress.com</link>\n\t<description>Just Newbie Blog!</description>\n\t<pubDate>Mon, 09 Jul 2007 18:54:09 +0000</pubDate>\n\t<generator>http://wordpress.org/?v=MU</generator>\n\t<language>id</language>\n\t\t\t<item>\n\t\t<ti"},
|
||||
{"type": "text/html", "input": "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" lang=\"en\">\r\n<HEAD>\r\n<TITLE>Design*Sponge</TITLE><meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\r\n<meta name=\"MSSmartTagsPreventParsing\" content=\"true\" />\r\n<meta name=\"generator\" content=\"Blogger\" />\r\n<link rel=\"alternate\" type=\"application/atom+xml\" title=\"Design*Sponge - Atom\" href=\"http://designsponge.blogspot.com/feeds/posts/default\" />\r\n<link rel=\"alternate\" type=\"application/rss+xml\" title=\"Design*Sponge - RSS\" href="},
|
||||
{"type": "text/html", "input": "<HTML>\n<HEAD>\n<TITLE>Moved Temporarily</TITLE>\n</HEAD>\n<BODY BGCOLOR=\"#FFFFFF\" TEXT=\"#000000\">\n<H1>Moved Temporarily</H1>\nThe document has moved <A HREF=\"http://feeds.feedburner.com/thesecretdiaryofstevejobs\">here</A>.\n</BODY>\n</HTML>\n"}
|
||||
]
|
|
@ -11,12 +11,24 @@
|
|||
"input":"foo</bar>",
|
||||
"output":[["Character", "foo"], ["EndTag", "bar"]]},
|
||||
|
||||
{"description":"End tag closing RCDATA or CDATA (case-insensitivity)",
|
||||
"contentModelFlags":["RCDATA", "CDATA"],
|
||||
"lastStartTag":"bar",
|
||||
"input":"foo</bAr>",
|
||||
"output":[["Character", "foo"], ["EndTag", "bar"]]},
|
||||
|
||||
{"description":"End tag with incorrect name in RCDATA or CDATA",
|
||||
"contentModelFlags":["RCDATA", "CDATA"],
|
||||
"lastStartTag":"baz",
|
||||
"input":"</foo>bar</baz>",
|
||||
"output":[["Character", "</foo>bar"], ["EndTag", "baz"]]},
|
||||
|
||||
{"description":"End tag with incorrect name in RCDATA or CDATA (starting like correct name)",
|
||||
"contentModelFlags":["RCDATA", "CDATA"],
|
||||
"lastStartTag":"baz",
|
||||
"input":"</foo>bar</bazaar>",
|
||||
"output":[["Character", "</foo>bar</bazaar>"]]},
|
||||
|
||||
{"description":"End tag closing RCDATA or CDATA, switching back to PCDATA",
|
||||
"contentModelFlags":["RCDATA", "CDATA"],
|
||||
"lastStartTag":"bar",
|
||||
|
|
2335
vendor/plugins/HTML5lib/testdata/tokenizer/entities.test
vendored
Normal file
2335
vendor/plugins/HTML5lib/testdata/tokenizer/entities.test
vendored
Normal file
File diff suppressed because it is too large
Load diff
|
@ -135,7 +135,7 @@
|
|||
|
||||
{"description":"Entity without trailing semicolon (2)",
|
||||
"input":"I'm ¬in",
|
||||
"output":[["Character","I'm "], "ParseError", ["Character", "∉"]]},
|
||||
"output":[["Character","I'm "], "ParseError", ["Character", "¬in"]]},
|
||||
|
||||
{"description":"Partial entity match at end of file",
|
||||
"input":"I'm &no",
|
||||
|
@ -151,6 +151,22 @@
|
|||
|
||||
{"description":"Hexadecimal entity in attribute",
|
||||
"input":"<h a='?'></h>",
|
||||
"output":[["StartTag", "h", {"a":"?"}], ["EndTag", "h"]]}
|
||||
"output":[["StartTag", "h", {"a":"?"}], ["EndTag", "h"]]},
|
||||
|
||||
{"description":"Entity in attribute without semicolon ending in x",
|
||||
"input":"<h a='¬x'>",
|
||||
"output":["ParseError", ["StartTag", "h", {"a":"¬x"}]]},
|
||||
|
||||
{"description":"Entity in attribute without semicolon ending in 1",
|
||||
"input":"<h a='¬1'>",
|
||||
"output":["ParseError", ["StartTag", "h", {"a":"¬1"}]]},
|
||||
|
||||
{"description":"Entity in attribute without semicolon ending in i",
|
||||
"input":"<h a='¬i'>",
|
||||
"output":["ParseError", ["StartTag", "h", {"a":"¬i"}]]},
|
||||
|
||||
{"description":"Entity in attribute without semicolon",
|
||||
"input":"<h a='©'>",
|
||||
"output":["ParseError", ["StartTag", "h", {"a":"©"}]]}
|
||||
|
||||
]}
|
||||
|
|
|
@ -42,27 +42,23 @@
|
|||
|
||||
{"description":"Numeric entity representing the NUL character",
|
||||
"input":"�",
|
||||
"output":[["Character", "\uFFFD"]]},
|
||||
"output":["ParseError", ["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"Hexadecimal entity representing the NUL character",
|
||||
"input":"�",
|
||||
"output":[["Character", "\uFFFD"]]},
|
||||
"output":["ParseError", ["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"Numeric entity representing a codepoint after 1114111 (U+10FFFF)",
|
||||
"input":"�",
|
||||
"output":[["Character", "\uFFFD"]]},
|
||||
"output":["ParseError", ["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"Hexadecimal entity representing a codepoint after 1114111 (U+10FFFF)",
|
||||
"input":"�",
|
||||
"output":[["Character", "\uFFFD"]]},
|
||||
"output":["ParseError", ["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"Numeric entity representing a Windows-1252 'codepoint'",
|
||||
"input":"‰",
|
||||
"output":["ParseError", ["Character", "\u2030"]]},
|
||||
|
||||
{"description":"Hexadecimal entity representing a Windows-1252 'codepoint'",
|
||||
"input":"‰",
|
||||
"output":["ParseError", ["Character", "\u2030"]]},
|
||||
{"description":"Hexadecimal entity pair representing a surrogate pair",
|
||||
"input":"��",
|
||||
"output":["ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"Hexadecimal entity with mixed uppercase and lowercase",
|
||||
"input":"ꯍ",
|
||||
|
@ -118,7 +114,15 @@
|
|||
|
||||
{"description":"Null Byte Replacement",
|
||||
"input":"\u0000",
|
||||
"output":[["Character", "\ufffd"]]}
|
||||
"output":["ParseError", ["Character", "\ufffd"]]},
|
||||
|
||||
{"description":"Comment with dash",
|
||||
"input":"<!---x",
|
||||
"output":["ParseError", ["Comment", "-x"]]},
|
||||
|
||||
{"description":"Entity + newline",
|
||||
"input":"\nx\n>\n",
|
||||
"output":[["Character","\nx\n>\n"]]}
|
||||
|
||||
]}
|
||||
|
||||
|
|
367
vendor/plugins/HTML5lib/testdata/tokenizer/test3.test
vendored
Normal file
367
vendor/plugins/HTML5lib/testdata/tokenizer/test3.test
vendored
Normal file
|
@ -0,0 +1,367 @@
|
|||
{"tests": [
|
||||
|
||||
{"description":"<",
|
||||
"input":"<",
|
||||
"output":["ParseError", ["Character", "<"]]},
|
||||
|
||||
{"description":"<>",
|
||||
"input":"<>",
|
||||
"output":["ParseError", ["Character", "<>"]]},
|
||||
|
||||
{"description":"<!",
|
||||
"input":"<!",
|
||||
"output":["ParseError", ["Comment", ""]]},
|
||||
|
||||
{"description":"<!>",
|
||||
"input":"<!>",
|
||||
"output":["ParseError", ["Comment", ""]]},
|
||||
|
||||
{"description":"<!--",
|
||||
"input":"<!--",
|
||||
"output":["ParseError", ["Comment", ""]]},
|
||||
|
||||
{"description":"<!-->",
|
||||
"input":"<!-->",
|
||||
"output":["ParseError", ["Comment", ""]]},
|
||||
|
||||
{"description":"<!---",
|
||||
"input":"<!---",
|
||||
"output":["ParseError", ["Comment", ""]]},
|
||||
|
||||
{"description":"<!--->",
|
||||
"input":"<!--->",
|
||||
"output":["ParseError", ["Comment", ""]]},
|
||||
|
||||
{"description":"<!---->",
|
||||
"input":"<!---->",
|
||||
"output":[["Comment", ""]]},
|
||||
|
||||
{"description":"<!-----",
|
||||
"input":"<!-----",
|
||||
"output":["ParseError", "ParseError", ["Comment", "-"]]},
|
||||
|
||||
{"description":"<!----.",
|
||||
"input":"<!----.",
|
||||
"output":["ParseError", "ParseError", ["Comment", "--."]]},
|
||||
|
||||
{"description":"<!---?",
|
||||
"input":"<!---?",
|
||||
"output":["ParseError", ["Comment", "-?"]]},
|
||||
|
||||
{"description":"<!--?-",
|
||||
"input":"<!--?-",
|
||||
"output":["ParseError", ["Comment", "?"]]},
|
||||
|
||||
{"description":"<!--?--",
|
||||
"input":"<!--?--",
|
||||
"output":["ParseError", ["Comment", "?"]]},
|
||||
|
||||
{"description":"<!--?-.",
|
||||
"input":"<!--?-.",
|
||||
"output":["ParseError", ["Comment", "?-."]]},
|
||||
|
||||
{"description":"<!--?.",
|
||||
"input":"<!--?.",
|
||||
"output":["ParseError", ["Comment", "?."]]},
|
||||
|
||||
{"description":"<?>",
|
||||
"input":"<?>",
|
||||
"output":["ParseError", ["Comment", "?"]]},
|
||||
|
||||
{"description":"<??",
|
||||
"input":"<??",
|
||||
"output":["ParseError", ["Comment", "??"]]},
|
||||
|
||||
{"description":"</",
|
||||
"input":"</",
|
||||
"output":["ParseError", ["Character", "</"]]},
|
||||
|
||||
{"description":"</>",
|
||||
"input":"</>",
|
||||
"output":["ParseError"]},
|
||||
|
||||
{"description":"</?",
|
||||
"input":"</?",
|
||||
"output":["ParseError", ["Comment", "?"]]},
|
||||
|
||||
{"description":">",
|
||||
"input":">",
|
||||
"output":[["Character", ">"]]},
|
||||
|
||||
{"description":"-",
|
||||
"input":"-",
|
||||
"output":[["Character", "-"]]},
|
||||
|
||||
{"description":"?",
|
||||
"input":"?",
|
||||
"output":[["Character", "?"]]},
|
||||
|
||||
{"description":"&",
|
||||
"input":"&",
|
||||
"output":[["Character", "&"]]},
|
||||
|
||||
{"description":"&#",
|
||||
"input":"&#",
|
||||
"output":["ParseError", ["Character", "&#"]]},
|
||||
|
||||
{"description":"	",
|
||||
"input":"	",
|
||||
"output":["ParseError", ["Character", "\t"]]},
|
||||
|
||||
{"description":"<!doctype >",
|
||||
"input":"<!doctype >",
|
||||
"output":["ParseError", ["DOCTYPE", "", null, null, false]]},
|
||||
|
||||
{"description":"<!doctype ",
|
||||
"input":"<!doctype ",
|
||||
"output":["ParseError", ["DOCTYPE", "", null, null, false]]},
|
||||
|
||||
{"description":"<!doctype!>",
|
||||
"input":"<!doctype!>",
|
||||
"output":["ParseError", ["DOCTYPE", "!", null, null, true]]},
|
||||
|
||||
{"description":"<!doctype! >",
|
||||
"input":"<!doctype! >",
|
||||
"output":["ParseError", ["DOCTYPE", "!", null, null, true]]},
|
||||
|
||||
{"description":"<!doctype! ",
|
||||
"input":"<!doctype! ",
|
||||
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
|
||||
|
||||
{"description":"<!doctype! ?>",
|
||||
"input":"<!doctype! ?>",
|
||||
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
|
||||
|
||||
{"description":"<!doctype! ??",
|
||||
"input":"<!doctype! ??",
|
||||
"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
|
||||
|
||||
{"description":"<!doctype!?",
|
||||
"input":"<!doctype!?",
|
||||
"output":["ParseError", "ParseError", ["DOCTYPE", "!?", null, null, false]]},
|
||||
|
||||
{"description":"<!doctype! public>",
|
||||
"input":"<!doctype! public>",
|
||||
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
|
||||
|
||||
{"description":"<!doctype! public ",
|
||||
"input":"<!doctype! public ",
|
||||
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
|
||||
|
||||
{"description":"<!doctype! public?",
|
||||
"input":"<!doctype! public?",
|
||||
"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
|
||||
|
||||
{"description":"<!doctype! public''",
|
||||
"input":"<!doctype! public''",
|
||||
"output":["ParseError", "ParseError", ["DOCTYPE", "!", "", null, false]]},
|
||||
|
||||
{"description":"<!doctype! public'(",
|
||||
"input":"<!doctype! public'(",
|
||||
"output":["ParseError", "ParseError", ["DOCTYPE", "!", "(", null, false]]},
|
||||
|
||||
{"description":"<!doctype! public\"\">",
|
||||
"input":"<!doctype! public\"\">",
|
||||
"output":["ParseError", ["DOCTYPE", "!", "", null, true]]},
|
||||
|
||||
{"description":"<!doctype! public\"\" ",
|
||||
"input":"<!doctype! public\"\" ",
|
||||
"output":["ParseError", "ParseError", ["DOCTYPE", "!", "", null, false]]},
|
||||
|
||||
{"description":"<!doctype! public\"\"?",
|
||||
"input":"<!doctype! public\"\"?",
|
||||
"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "!", "", null, false]]},
|
||||
|
||||
{"description":"<!doctype! public\"\"'",
|
||||
"input":"<!doctype! public\"\"'",
|
||||
"output":["ParseError", "ParseError", ["DOCTYPE", "!", "", "", false]]},
|
||||
|
||||
{"description":"<!doctype! public\"\"\"",
|
||||
"input":"<!doctype! public\"\"\"",
|
||||
"output":["ParseError", "ParseError", ["DOCTYPE", "!", "", "", false]]},
|
||||
|
||||
{"description":"<!doctype! public\"#",
|
||||
"input":"<!doctype! public\"#",
|
||||
"output":["ParseError", "ParseError", ["DOCTYPE", "!", "#", null, false]]},
|
||||
|
||||
{"description":"<!doctype! system>",
|
||||
"input":"<!doctype! system>",
|
||||
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
|
||||
|
||||
{"description":"<!doctype! system ",
|
||||
"input":"<!doctype! system ",
|
||||
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
|
||||
|
||||
{"description":"<!doctype! system?",
|
||||
"input":"<!doctype! system?",
|
||||
"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
|
||||
|
||||
{"description":"<!doctype! system''",
|
||||
"input":"<!doctype! system''",
|
||||
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, "", false]]},
|
||||
|
||||
{"description":"<!doctype! system'(",
|
||||
"input":"<!doctype! system'(",
|
||||
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, "(", false]]},
|
||||
|
||||
{"description":"<!doctype! system\"\">",
|
||||
"input":"<!doctype! system\"\">",
|
||||
"output":["ParseError", ["DOCTYPE", "!", null, "", true]]},
|
||||
|
||||
{"description":"<!doctype! system\"\" ",
|
||||
"input":"<!doctype! system\"\" ",
|
||||
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, "", false]]},
|
||||
|
||||
{"description":"<!doctype! system\"\"?",
|
||||
"input":"<!doctype! system\"\"?",
|
||||
"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "!", null, "", false]]},
|
||||
|
||||
{"description":"<!doctype! system\"#",
|
||||
"input":"<!doctype! system\"#",
|
||||
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, "#", false]]},
|
||||
|
||||
{"description":"</z",
|
||||
"input":"</z",
|
||||
"output":["ParseError", ["EndTag", "z"]]},
|
||||
|
||||
{"description":"<z>",
|
||||
"input":"<z>",
|
||||
"output":[["StartTag", "z", {}]]},
|
||||
|
||||
{"description":"<z ",
|
||||
"input":"<z ",
|
||||
"output":["ParseError", ["StartTag", "z", {}]]},
|
||||
|
||||
{"description":"<z/>",
|
||||
"input":"<z/>",
|
||||
"output":["ParseError", ["StartTag", "z", {}]]},
|
||||
|
||||
{"description":"<z/ ",
|
||||
"input":"<z/ ",
|
||||
"output":["ParseError", "ParseError", ["StartTag", "z", {}]]},
|
||||
|
||||
{"description":"<z//",
|
||||
"input":"<z//",
|
||||
"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {}]]},
|
||||
|
||||
{"description":"<z",
|
||||
"input":"<z",
|
||||
"output":["ParseError", ["StartTag", "z", {}]]},
|
||||
|
||||
{"description":"</z",
|
||||
"input":"</z",
|
||||
"output":["ParseError", ["EndTag", "z"]]},
|
||||
|
||||
{"description":"<z0",
|
||||
"input":"<z0",
|
||||
"output":["ParseError", ["StartTag", "z0", {}]]},
|
||||
|
||||
{"description":"<z/0=>",
|
||||
"input":"<z/0=>",
|
||||
"output":["ParseError", ["StartTag", "z", {"0": ""}]]},
|
||||
|
||||
{"description":"<z/0= ",
|
||||
"input":"<z/0= ",
|
||||
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
|
||||
|
||||
{"description":"<z/0=?>",
|
||||
"input":"<z/0=?>",
|
||||
"output":["ParseError", ["StartTag", "z", {"0": "?"}]]},
|
||||
|
||||
{"description":"<z/0=? ",
|
||||
"input":"<z/0=? ",
|
||||
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "?"}]]},
|
||||
|
||||
{"description":"<z/0=??",
|
||||
"input":"<z/0=??",
|
||||
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "??"}]]},
|
||||
|
||||
{"description":"<z/0=''",
|
||||
"input":"<z/0=''",
|
||||
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
|
||||
|
||||
{"description":"<z/0='&",
|
||||
"input":"<z/0='&",
|
||||
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "&"}]]},
|
||||
|
||||
{"description":"<z/0='%",
|
||||
"input":"<z/0='%",
|
||||
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "%"}]]},
|
||||
|
||||
{"description":"<z/0=\"'",
|
||||
"input":"<z/0=\"'",
|
||||
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "'"}]]},
|
||||
|
||||
{"description":"<z/0=\"\"",
|
||||
"input":"<z/0=\"\"",
|
||||
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
|
||||
|
||||
{"description":"<z/0=\"&",
|
||||
"input":"<z/0=\"&",
|
||||
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "&"}]]},
|
||||
|
||||
{"description":"<z/0=&",
|
||||
"input":"<z/0=&",
|
||||
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "&"}]]},
|
||||
|
||||
{"description":"<z/0>",
|
||||
"input":"<z/0>",
|
||||
"output":["ParseError", ["StartTag", "z", {"0": ""}]]},
|
||||
|
||||
{"description":"<z/0 =",
|
||||
"input":"<z/0 =",
|
||||
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
|
||||
|
||||
{"description":"<z/0 >",
|
||||
"input":"<z/0 >",
|
||||
"output":["ParseError", ["StartTag", "z", {"0": ""}]]},
|
||||
|
||||
{"description":"<z/0 ",
|
||||
"input":"<z/0 ",
|
||||
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
|
||||
|
||||
{"description":"<z/0 /",
|
||||
"input":"<z/0 /",
|
||||
"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
|
||||
|
||||
{"description":"<z/0/",
|
||||
"input":"<z/0/",
|
||||
"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
|
||||
|
||||
{"description":"<z/00",
|
||||
"input":"<z/00",
|
||||
"output":["ParseError", "ParseError", ["StartTag", "z", {"00": ""}]]},
|
||||
|
||||
{"description":"<z/0 0",
|
||||
"input":"<z/0 0",
|
||||
"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
|
||||
|
||||
{"description":"<z/0='	",
|
||||
"input":"<z/0='	",
|
||||
"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": "\t"}]]},
|
||||
|
||||
{"description":"<z/0=\"	",
|
||||
"input":"<z/0=\"	",
|
||||
"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": "\t"}]]},
|
||||
|
||||
{"description":"<z/0=	",
|
||||
"input":"<z/0=	",
|
||||
"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": "\t"}]]},
|
||||
|
||||
{"description":"<z/0z",
|
||||
"input":"<z/0z",
|
||||
"output":["ParseError", "ParseError", ["StartTag", "z", {"0z": ""}]]},
|
||||
|
||||
{"description":"<z/0 z",
|
||||
"input":"<z/0 z",
|
||||
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "", "z": ""}]]},
|
||||
|
||||
{"description":"<zz",
|
||||
"input":"<zz",
|
||||
"output":["ParseError", ["StartTag", "zz", {}]]},
|
||||
|
||||
{"description":"<z/z",
|
||||
"input":"<z/z",
|
||||
"output":["ParseError", "ParseError", ["StartTag", "z", {"z": ""}]]}
|
||||
|
||||
]}
|
198
vendor/plugins/HTML5lib/testdata/tokenizer/test4.test
vendored
Normal file
198
vendor/plugins/HTML5lib/testdata/tokenizer/test4.test
vendored
Normal file
|
@ -0,0 +1,198 @@
|
|||
{"tests": [
|
||||
|
||||
{"description":"< in attribute name",
|
||||
"input":"<z/0 <",
|
||||
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "", "<": ""}]]},
|
||||
|
||||
{"description":"< in attribute value",
|
||||
"input":"<z x=<",
|
||||
"output":["ParseError", ["StartTag", "z", {"x": "<"}]]},
|
||||
|
||||
{"description":"CR EOF after doctype name",
|
||||
"input":"<!doctype html \r",
|
||||
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
|
||||
|
||||
{"description":"CR EOF in tag name",
|
||||
"input":"<z\r",
|
||||
"output":["ParseError", ["StartTag", "z", {}]]},
|
||||
|
||||
{"description":"Zero hex numeric entity",
|
||||
"input":"�",
|
||||
"output":["ParseError", "ParseError", ["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"Zero decimal numeric entity",
|
||||
"input":"�",
|
||||
"output":["ParseError", "ParseError", ["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"Zero-prefixed hex numeric entity",
|
||||
"input":"A",
|
||||
"output":[["Character", "A"]]},
|
||||
|
||||
{"description":"Zero-prefixed decimal numeric entity",
|
||||
"input":"A",
|
||||
"output":[["Character", "A"]]},
|
||||
|
||||
{"description":"Empty hex numeric entities",
|
||||
"input":"&#x &#X ",
|
||||
"output":["ParseError", ["Character", "&#x "], "ParseError", ["Character", "&#X "]]},
|
||||
|
||||
{"description":"Empty decimal numeric entities",
|
||||
"input":"&# &#; ",
|
||||
"output":["ParseError", ["Character", "&# "], "ParseError", ["Character", "&#; "]]},
|
||||
|
||||
{"description":"Non-BMP numeric entity",
|
||||
"input":"𐀀",
|
||||
"output":[["Character", "\uD800\uDC00"]]},
|
||||
|
||||
{"description":"Maximum non-BMP numeric entity",
|
||||
"input":"",
|
||||
"output":[["Character", "\uDBFF\uDFFF"]]},
|
||||
|
||||
{"description":"Above maximum numeric entity",
|
||||
"input":"�",
|
||||
"output":["ParseError", ["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"32-bit hex numeric entity",
|
||||
"input":"�",
|
||||
"output":["ParseError", ["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"33-bit hex numeric entity",
|
||||
"input":"�",
|
||||
"output":["ParseError", ["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"33-bit decimal numeric entity",
|
||||
"input":"�",
|
||||
"output":["ParseError", ["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"65-bit hex numeric entity",
|
||||
"input":"�",
|
||||
"output":["ParseError", ["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"65-bit decimal numeric entity",
|
||||
"input":"�",
|
||||
"output":["ParseError", ["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"Surrogate code point edge cases",
|
||||
"input":"퟿����",
|
||||
"output":[["Character", "\uD7FF"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD\uE000"]]},
|
||||
|
||||
{"description":"Uppercase start tag name",
|
||||
"input":"<X>",
|
||||
"output":[["StartTag", "x", {}]]},
|
||||
|
||||
{"description":"Uppercase end tag name",
|
||||
"input":"</X>",
|
||||
"output":[["EndTag", "x"]]},
|
||||
|
||||
{"description":"Uppercase attribute name",
|
||||
"input":"<x X>",
|
||||
"output":[["StartTag", "x", { "x":"" }]]},
|
||||
|
||||
{"description":"Tag/attribute name case edge values",
|
||||
"input":"<x@AZ[`az{ @AZ[`az{>",
|
||||
"output":[["StartTag", "x@az[`az{", { "@az[`az{":"" }]]},
|
||||
|
||||
{"description":"Duplicate different-case attributes",
|
||||
"input":"<x x=1 x=2 X=3>",
|
||||
"output":["ParseError", "ParseError", ["StartTag", "x", { "x":"1" }]]},
|
||||
|
||||
{"description":"Uppercase close tag attributes",
|
||||
"input":"</x X>",
|
||||
"output":["ParseError", ["EndTag", "x"]]},
|
||||
|
||||
{"description":"Duplicate close tag attributes",
|
||||
"input":"</x x x>",
|
||||
"output":["ParseError", "ParseError", ["EndTag", "x"]]},
|
||||
|
||||
{"description":"Permitted slash",
|
||||
"input":"<br/>",
|
||||
"output":[["StartTag", "br", {}]]},
|
||||
|
||||
{"description":"Non-permitted slash",
|
||||
"input":"<xr/>",
|
||||
"output":["ParseError", ["StartTag", "xr", {}]]},
|
||||
|
||||
{"description":"Permitted slash but in close tag",
|
||||
"input":"</br/>",
|
||||
"output":["ParseError", ["EndTag", "br"]]},
|
||||
|
||||
{"description":"Doctype public case-sensitivity (1)",
|
||||
"input":"<!DoCtYpE HtMl PuBlIc \"AbC\" \"XyZ\">",
|
||||
"output":[["DOCTYPE", "HtMl", "AbC", "XyZ", true]]},
|
||||
|
||||
{"description":"Doctype public case-sensitivity (2)",
|
||||
"input":"<!dOcTyPe hTmL pUbLiC \"aBc\" \"xYz\">",
|
||||
"output":[["DOCTYPE", "hTmL", "aBc", "xYz", true]]},
|
||||
|
||||
{"description":"Doctype system case-sensitivity (1)",
|
||||
"input":"<!DoCtYpE HtMl SyStEm \"XyZ\">",
|
||||
"output":[["DOCTYPE", "HtMl", null, "XyZ", true]]},
|
||||
|
||||
{"description":"Doctype system case-sensitivity (2)",
|
||||
"input":"<!dOcTyPe hTmL sYsTeM \"xYz\">",
|
||||
"output":[["DOCTYPE", "hTmL", null, "xYz", true]]},
|
||||
|
||||
{"description":"U+0000 in lookahead region after non-matching character",
|
||||
"input":"<!doc>\u0000",
|
||||
"output":["ParseError", ["Comment", "doc"], "ParseError", ["Character", "\uFFFD"]],
|
||||
"ignoreErrorOrder":true},
|
||||
|
||||
{"description":"U+0000 in lookahead region",
|
||||
"input":"<!doc\u0000",
|
||||
"output":["ParseError", "ParseError", ["Comment", "doc\uFFFD"]],
|
||||
"ignoreErrorOrder":true},
|
||||
|
||||
{"description":"CR followed by U+0000",
|
||||
"input":"\r\u0000",
|
||||
"output":["ParseError", ["Character", "\n\uFFFD"]],
|
||||
"ignoreErrorOrder":true},
|
||||
|
||||
{"description":"CR followed by non-LF",
|
||||
"input":"\r?",
|
||||
"output":[["Character", "\n?"]]},
|
||||
|
||||
{"description":"CR at EOF",
|
||||
"input":"\r",
|
||||
"output":[["Character", "\n"]]},
|
||||
|
||||
{"description":"LF at EOF",
|
||||
"input":"\n",
|
||||
"output":[["Character", "\n"]]},
|
||||
|
||||
{"description":"CR LF",
|
||||
"input":"\r\n",
|
||||
"output":[["Character", "\n"]]},
|
||||
|
||||
{"description":"CR CR",
|
||||
"input":"\r\r",
|
||||
"output":[["Character", "\n\n"]]},
|
||||
|
||||
{"description":"LF LF",
|
||||
"input":"\n\n",
|
||||
"output":[["Character", "\n\n"]]},
|
||||
|
||||
{"description":"LF CR",
|
||||
"input":"\n\r",
|
||||
"output":[["Character", "\n\n"]]},
|
||||
|
||||
{"description":"text CR CR CR text",
|
||||
"input":"text\r\r\rtext",
|
||||
"output":[["Character", "text\n\n\ntext"]]},
|
||||
|
||||
{"description":"Doctype publik",
|
||||
"input":"<!DOCTYPE html PUBLIK \"AbC\" \"XyZ\">",
|
||||
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
|
||||
|
||||
{"description":"Doctype publi",
|
||||
"input":"<!DOCTYPE html PUBLI",
|
||||
"output":["ParseError", "ParseError", ["DOCTYPE", "html", null, null, false]]},
|
||||
|
||||
{"description":"Doctype sistem",
|
||||
"input":"<!DOCTYPE html SISTEM \"AbC\">",
|
||||
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
|
||||
|
||||
{"description":"Doctype sys",
|
||||
"input":"<!DOCTYPE html SYS",
|
||||
"output":["ParseError", "ParseError", ["DOCTYPE", "html", null, null, false]]}
|
||||
|
||||
]}
|
|
@ -113,7 +113,6 @@ Line1<br>Line2<br>Line3<br>Line4
|
|||
<html><head></body></html>
|
||||
#errors
|
||||
6: missing document type declaration
|
||||
19: unexpected body element end tag in head
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
|
@ -159,7 +158,6 @@ Line1<br>Line2<br>Line3<br>Line4
|
|||
</head>
|
||||
#errors
|
||||
7: missing document type declaration
|
||||
7: unexpected head element end tag
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
|
@ -169,7 +167,6 @@ Line1<br>Line2<br>Line3<br>Line4
|
|||
</body>
|
||||
#errors
|
||||
7: missing document type declaration
|
||||
7: unexpected body element end tag
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
|
@ -285,6 +282,7 @@ Line1<br>Line2<br>Line3<br>Line4
|
|||
| <div>
|
||||
| <b>
|
||||
| <marquee>
|
||||
| <p>
|
||||
| "X"
|
||||
|
||||
#data
|
||||
|
@ -330,6 +328,7 @@ Unexpected end of file
|
|||
| <body>
|
||||
| <p>
|
||||
| <hr>
|
||||
| <p>
|
||||
|
||||
#data
|
||||
<select><b><option><select><option></b></select>X
|
||||
|
@ -435,6 +434,7 @@ Unexpected end of file
|
|||
#data
|
||||
<!DOCTYPE HTML><li>hello<li>world<ul>how<li>do</ul>you</body><!--do-->
|
||||
#errors
|
||||
Unexpected end of file. Expected </li>. XXX
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <html>
|
||||
|
@ -636,7 +636,6 @@ Unexpected end of file
|
|||
#data
|
||||
<!DOCTYPE HTML><script> <!-- </script> --> </script> EOF
|
||||
#errors
|
||||
52: unexpected script element end tag
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <html>
|
||||
|
@ -730,6 +729,7 @@ Unexpected end of file
|
|||
#errors
|
||||
6: missing document type declaration
|
||||
29: mismatched font element end tag (misnested tags)
|
||||
AAA </font> tag strikes again
|
||||
35: mismatched body element end tag (premature end of file?)
|
||||
#document
|
||||
| <html>
|
||||
|
@ -1120,6 +1120,7 @@ Unexpected end of file
|
|||
15: missing document type declaration
|
||||
39: unexpected node in table context
|
||||
39: a element start tag implying a element end tag
|
||||
AAA violation: </a>
|
||||
39: unexpected node in table context
|
||||
39: mismatched a element end tag (misnested tags across <table> tag)
|
||||
43: unexpected node in table context
|
||||
|
@ -1175,6 +1176,8 @@ Unexpected end of file
|
|||
7: missing document type declaration
|
||||
22: unexpected node in table context
|
||||
27: unexpected node in table context
|
||||
XXX more table voodoo
|
||||
XXX more table voodoo
|
||||
54: unexpected td element end tag implied other end tags
|
||||
63: unexpected node in table context
|
||||
72: mismatched body element end tag (premature end of file?)
|
||||
|
@ -1299,11 +1302,9 @@ unexpected EOF
|
|||
#errors
|
||||
6: missing document type declaration
|
||||
12: unexpected body element start tag
|
||||
18: base element start tag out of place
|
||||
24: link element start tag out of place
|
||||
30: meta element start tag out of place
|
||||
37: title element start tag out of place
|
||||
54: unexpected body element start tag
|
||||
Missing end tag </p>. XXX
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
|
@ -1344,7 +1345,6 @@ unexpected EOF
|
|||
3: missing document type declaration
|
||||
13: unexpected node in table context
|
||||
13: a element start tag implying a element end tag
|
||||
13: unexpected node in table context
|
||||
13: mismatched a element end tag (misnested tags across <table> tag)
|
||||
21: mismatched table element end tag
|
||||
27: a element start tag implying a element end tag
|
||||
|
@ -1369,13 +1369,14 @@ unexpected EOF
|
|||
<head></p><meta><p>
|
||||
#errors
|
||||
6: missing document type declaration
|
||||
10: unexpected p element end tag in head
|
||||
10: unexpected p element end tag
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <meta>
|
||||
| <body>
|
||||
| <p>
|
||||
| <meta>
|
||||
| <p>
|
||||
|
||||
#data
|
||||
<head></html><meta><p>
|
||||
|
@ -1485,6 +1486,7 @@ unexpected EOF
|
|||
| <div>
|
||||
| <b>
|
||||
| <marquee>
|
||||
| <p>
|
||||
|
||||
#data
|
||||
<script></script></div><title></title><p><p>
|
||||
|
@ -1511,6 +1513,7 @@ unexpected EOF
|
|||
| <body>
|
||||
| <p>
|
||||
| <hr>
|
||||
| <p>
|
||||
|
||||
#data
|
||||
<select><b><option><select><option></b></select>
|
||||
|
@ -1571,6 +1574,8 @@ unexpected EOF
|
|||
<ul><li></li><div><li></div><li><li><div><li><address><li><b><em></b><li></ul>
|
||||
#errors
|
||||
4: missing document type declaration
|
||||
Missing end tag for <div> (nr2)
|
||||
Missing end tag for <address>
|
||||
69: mismatched b element end tag (misnested tags)
|
||||
#document
|
||||
| <html>
|
||||
|
@ -1615,7 +1620,6 @@ unexpected EOF
|
|||
56: unexpected frameset element start tag in body
|
||||
63: unexpected frame element start tag in body
|
||||
74: unexpected frameset element end tag
|
||||
87: unescaped '</' in CDATA or RCDATA block
|
||||
106: unexpected end of file while parsing CDATA section for element noframes
|
||||
#document
|
||||
| <html>
|
||||
|
@ -1630,6 +1634,7 @@ unexpected EOF
|
|||
4: missing document type declaration
|
||||
15: required tr element start tag implied by unexpected td element start tag
|
||||
27: unexpected td element end tag implied other end tags
|
||||
Unexpected </h1> tag. Expected other.
|
||||
Unexpected EOF
|
||||
#document
|
||||
| <html>
|
||||
|
@ -1737,9 +1742,9 @@ Unexpected EOF
|
|||
108: unexpected h4 element end tag
|
||||
113: unexpected h5 element end tag
|
||||
118: unexpected h6 element end tag
|
||||
125: unexpected body element end tag
|
||||
125: unexpected end tag token br in after body phase
|
||||
130: unexpected br element end tag
|
||||
134: unexpected a element end tag
|
||||
134: unexpected a element end tag (AAA)
|
||||
140: unexpected img element end tag
|
||||
148: unexpected title element end tag
|
||||
155: unexpected span element end tag
|
||||
|
@ -1807,6 +1812,7 @@ Unexpected EOF
|
|||
| <head>
|
||||
| <body>
|
||||
| <br>
|
||||
| <p>
|
||||
|
||||
#data
|
||||
<table><tr></strong></b></em></i></u></strike></s></blink></tt></pre></big></small></font></select></h1></h2></h3></h4></h5></h6></body></br></a></img></title></span></style></script></table></th></td></tr></frame></area></link></param></hr></input></col></base></meta></basefont></bgsound></embed></spacer></p></dd></dt></caption></colgroup></tbody></tfoot></thead></address></blockquote></center></dir></div></dl></fieldset></listing></menu></ol></ul></li></nobr></wbr></form></button></marquee></object></html></frameset></head></iframe></image></isindex></noembed></noframes></noscript></optgroup></option></plaintext></textarea>
|
||||
|
@ -1920,6 +1926,9 @@ Unexpected EOF
|
|||
610: unexpected option element end tag
|
||||
622: unexpected plaintext element end tag
|
||||
633: mismatched special end tag textarea
|
||||
XXX
|
||||
XXX
|
||||
XXX
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
|
@ -1928,3 +1937,14 @@ Unexpected EOF
|
|||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <p>
|
||||
|
||||
#data
|
||||
<frameset>
|
||||
#errors
|
||||
10: Start tag seen without seeing a doctype first.
|
||||
11: End of file seen and there were open elements.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <frameset>
|
||||
|
|
|
@ -12,7 +12,6 @@
|
|||
<textarea>test</div>test
|
||||
#errors
|
||||
10: missing document type declaration.
|
||||
17: unescaped '</' in CDATA or RCDATA block.
|
||||
25: unexpected end of file while parsing CDATA section for element textarea.
|
||||
#document
|
||||
| <html>
|
||||
|
@ -87,6 +86,8 @@ Expected end tag </frameset>
|
|||
#data
|
||||
<!DOCTYPE HTML><font><p><b>test</font>
|
||||
#errors
|
||||
AAA violation. </font>
|
||||
AAA violation. </font>
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <html>
|
||||
|
@ -101,6 +102,7 @@ Expected end tag </frameset>
|
|||
#data
|
||||
<!DOCTYPE HTML><dt><div><dd>
|
||||
#errors
|
||||
Missing end tag for <div>.
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <html>
|
||||
|
@ -114,7 +116,6 @@ Expected end tag </frameset>
|
|||
<script></x
|
||||
#errors
|
||||
no document type
|
||||
</ in script
|
||||
Unexpected end of file. Expected </script> end tag.
|
||||
#document
|
||||
| <html>
|
||||
|
@ -129,6 +130,7 @@ Unexpected end of file. Expected </script> end tag.
|
|||
no document type
|
||||
<plaintext> directly inside table
|
||||
Characters inside table.
|
||||
Characters inside table. (XXX?)
|
||||
Unexpected end of file.
|
||||
#document
|
||||
| <html>
|
||||
|
@ -175,10 +177,10 @@ Unexpected start tag "body"
|
|||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| t4="4"
|
||||
| t1="1"
|
||||
| t2="2"
|
||||
| t3="3"
|
||||
| t1="1"
|
||||
| t4="4"
|
||||
|
||||
#data
|
||||
</b test
|
||||
|
@ -195,7 +197,6 @@ Unexpected end tag.
|
|||
#data
|
||||
<!DOCTYPE HTML></b test<b &=&>X
|
||||
#errors
|
||||
Unexpected < in attribute
|
||||
End tag contains attributes.
|
||||
Unexpected end tag.
|
||||
Named entity didn't end with ;
|
||||
|
@ -224,7 +225,6 @@ Unexpected EOF in (end) tag name
|
|||
&
|
||||
#errors
|
||||
No doctype.
|
||||
Unfinished entity.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
|
@ -349,11 +349,11 @@ Unexpected end EOF. Missing closing tags.
|
|||
| <b>
|
||||
| <i>
|
||||
| <u>
|
||||
| " "
|
||||
| <p>
|
||||
| <b>
|
||||
| <i>
|
||||
| <u>
|
||||
| " "
|
||||
| <p>
|
||||
| "X"
|
||||
|
||||
#data
|
||||
|
@ -540,8 +540,8 @@ No doctype
|
|||
| <label>
|
||||
| "This is a searchable index. Insert your search keywords here: "
|
||||
| <input>
|
||||
| test="x"
|
||||
| name="isindex"
|
||||
| test="x"
|
||||
| <hr>
|
||||
|
||||
#data
|
||||
|
@ -571,19 +571,18 @@ Unexpected EOF.
|
|||
| <b>
|
||||
| <i>
|
||||
| <u>
|
||||
| "
|
||||
"
|
||||
| <p>
|
||||
| <b>
|
||||
| <i>
|
||||
| <u>
|
||||
| "
|
||||
"
|
||||
| <p>
|
||||
| "X"
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML><body><title>test</body></title>
|
||||
#errors
|
||||
Unexpected start tag that belongs in the head.
|
||||
Expected closing tag after </.
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <html>
|
||||
|
@ -596,10 +595,7 @@ Expected closing tag after </.
|
|||
<!DOCTYPE HTML><body><title>X</title><meta name=z><link rel=foo><style>
|
||||
x { content:"</style" } </style>
|
||||
#errors
|
||||
Unexpected start tag that belongs in head.
|
||||
Unexpected start tag that belongs in head.
|
||||
Unexpected start tag that belongs in head.
|
||||
Expected closing tag after </.
|
||||
Unexpected start tag that belongs in head. <title>
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <html>
|
||||
|
@ -632,8 +628,6 @@ x { content:"</style" } "
|
|||
#errors
|
||||
No doctype.
|
||||
#document
|
||||
| "
|
||||
"
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
@ -643,7 +637,6 @@ No doctype.
|
|||
#errors
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| " "
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
@ -749,8 +742,8 @@ Solidus (/) incorrectly placed.
|
|||
| <body>
|
||||
| "X"
|
||||
| <p>
|
||||
| y=""
|
||||
| x=""
|
||||
| y=""
|
||||
| z=""
|
||||
|
||||
#data
|
||||
|
@ -777,3 +770,4 @@ Unexpected </p> end tag.
|
|||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
| <p>
|
||||
|
|
|
@ -61,7 +61,6 @@ No DOCTYPE
|
|||
|
||||
#data
|
||||
<!DOCTYPE htML><html><head></head><body><pre>
|
||||
|
||||
foo</pre></body></html>
|
||||
#errors
|
||||
#document
|
||||
|
@ -72,10 +71,22 @@ foo</pre></body></html>
|
|||
| <pre>
|
||||
| "foo"
|
||||
|
||||
|
||||
#data
|
||||
<!DOCTYPE htML><html><head></head><body><pre>
|
||||
|
||||
foo</pre></body></html>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE htML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <pre>
|
||||
| "
|
||||
foo"
|
||||
|
||||
#data
|
||||
<!DOCTYPE htML><html><head></head><body><pre>
|
||||
foo
|
||||
</pre></body></html>
|
||||
#errors
|
||||
|
@ -120,6 +131,7 @@ y"
|
|||
<!DOCTYPE htML><html><head></head><body><pre>x<div>
|
||||
y</pre></body></html>
|
||||
#errors
|
||||
End tag <pre> seen too early. Expected other end tag.
|
||||
#document
|
||||
| <!DOCTYPE htML>
|
||||
| <html>
|
||||
|
@ -129,11 +141,12 @@ y</pre></body></html>
|
|||
| "x"
|
||||
| <div>
|
||||
| "
|
||||
| y"
|
||||
y"
|
||||
|
||||
#data
|
||||
<!DOCTYPE htML><HTML><META><HEAD></HEAD></HTML>
|
||||
#errors
|
||||
Unexpected start tag HEAD in HEAD. Ignored.
|
||||
#document
|
||||
| <!DOCTYPE htML>
|
||||
| <html>
|
||||
|
@ -144,6 +157,7 @@ y</pre></body></html>
|
|||
#data
|
||||
<!DOCTYPE htML><HTML><HEAD><head></HEAD></HTML>
|
||||
#errors
|
||||
Unexpected start tag HEAD in HEAD. Ignored.
|
||||
#document
|
||||
| <!DOCTYPE htML>
|
||||
| <html>
|
||||
|
@ -153,6 +167,8 @@ y</pre></body></html>
|
|||
#data
|
||||
<textarea>foo<span>bar</span><i>baz
|
||||
#errors
|
||||
Unexpected start tag. Expected DOCTYPE.
|
||||
Unexpected end of file.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
|
@ -163,6 +179,8 @@ y</pre></body></html>
|
|||
#data
|
||||
<title>foo<span>bar</em><i>baz
|
||||
#errors
|
||||
Unexpected start tag. Expected DOCTYPE.
|
||||
Unexpected end of file.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
|
@ -183,7 +201,6 @@ y</pre></body></html>
|
|||
|
||||
#data
|
||||
<!DOCTYPE htML><textarea>
|
||||
|
||||
foo</textarea>
|
||||
#errors
|
||||
#document
|
||||
|
@ -194,6 +211,20 @@ foo</textarea>
|
|||
| <textarea>
|
||||
| "foo"
|
||||
|
||||
#data
|
||||
<!DOCTYPE htML><textarea>
|
||||
|
||||
foo</textarea>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE htML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <textarea>
|
||||
| "
|
||||
foo"
|
||||
|
||||
#data
|
||||
<!DOCTYPE htML><html><head></head><body><ul><li><div><p><li></ul></body></html>
|
||||
#errors
|
||||
|
@ -212,6 +243,8 @@ Missing end tag (div)
|
|||
#data
|
||||
<!doctype html><nobr><nobr><nobr>
|
||||
#errors
|
||||
Unexpected <nobr> tag.
|
||||
Unexpected <nobr> tag.
|
||||
Unexpected end of file.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
|
@ -225,6 +258,7 @@ Unexpected end of file.
|
|||
#data
|
||||
<!doctype html><nobr><nobr></nobr><nobr>
|
||||
#errors
|
||||
Unexpected <nobr> tag.
|
||||
Unexpected end of file.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
|
|
|
@ -1,37 +1,50 @@
|
|||
#data
|
||||
direct div content
|
||||
#errors
|
||||
#document-fragment div
|
||||
#document-fragment
|
||||
div
|
||||
#document
|
||||
| "direct div content"
|
||||
|
||||
#data
|
||||
direct textarea content
|
||||
#errors
|
||||
#document-fragment textarea
|
||||
#document-fragment
|
||||
textarea
|
||||
#document
|
||||
| "direct textarea content"
|
||||
|
||||
#data
|
||||
textarea content with <em>pseudo</em> <foo>markup
|
||||
#errors
|
||||
#document-fragment textarea
|
||||
#document-fragment
|
||||
textarea
|
||||
#document
|
||||
| "textarea content with <em>pseudo</em> <foo>markup"
|
||||
|
||||
#data
|
||||
this is CDATA inside a <style> element
|
||||
#errors
|
||||
#document-fragment style
|
||||
#document-fragment
|
||||
style
|
||||
#document
|
||||
| "this is CDATA inside a <style> element"
|
||||
|
||||
#data
|
||||
</plaintext>
|
||||
#errors
|
||||
#document-fragment plaintext
|
||||
#document-fragment
|
||||
plaintext
|
||||
#document
|
||||
| "</plaintext>"
|
||||
|
||||
#data
|
||||
setting html's innerHTML
|
||||
#errors
|
||||
#document-fragment html
|
||||
XXX innerHTML EOF
|
||||
#document-fragment
|
||||
html
|
||||
#document
|
||||
| <head>
|
||||
| <body>
|
||||
| "setting html's innerHTML"
|
||||
|
@ -39,6 +52,9 @@ setting html's innerHTML
|
|||
#data
|
||||
<title>setting head's innerHTML</title>
|
||||
#errors
|
||||
#document-fragment head
|
||||
Unexpected title element that belongs in head.
|
||||
#document-fragment
|
||||
head
|
||||
#document
|
||||
| <title>
|
||||
| "setting head's innerHTML"
|
||||
|
|
|
@ -110,7 +110,6 @@ No DOCTYPE
|
|||
<style> <!</-- </style>x
|
||||
#errors
|
||||
No DOCTYPE
|
||||
Unexpected end of file
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
|
@ -118,3 +117,59 @@ Unexpected end of file
|
|||
| " <!</-- "
|
||||
| <body>
|
||||
| "x"
|
||||
|
||||
#data
|
||||
<xmp> <!-- > --> </xmp>
|
||||
#errors
|
||||
No DOCTYPE
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <xmp>
|
||||
| " <!-- > --> "
|
||||
|
||||
#data
|
||||
<title>&</title>
|
||||
#errors
|
||||
No DOCTYPE
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <title>
|
||||
| "&"
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<title><!--&--></title>
|
||||
#errors
|
||||
No DOCTYPE
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <title>
|
||||
| "<!--&-->"
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<title><!--</title>
|
||||
#errors
|
||||
No DOCTYPE
|
||||
Unexpected EOF
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <title>
|
||||
| "<!--</title>"
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<noscript><!--</noscript>--></noscript>
|
||||
#errors
|
||||
No DOCTYPE
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <noscript>
|
||||
| "<!--</noscript>-->"
|
||||
| <body>
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#data
|
||||
<!doctype html></head> <head>
|
||||
#errors
|
||||
Unexpected start tag head. Ignored.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
|
@ -11,6 +12,9 @@
|
|||
#data
|
||||
<!doctype html></html> <head>
|
||||
#errors
|
||||
Unexpected start tag head.
|
||||
Unexpected start tag head in after body phase.
|
||||
Unexpected start tag head. Ignored.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
|
@ -21,9 +25,69 @@
|
|||
#data
|
||||
<!doctype html></body><meta>
|
||||
#errors
|
||||
Unexpected meta element in after body phase.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <meta>
|
||||
|
||||
#data
|
||||
<!doctype HTml><form><div></form><div>
|
||||
#errors
|
||||
Form end tag ignored.
|
||||
Unexpected end of file.
|
||||
#document
|
||||
| <!DOCTYPE HTml>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <form>
|
||||
| <div>
|
||||
| <div>
|
||||
|
||||
#data
|
||||
<!doctype HTml><title>&</title>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE HTml>
|
||||
| <html>
|
||||
| <head>
|
||||
| <title>
|
||||
| "&"
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!doctype HTml><title><!--&--></title>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE HTml>
|
||||
| <html>
|
||||
| <head>
|
||||
| <title>
|
||||
| "<!--&-->"
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!doctype>
|
||||
#errors
|
||||
No space after "doctype"
|
||||
Unexpected ">"
|
||||
Incorrect doctype
|
||||
#document
|
||||
| <!DOCTYPE >
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!---x
|
||||
#errors
|
||||
End of file in comment
|
||||
End of file before doctype
|
||||
#document
|
||||
| <!-- -x -->
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
|
1035
vendor/plugins/HTML5lib/testdata/validator/attributes.test
vendored
Executable file
1035
vendor/plugins/HTML5lib/testdata/validator/attributes.test
vendored
Executable file
File diff suppressed because it is too large
Load diff
159
vendor/plugins/HTML5lib/testdata/validator/classattribute.test
vendored
Executable file
159
vendor/plugins/HTML5lib/testdata/validator/classattribute.test
vendored
Executable file
|
@ -0,0 +1,159 @@
|
|||
{"tests": [
|
||||
|
||||
{"description": "valid single class attribute value",
|
||||
"input": "<span class=a>",
|
||||
"fail-if": "invalid-attribute-value"},
|
||||
|
||||
{"description": "valid single class attribute value with leading space",
|
||||
"input": "<span class=' a'>",
|
||||
"fail-if": "invalid-attribute-value"},
|
||||
|
||||
{"description": "valid single class attribute value with trailing space",
|
||||
"input": "<span class='a '>",
|
||||
"fail-if": "invalid-attribute-value"},
|
||||
|
||||
{"description": "valid single class attribute value with leading and trailing space",
|
||||
"input": "<span class=' a '>",
|
||||
"fail-if": "invalid-attribute-value"},
|
||||
|
||||
{"description": "valid single class attribute value with leading tab",
|
||||
"input": "<span class=' a'>",
|
||||
"fail-if": "invalid-attribute-value"},
|
||||
|
||||
{"description": "valid single class attribute value with trailing tab",
|
||||
"input": "<span class='a '>",
|
||||
"fail-if": "invalid-attribute-value"},
|
||||
|
||||
{"description": "valid single class attribute value with leading and trailing tab",
|
||||
"input": "<span class=' a '>",
|
||||
"fail-if": "invalid-attribute-value"},
|
||||
|
||||
{"description": "valid single class attribute value with leading LF",
|
||||
"input": "<span class='
|
||||
a'>",
|
||||
"fail-if": "invalid-attribute-value"},
|
||||
|
||||
{"description": "valid single class attribute value with trailing LF",
|
||||
"input": "<span class='a
|
||||
'>",
|
||||
"fail-if": "invalid-attribute-value"},
|
||||
|
||||
{"description": "valid single class attribute value with leading and trailing LF",
|
||||
"input": "<span class='
|
||||
a
|
||||
'>",
|
||||
"fail-if": "invalid-attribute-value"},
|
||||
|
||||
{"description": "valid single class attribute value with leading LT",
|
||||
"input": "<span class='a'>",
|
||||
"fail-if": "invalid-attribute-value"},
|
||||
|
||||
{"description": "valid single class attribute value with trailing LT",
|
||||
"input": "<span class='a'>",
|
||||
"fail-if": "invalid-attribute-value"},
|
||||
|
||||
{"description": "valid single class attribute value with leading and trailing LT",
|
||||
"input": "<span class='a'>",
|
||||
"fail-if": "invalid-attribute-value"},
|
||||
|
||||
{"description": "valid single class attribute value with leading FF",
|
||||
"input": "<span class='a'>",
|
||||
"fail-if": "invalid-attribute-value"},
|
||||
|
||||
{"description": "valid single class attribute value with trailing FF",
|
||||
"input": "<span class='a'>",
|
||||
"fail-if": "invalid-attribute-value"},
|
||||
|
||||
{"description": "valid single class attribute value with leading and trailing FF",
|
||||
"input": "<span class='a'>",
|
||||
"fail-if": "invalid-attribute-value"},
|
||||
|
||||
{"description": "valid single class attribute value with leading CR",
|
||||
"input": "<span class='
a'>",
|
||||
"fail-if": "invalid-attribute-value"},
|
||||
|
||||
{"description": "valid single class attribute value with trailing CR",
|
||||
"input": "<span class='a
'>",
|
||||
"fail-if": "invalid-attribute-value"},
|
||||
|
||||
{"description": "valid single class attribute value with leading and trailing CR",
|
||||
"input": "<span class='
a
'>",
|
||||
"fail-if": "invalid-attribute-value"},
|
||||
|
||||
{"description": "valid double class attribute value separated by space",
|
||||
"input": "<span class='a b'>",
|
||||
"fail-if": "invalid-attribute-value"},
|
||||
|
||||
{"description": "valid double class attribute value separated by tab",
|
||||
"input": "<span class='a b'>",
|
||||
"fail-if": "invalid-attribute-value"},
|
||||
|
||||
{"description": "valid double class attribute value separated by LF",
|
||||
"input": "<span class='a
|
||||
b'>",
|
||||
"fail-if": "invalid-attribute-value"},
|
||||
|
||||
{"description": "valid double class attribute value separated by LT",
|
||||
"input": "<span class='ab'>",
|
||||
"fail-if": "invalid-attribute-value"},
|
||||
|
||||
{"description": "valid double class attribute value separated by FF",
|
||||
"input": "<span class='ab'>",
|
||||
"fail-if": "invalid-attribute-value"},
|
||||
|
||||
{"description": "valid double class attribute value separated by CR",
|
||||
"input": "<span class='a
b'>",
|
||||
"fail-if": "invalid-attribute-value"},
|
||||
|
||||
{"description": "invalid duplicated class attribute value separated by space",
|
||||
"input": "<span class='a a'>",
|
||||
"fail-unless": "invalid-attribute-value"},
|
||||
|
||||
{"description": "invalid duplicated class attribute value separated by tab",
|
||||
"input": "<span class='a a'>",
|
||||
"fail-unless": "invalid-attribute-value"},
|
||||
|
||||
{"description": "invalid duplicated class attribute value separated by LF",
|
||||
"input": "<span class='a
|
||||
a'>",
|
||||
"fail-unless": "invalid-attribute-value"},
|
||||
|
||||
{"description": "invalid duplicated class attribute value separated by LT",
|
||||
"input": "<span class='aa'>",
|
||||
"fail-unless": "invalid-attribute-value"},
|
||||
|
||||
{"description": "invalid duplicated class attribute value separated by FF",
|
||||
"input": "<span class='aa'>",
|
||||
"fail-unless": "invalid-attribute-value"},
|
||||
|
||||
{"description": "invalid duplicated class attribute value separated by CR",
|
||||
"input": "<span class='a
a'>",
|
||||
"fail-unless": "duplicate-value-in-token-list"},
|
||||
|
||||
{"description": "invalid duplicated class attribute value separated by space",
|
||||
"input": "<span class='a a'>",
|
||||
"fail-unless": "duplicate-value-in-token-list"},
|
||||
|
||||
{"description": "invalid duplicated class attribute value separated by tab",
|
||||
"input": "<span class='a a'>",
|
||||
"fail-unless": "duplicate-value-in-token-list"},
|
||||
|
||||
{"description": "invalid duplicated class attribute value separated by LF",
|
||||
"input": "<span class='a
|
||||
a'>",
|
||||
"fail-unless": "duplicate-value-in-token-list"},
|
||||
|
||||
{"description": "invalid duplicated class attribute value separated by LT",
|
||||
"input": "<span class='aa'>",
|
||||
"fail-unless": "duplicate-value-in-token-list"},
|
||||
|
||||
{"description": "invalid duplicated class attribute value separated by FF",
|
||||
"input": "<span class='aa'>",
|
||||
"fail-unless": "duplicate-value-in-token-list"},
|
||||
|
||||
{"description": "invalid duplicated class attribute value separated by CR",
|
||||
"input": "<span class='a
a'>",
|
||||
"fail-unless": "duplicate-value-in-token-list"}
|
||||
|
||||
]}
|
||||
|
59
vendor/plugins/HTML5lib/testdata/validator/contenteditableattribute.test
vendored
Executable file
59
vendor/plugins/HTML5lib/testdata/validator/contenteditableattribute.test
vendored
Executable file
|
@ -0,0 +1,59 @@
|
|||
{"tests": [
|
||||
|
||||
{"description": "valid contenteditable attribute value 'true'",
|
||||
"input": "<span contenteditable=true>",
|
||||
"fail-if": "invalid-attribute-value"},
|
||||
|
||||
{"description": "valid contenteditable attribute value 'TRUE'",
|
||||
"input": "<span contenteditable=TRUE>",
|
||||
"fail-if": "invalid-attribute-value"},
|
||||
|
||||
{"description": "valid contenteditable attribute value 'TrUe'",
|
||||
"input": "<span contenteditable=TrUe>",
|
||||
"fail-if": "invalid-attribute-value"},
|
||||
|
||||
{"description": "valid contenteditable attribute value 'false'",
|
||||
"input": "<span contenteditable=false>",
|
||||
"fail-if": "invalid-attribute-value"},
|
||||
|
||||
{"description": "valid contenteditable attribute value 'FALSE'",
|
||||
"input": "<span contenteditable=FALSE>",
|
||||
"fail-if": "invalid-attribute-value"},
|
||||
|
||||
{"description": "valid contenteditable attribute value 'FalSe'",
|
||||
"input": "<span contenteditable=FalSe>",
|
||||
"fail-if": "invalid-attribute-value"},
|
||||
|
||||
{"description": "valid contenteditable attribute value ''",
|
||||
"input": "<span contenteditable=''>",
|
||||
"fail-if": "invalid-attribute-value"},
|
||||
|
||||
{"description": "valid contenteditable attribute value (not specified)",
|
||||
"input": "<span contenteditable>",
|
||||
"fail-if": "invalid-attribute-value"},
|
||||
|
||||
{"description": "invalid contenteditable attribute value 'foo'",
|
||||
"input": "<span contenteditable=foo>",
|
||||
"fail-unless": "invalid-attribute-value"},
|
||||
|
||||
{"description": "invalid contenteditable attribute value '0'",
|
||||
"input": "<span contenteditable=0>",
|
||||
"fail-unless": "invalid-attribute-value"},
|
||||
|
||||
{"description": "invalid contenteditable attribute value '1'",
|
||||
"input": "<span contenteditable=1>",
|
||||
"fail-unless": "invalid-attribute-value"},
|
||||
|
||||
{"description": "invalid contenteditable attribute value 'yes'",
|
||||
"input": "<span contenteditable=yes>",
|
||||
"fail-unless": "invalid-attribute-value"},
|
||||
|
||||
{"description": "invalid contenteditable attribute value 'no'",
|
||||
"input": "<span contenteditable=no>",
|
||||
"fail-unless": "invalid-attribute-value"},
|
||||
|
||||
{"description": "invalid contenteditable attribute value 'inherit'",
|
||||
"input": "<span contenteditable=inherit>",
|
||||
"fail-unless": "invalid-attribute-value"}
|
||||
|
||||
]}
|
118
vendor/plugins/HTML5lib/testdata/validator/contextmenuattribute.test
vendored
Executable file
118
vendor/plugins/HTML5lib/testdata/validator/contextmenuattribute.test
vendored
Executable file
|
@ -0,0 +1,118 @@
|
|||
{"tests": [
|
||||
|
||||
{"description": "contextmenu points to valid ID earlier",
|
||||
"input": "<menu id=a><span contextmenu=a>",
|
||||
"fail-if": "id-does-not-exist"},
|
||||
|
||||
{"description": "contextmenu points to valid ID later",
|
||||
"input": "<span contextmenu=a><menu id=a>",
|
||||
"fail-if": "id-does-not-exist"},
|
||||
|
||||
{"description": "contextmenu points to non-existent ID",
|
||||
"input": "<span contextmenu=a>",
|
||||
"fail-unless": "id-does-not-exist"},
|
||||
|
||||
{"description": "contextmenu points to ID on non-menu element",
|
||||
"input": "<span id=a><span contextmenu=a>",
|
||||
"fail-unless": "contextmenu-must-point-to-menu"},
|
||||
|
||||
{"description": "uppercase contextmenu points to ID on non-menu element",
|
||||
"input": "<span id=a><span CONTEXTMENU=a>",
|
||||
"fail-unless": "contextmenu-must-point-to-menu"},
|
||||
|
||||
{"description": "valid ID 'a'",
|
||||
"input": "<span contextmenu=a>",
|
||||
"fail-if": "invalid-attribute-value"},
|
||||
|
||||
{"description": "valid ID '1'",
|
||||
"input": "<span contextmenu=1>",
|
||||
"fail-if": "invalid-attribute-value"},
|
||||
|
||||
{"description": "wacky but valid ID",
|
||||
"input": "<span contextmenu='<html><head><title>a</title></head><body><p>b</p></body></html>'>",
|
||||
"fail-if": "invalid-attribute-value"},
|
||||
|
||||
{"description": "invalid blank ID",
|
||||
"input": "<span id>",
|
||||
"fail-unless": "attribute-value-can-not-be-blank"},
|
||||
|
||||
{"description": "invalid blank ID with quotes",
|
||||
"input": "<span contextmenu=''>",
|
||||
"fail-unless": "attribute-value-can-not-be-blank"},
|
||||
|
||||
{"description": "invalid ID because of leading space",
|
||||
"input": "<span contextmenu=' a'>",
|
||||
"fail-unless": "space-in-id"},
|
||||
|
||||
{"description": "invalid ID because of trailing space",
|
||||
"input": "<span contextmenu='a '>",
|
||||
"fail-unless": "space-in-id"},
|
||||
|
||||
{"description": "invalid ID because of space in value",
|
||||
"input": "<span contextmenu='a b'>",
|
||||
"fail-unless": "space-in-id"},
|
||||
|
||||
{"description": "invalid ID because of leading tab",
|
||||
"input": "<span contextmenu=' a'>",
|
||||
"fail-unless": "space-in-id"},
|
||||
|
||||
{"description": "invalid ID because of trailing tab",
|
||||
"input": "<span contextmenu='a '>",
|
||||
"fail-unless": "space-in-id"},
|
||||
|
||||
{"description": "invalid ID because of tab in value",
|
||||
"input": "<span contextmenu='a b'>",
|
||||
"fail-unless": "space-in-id"},
|
||||
|
||||
{"description": "invalid ID because of leading LF",
|
||||
"input": "<span contextmenu='
|
||||
a'>",
|
||||
"fail-unless": "space-in-id"},
|
||||
|
||||
{"description": "invalid ID because of trailing LF",
|
||||
"input": "<span contextmenu='a
|
||||
'>",
|
||||
"fail-unless": "space-in-id"},
|
||||
|
||||
{"description": "invalid ID because of LF in value",
|
||||
"input": "<span contextmenu='a
|
||||
b'>",
|
||||
"fail-unless": "space-in-id"},
|
||||
|
||||
{"description": "invalid ID because of leading LT",
|
||||
"input": "<span contextmenu='a'>",
|
||||
"fail-unless": "space-in-id"},
|
||||
|
||||
{"description": "invalid ID because of trailing LT",
|
||||
"input": "<span contextmenu='a'>",
|
||||
"fail-unless": "space-in-id"},
|
||||
|
||||
{"description": "invalid ID because of LT in value",
|
||||
"input": "<span contextmenu='ab'>",
|
||||
"fail-unless": "space-in-id"},
|
||||
|
||||
{"description": "invalid ID because of leading FF",
|
||||
"input": "<span contextmenu='a'>",
|
||||
"fail-unless": "space-in-id"},
|
||||
|
||||
{"description": "invalid ID because of trailing FF",
|
||||
"input": "<span contextmenu='a'>",
|
||||
"fail-unless": "space-in-id"},
|
||||
|
||||
{"description": "invalid ID because of FF in value",
|
||||
"input": "<span contextmenu='ab'>",
|
||||
"fail-unless": "space-in-id"},
|
||||
|
||||
{"description": "invalid ID because of leading CR",
|
||||
"input": "<span contextmenu='
a'>",
|
||||
"fail-unless": "space-in-id"},
|
||||
|
||||
{"description": "invalid ID because of trailing CR",
|
||||
"input": "<span contextmenu='a
'>",
|
||||
"fail-unless": "space-in-id"},
|
||||
|
||||
{"description": "invalid ID because of CR in value",
|
||||
"input": "<span contextmenu='a
b'>",
|
||||
"fail-unless": "space-in-id"}
|
||||
|
||||
]}
|
118
vendor/plugins/HTML5lib/testdata/validator/idattribute.test
vendored
Executable file
118
vendor/plugins/HTML5lib/testdata/validator/idattribute.test
vendored
Executable file
|
@ -0,0 +1,118 @@
|
|||
{"tests": [
|
||||
|
||||
{"description": "valid ID 'a'",
|
||||
"input": "<span id=a>",
|
||||
"fail-if": "invalid-attribute-value"},
|
||||
|
||||
{"description": "valid ID '1'",
|
||||
"input": "<span id=1>",
|
||||
"fail-if": "invalid-attribute-value"},
|
||||
|
||||
{"description": "wacky but valid ID",
|
||||
"input": "<span id='<html><head><title>a</title></head><body><p>b</p></body></html>'>",
|
||||
"fail-if": "invalid-attribute-value"},
|
||||
|
||||
{"description": "invalid blank ID",
|
||||
"input": "<span id>",
|
||||
"fail-unless": "attribute-value-can-not-be-blank"},
|
||||
|
||||
{"description": "invalid blank ID with quotes",
|
||||
"input": "<span id=''>",
|
||||
"fail-unless": "attribute-value-can-not-be-blank"},
|
||||
|
||||
{"description": "invalid ID because of leading space",
|
||||
"input": "<span id=' a'>",
|
||||
"fail-unless": "space-in-id"},
|
||||
|
||||
{"description": "invalid ID because of trailing space",
|
||||
"input": "<span id='a '>",
|
||||
"fail-unless": "space-in-id"},
|
||||
|
||||
{"description": "invalid ID because of space in value",
|
||||
"input": "<span id='a b'>",
|
||||
"fail-unless": "space-in-id"},
|
||||
|
||||
{"description": "invalid ID because of leading tab",
|
||||
"input": "<span id=' a'>",
|
||||
"fail-unless": "space-in-id"},
|
||||
|
||||
{"description": "invalid ID because of trailing tab",
|
||||
"input": "<span id='a '>",
|
||||
"fail-unless": "space-in-id"},
|
||||
|
||||
{"description": "invalid ID because of tab in value",
|
||||
"input": "<span id='a b'>",
|
||||
"fail-unless": "space-in-id"},
|
||||
|
||||
{"description": "invalid ID because of leading LF",
|
||||
"input": "<span id='
|
||||
a'>",
|
||||
"fail-unless": "space-in-id"},
|
||||
|
||||
{"description": "invalid ID because of trailing LF",
|
||||
"input": "<span id='a
|
||||
'>",
|
||||
"fail-unless": "space-in-id"},
|
||||
|
||||
{"description": "invalid ID because of LF in value",
|
||||
"input": "<span id='a
|
||||
b'>",
|
||||
"fail-unless": "space-in-id"},
|
||||
|
||||
{"description": "invalid ID because of leading LT",
|
||||
"input": "<span id='a'>",
|
||||
"fail-unless": "space-in-id"},
|
||||
|
||||
{"description": "invalid ID because of trailing LT",
|
||||
"input": "<span id='a'>",
|
||||
"fail-unless": "space-in-id"},
|
||||
|
||||
{"description": "invalid ID because of LT in value",
|
||||
"input": "<span id='ab'>",
|
||||
"fail-unless": "space-in-id"},
|
||||
|
||||
{"description": "invalid ID because of leading FF",
|
||||
"input": "<span id='a'>",
|
||||
"fail-unless": "space-in-id"},
|
||||
|
||||
{"description": "invalid ID because of trailing FF",
|
||||
"input": "<span id='a'>",
|
||||
"fail-unless": "space-in-id"},
|
||||
|
||||
{"description": "invalid ID because of FF in value",
|
||||
"input": "<span id='ab'>",
|
||||
"fail-unless": "space-in-id"},
|
||||
|
||||
{"description": "invalid ID because of leading CR",
|
||||
"input": "<span id='
a'>",
|
||||
"fail-unless": "space-in-id"},
|
||||
|
||||
{"description": "invalid ID because of trailing CR",
|
||||
"input": "<span id='a
'>",
|
||||
"fail-unless": "space-in-id"},
|
||||
|
||||
{"description": "invalid ID because of CR in value",
|
||||
"input": "<span id='a
b'>",
|
||||
"fail-unless": "space-in-id"},
|
||||
|
||||
{"description": "duplicate ID values",
|
||||
"input": "<span id=a><span id=a>",
|
||||
"fail-unless": "duplicate-id"},
|
||||
|
||||
{"description": "duplicate ID values with spaces (weird but true)",
|
||||
"input": "<span id='a '><span id='a '>",
|
||||
"fail-unless": "duplicate-id"},
|
||||
|
||||
{"description": "not duplicate ID values because spaces don't match",
|
||||
"input": "<span id=a><span id='a '>",
|
||||
"fail-if": "duplicate-id"},
|
||||
|
||||
{"description": "not duplicate ID values because spaces don't match",
|
||||
"input": "<span id=' a'><span id='a '>",
|
||||
"fail-if": "duplicate-id"},
|
||||
|
||||
{"description": "not duplicate ID values because case doesn't match",
|
||||
"input": "<span id=a><span id=A>",
|
||||
"fail-if": "duplicate-id"}
|
||||
|
||||
]}
|
2795
vendor/plugins/HTML5lib/testdata/validator/inputattributes.test
vendored
Executable file
2795
vendor/plugins/HTML5lib/testdata/validator/inputattributes.test
vendored
Executable file
File diff suppressed because it is too large
Load diff
375
vendor/plugins/HTML5lib/testdata/validator/starttags.test
vendored
Executable file
375
vendor/plugins/HTML5lib/testdata/validator/starttags.test
vendored
Executable file
|
@ -0,0 +1,375 @@
|
|||
{"tests": [
|
||||
|
||||
{"description": "unknown start tag <foo>",
|
||||
"input": "<foo>",
|
||||
"fail-unless": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <code>",
|
||||
"input": "<code>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <kbd>",
|
||||
"input": "<kbd>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <aside>",
|
||||
"input": "<aside>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <datagrid>",
|
||||
"input": "<datagrid>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <font>",
|
||||
"input": "<font>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <noscript>",
|
||||
"input": "<noscript>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <style>",
|
||||
"input": "<style>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <img>",
|
||||
"input": "<img>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <title>",
|
||||
"input": "<title>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <menu>",
|
||||
"input": "<menu>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <tr>",
|
||||
"input": "<tr>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <param>",
|
||||
"input": "<param>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <li>",
|
||||
"input": "<li>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <source>",
|
||||
"input": "<source>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <tfoot>",
|
||||
"input": "<tfoot>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <th>",
|
||||
"input": "<th>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <td>",
|
||||
"input": "<td>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <dl>",
|
||||
"input": "<dl>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <blockquote>",
|
||||
"input": "<blockquote>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <dd>",
|
||||
"input": "<dd>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <abbr>",
|
||||
"input": "<abbr>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <dt>",
|
||||
"input": "<dt>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <noembed>",
|
||||
"input": "<noembed>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <p>",
|
||||
"input": "<p>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <small>",
|
||||
"input": "<small>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <meter>",
|
||||
"input": "<meter>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <em>",
|
||||
"input": "<em>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <meta>",
|
||||
"input": "<meta>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <video>",
|
||||
"input": "<video>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <div>",
|
||||
"input": "<div>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <canvas>",
|
||||
"input": "<canvas>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <sub>",
|
||||
"input": "<sub>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <section>",
|
||||
"input": "<section>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <sup>",
|
||||
"input": "<sup>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <progress>",
|
||||
"input": "<progress>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <body>",
|
||||
"input": "<body>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <base>",
|
||||
"input": "<base>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <br>",
|
||||
"input": "<br>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <address>",
|
||||
"input": "<address>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <article>",
|
||||
"input": "<article>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <strong>",
|
||||
"input": "<strong>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <legend>",
|
||||
"input": "<legend>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <event-source>",
|
||||
"input": "<event-source>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <ol>",
|
||||
"input": "<ol>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <script>",
|
||||
"input": "<script>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <caption>",
|
||||
"input": "<caption>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <dialog>",
|
||||
"input": "<dialog>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <col>",
|
||||
"input": "<col>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <h2>",
|
||||
"input": "<h2>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <h3>",
|
||||
"input": "<h3>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <h1>",
|
||||
"input": "<h1>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <h6>",
|
||||
"input": "<h6>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <h4>",
|
||||
"input": "<h4>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <h5>",
|
||||
"input": "<h5>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <header>",
|
||||
"input": "<header>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <table>",
|
||||
"input": "<table>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <span>",
|
||||
"input": "<span>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <area>",
|
||||
"input": "<area>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <dfn>",
|
||||
"input": "<dfn>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <var>",
|
||||
"input": "<var>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <cite>",
|
||||
"input": "<cite>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <thead>",
|
||||
"input": "<thead>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <head>",
|
||||
"input": "<head>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <hr>",
|
||||
"input": "<hr>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <link>",
|
||||
"input": "<link>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <datatemplate>",
|
||||
"input": "<datatemplate>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <b>",
|
||||
"input": "<b>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <colgroup>",
|
||||
"input": "<colgroup>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <ul>",
|
||||
"input": "<ul>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <del>",
|
||||
"input": "<del>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <iframe>",
|
||||
"input": "<iframe>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <pre>",
|
||||
"input": "<pre>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <figure>",
|
||||
"input": "<figure>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <ins>",
|
||||
"input": "<ins>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <tbody>",
|
||||
"input": "<tbody>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <html>",
|
||||
"input": "<html>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <nav>",
|
||||
"input": "<nav>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <details>",
|
||||
"input": "<details>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <samp>",
|
||||
"input": "<samp>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <map>",
|
||||
"input": "<map>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <nest>",
|
||||
"input": "<nest>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <object>",
|
||||
"input": "<object>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <a>",
|
||||
"input": "<a>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <footer>",
|
||||
"input": "<footer>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <i>",
|
||||
"input": "<i>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <m>",
|
||||
"input": "<m>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <rule>",
|
||||
"input": "<rule>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <q>",
|
||||
"input": "<q>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <command>",
|
||||
"input": "<command>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <time>",
|
||||
"input": "<time>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <audio>",
|
||||
"input": "<audio>",
|
||||
"fail-if": "unknown-start-tag"},
|
||||
|
||||
{"description": "allowed start tag <bdo>",
|
||||
"input": "<bdo>",
|
||||
"fail-if": "unknown-start-tag"}
|
||||
|
||||
]}
|
89
vendor/plugins/HTML5lib/tests/preamble.rb
vendored
89
vendor/plugins/HTML5lib/tests/preamble.rb
vendored
|
@ -1,9 +1,9 @@
|
|||
require 'test/unit'
|
||||
|
||||
HTML5LIB_BASE = File.dirname(File.dirname(File.dirname(File.expand_path(__FILE__))))
|
||||
HTML5_BASE = File.dirname(File.dirname(File.dirname(File.expand_path(__FILE__))))
|
||||
|
||||
if File.exists?(File.join(HTML5LIB_BASE, 'testdata'))
|
||||
TESTDATA_DIR = File.join(HTML5LIB_BASE, 'testdata')
|
||||
if File.exists?(File.join(HTML5_BASE, 'testdata'))
|
||||
TESTDATA_DIR = File.join(HTML5_BASE, 'testdata')
|
||||
else
|
||||
TESTDATA_DIR = File.join(File.dirname(File.dirname(File.expand_path(__FILE__))), 'testdata')
|
||||
end
|
||||
|
@ -12,60 +12,15 @@ $:.unshift File.join(File.dirname(File.dirname(__FILE__)),'lib')
|
|||
|
||||
$:.unshift File.dirname(__FILE__)
|
||||
|
||||
def html5lib_test_files(subdirectory)
|
||||
def html5_test_files(subdirectory)
|
||||
Dir[File.join(TESTDATA_DIR, subdirectory, '*.*')]
|
||||
end
|
||||
|
||||
# Prefer the real JSON library; fall back to a tiny stand-in when it is not
# installed.
begin
  require 'rubygems'
  require 'json'
rescue LoadError
  # Minimal replacement offering only JSON.parse.
  #
  # SECURITY NOTE: the fallback rewrites the JSON text into Ruby hash syntax
  # and evaluates it with +eval+. It must only ever be fed trusted fixture
  # files, never external input.
  class JSON
    # Parses the JSON text +json+ and returns the resulting Ruby structure.
    # The argument is left untouched (no in-place substitution), so frozen
    # strings and strings reused by the caller are safe to pass.
    def self.parse(json)
      # Turn `"key":` into `"key"=>` so objects read as Ruby hashes.
      ruby_source = json.gsub(/"\s*:/, '"=>')
      # Expand \uXXXX escapes into the UTF-8 encoding of that code point.
      ruby_source = ruby_source.gsub(/\\u[0-9a-fA-F]{4}/) { |escape| [escape[2..-1].to_i(16)].pack('U') }
      # JSON's `null` literal resolves to this local when the text is evaluated.
      null = nil
      eval ruby_source
    end
  end
end
|
||||
|
||||
module HTML5lib
|
||||
module HTML5
|
||||
module TestSupport
|
||||
# True when string +b+ begins with string +a+ (note the order: prefix first).
def self.startswith?(a, b)
  b[0...a.length] == a
end

# Splits the text of one tree-construction test case into its parts.
#
# Recognised marker lines:
#   #data                        - ignored; following lines go to the input
#   #errors                      - following lines are collected as errors
#   #document                    - following lines are the expected tree dump
#   #document-fragment <context> - like #document, and records <context>
#
# Empty lines, and any other line that starts with "#errors", "#document" or
# "#data", are dropped. Inside the document section a leading "| " is removed
# from each line.
#
# Returns four values: the fragment context (nil when there is none), the
# input joined with newlines, the expected output joined with newlines, and
# the array of error lines.
#
# Raises ArgumentError when a "#document-fragment" marker names no context.
def self.parseTestcase(data)
  innerHTML = nil
  input = []
  output = []
  errors = []
  currentList = input
  data.split(/\n/).each do |line|
    # "#document" as a prefix also covers "#document-fragment".
    marker = startswith?("#errors", line) ||
             startswith?("#document", line) ||
             startswith?("#data", line)

    if !line.empty? && !marker
      # Identity check, not ==: two empty arrays compare equal by value, which
      # would strip the "| " prefix from input or error lines as well.
      if currentList.equal?(output) && startswith?("|", line)
        currentList.push(line[2..-1])
      else
        currentList.push(line)
      end
    elsif line == "#errors"
      currentList = errors
    elsif line == "#document" || startswith?("#document-fragment", line)
      if startswith?("#document-fragment", line)
        # Skip the marker (18 characters) and the separator that follows it.
        innerHTML = line[19..-1]
        raise ArgumentError, "#document-fragment marker without a context element" unless innerHTML
      end
      currentList = output
    end
  end
  return innerHTML, input.join("\n"), output.join("\n"), errors
end
|
||||
|
||||
# convert the output of str(document) to the format used in the testcases
|
||||
def convertTreeDump(treedump)
|
||||
treedump.split(/\n/)[1..-1].map { |line| (line.length > 2 and line[0] == ?|) ? line[3..-1] : line }.join("\n")
|
||||
|
@ -77,5 +32,39 @@ module HTML5lib
|
|||
end
|
||||
end
|
||||
|
||||
# Enumerates the test cases stored in a sectioned test-data file.
#
# A file consists of sections, each introduced by a heading line of the form
# "#<name>" where <name> is one of the configured section names. A heading
# equal to the first configured section starts a new test case. Lines that
# start with "#" but are not a configured heading are ordinary content.
#
# Each test case is yielded as an array holding the text of every configured
# section, in the configured order (nil for a section the case lacks), with
# trailing newlines removed.
class TestData
  include Enumerable

  # filename:: path of the test-data file
  # sections:: ordered section names; the first one delimits test cases
  def initialize(filename, sections)
    @filename = filename
    @sections = sections
  end

  # Yields one array per test case. The file is opened afresh on every call
  # and closed when iteration ends, so the object can be enumerated repeatedly.
  # Returns an enumerator when no block is given.
  def each
    return enum_for(:each) unless block_given?

    data = {}
    key = nil
    File.open(@filename) do |file|
      file.each_line do |line|
        # chomp copes with "\n", "\r\n" and a final line without a newline.
        heading = line[0, 1] == '#' ? line.chomp[1..-1] : nil
        if heading && @sections.include?(heading)
          if !data.empty? && heading == @sections[0]
            # Drop the blank separator line that precedes the next test case.
            data[key] = data[key].chomp
            yield normaliseOutput(data)
            data = {}
          end
          key = heading
          data[key] = ''
        elsif key
          data[key] += line
        end
      end
    end
    # An empty file (or one without any heading) produces no test case.
    yield normaliseOutput(data) unless data.empty?
  end

  # Returns the section texts in configured order with one trailing newline
  # removed from each; sections missing from +data+ come back as nil.
  def normaliseOutput(data)
    @sections.map { |heading| data[heading] && data[heading].chomp }
  end
end
|
||||
end
|
||||
end
|
||||
|
|
16
vendor/plugins/HTML5lib/tests/test_encoding.rb
vendored
16
vendor/plugins/HTML5lib/tests/test_encoding.rb
vendored
|
@ -1,8 +1,10 @@
|
|||
require File.join(File.dirname(__FILE__), 'preamble')
|
||||
|
||||
require 'html5lib/inputstream'
|
||||
require 'html5/inputstream'
|
||||
|
||||
class Html5EncodingTestCase < Test::Unit::TestCase
|
||||
include HTML5
|
||||
include TestSupport
|
||||
|
||||
begin
|
||||
require 'rubygems'
|
||||
|
@ -10,23 +12,21 @@ class Html5EncodingTestCase < Test::Unit::TestCase
|
|||
|
||||
def test_chardet
|
||||
file = File.open(File.join(TESTDATA_DIR, 'encoding', 'chardet', 'test_big5.txt'), 'r')
|
||||
stream = HTML5lib::HTMLInputStream.new(file, :chardet => true)
|
||||
stream = HTML5::HTMLInputStream.new(file, :chardet => true)
|
||||
assert_equal 'big5', stream.char_encoding.downcase
|
||||
rescue LoadError
|
||||
puts "chardet not found, skipping chardet tests"
|
||||
end
|
||||
end
|
||||
|
||||
html5lib_test_files('encoding').each do |test_file|
|
||||
html5_test_files('encoding').each do |test_file|
|
||||
test_name = File.basename(test_file).sub('.dat', '').tr('-', '')
|
||||
|
||||
File.read(test_file).split("#data\n").each_with_index do |data, index|
|
||||
next if data.empty?
|
||||
input, encoding = data.split(/\n#encoding\s+/, 2)
|
||||
encoding = encoding.split[0]
|
||||
TestData.new(test_file, %w(data encoding)).
|
||||
each_with_index do |(input, encoding), index|
|
||||
|
||||
define_method 'test_%s_%d' % [ test_name, index + 1 ] do
|
||||
stream = HTML5lib::HTMLInputStream.new(input, :chardet => false)
|
||||
stream = HTML5::HTMLInputStream.new(input, :chardet => false)
|
||||
assert_equal encoding.downcase, stream.char_encoding.downcase, input
|
||||
end
|
||||
end
|
||||
|
|
93
vendor/plugins/HTML5lib/tests/test_lxp.rb
vendored
93
vendor/plugins/HTML5lib/tests/test_lxp.rb
vendored
|
@ -1,23 +1,23 @@
|
|||
require File.join(File.dirname(__FILE__), 'preamble')
|
||||
|
||||
require 'html5lib/liberalxmlparser'
|
||||
require 'html5/liberalxmlparser'
|
||||
|
||||
XMLELEM = /<(\w+\s*)((?:[-:\w]+="[^"]*"\s*)+)(\/?)>/
|
||||
SORTATTRS = '<#{$1+$2.split.sort.join(' ')+$3}>'
|
||||
|
||||
def assert_xml_equal(input, expected=nil, parser=HTML5lib::XMLParser)
|
||||
document = parser.parse(input.chomp).root
|
||||
def assert_xml_equal(input, expected=nil, parser=HTML5::XMLParser)
|
||||
sortattrs = proc {"<#{$1+$2.split.sort.join(' ')+$3}>"}
|
||||
document = parser.parse(input.chomp, :lowercase_attr_name => false, :lowercase_element_name => false).root
|
||||
if not expected
|
||||
expected = input.chomp.gsub(XMLELEM,SORTATTRS)
|
||||
expected = input.chomp.gsub(XMLELEM,&sortattrs)
|
||||
expected = expected.gsub(/&#(\d+);/) {[$1.to_i].pack('U')}
|
||||
output = document.to_s.gsub(/'/,'"').gsub(XMLELEM,SORTATTRS)
|
||||
output = document.to_s.gsub(/'/,'"').gsub(XMLELEM,&sortattrs)
|
||||
assert_equal(expected, output)
|
||||
else
|
||||
assert_equal(expected, document.to_s.gsub(/'/,'"'))
|
||||
end
|
||||
end
|
||||
|
||||
def assert_xhtml_equal(input, expected=nil, parser=HTML5lib::XHTMLParser)
|
||||
def assert_xhtml_equal(input, expected=nil, parser=HTML5::XHTMLParser)
|
||||
assert_xml_equal(input, expected, parser)
|
||||
end
|
||||
|
||||
|
@ -34,10 +34,10 @@ class BasicXhtml5Test < Test::Unit::TestCase
|
|||
|
||||
def test_title_body_named_charref
|
||||
assert_xhtml_equal(
|
||||
'<title>mdash</title>A &mdash B',
|
||||
'<title>ntilde</title>A ñ B',
|
||||
'<html xmlns="http://www.w3.org/1999/xhtml">' +
|
||||
'<head><title>mdash</title></head>' +
|
||||
'<body>A '+ [0x2014].pack('U') + ' B</body>' +
|
||||
'<head><title>ntilde</title></head>' +
|
||||
'<body>A '+ [0xF1].pack('U') + ' B</body>' +
|
||||
'</html>')
|
||||
end
|
||||
end
|
||||
|
@ -193,20 +193,87 @@ EOX
|
|||
def test_br
|
||||
assert_xhtml_equal <<EOX1
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head><title>XLINK</title></head>
|
||||
<head><title>BR</title></head>
|
||||
<body>
|
||||
<br/>
|
||||
</body></html>
|
||||
EOX1
|
||||
end
|
||||
|
||||
def xtest_strong
|
||||
def test_strong
|
||||
assert_xhtml_equal <<EOX
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head><title>XLINK</title></head>
|
||||
<head><title>STRONG</title></head>
|
||||
<body>
|
||||
<strong></strong>
|
||||
</body></html>
|
||||
EOX
|
||||
end
|
||||
|
||||
def test_script
|
||||
assert_xhtml_equal <<EOX
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head><title>SCRIPT</title></head>
|
||||
<body>
|
||||
<script>1 < 2 & 3</script>
|
||||
</body></html>
|
||||
EOX
|
||||
end
|
||||
|
||||
def test_script_src
|
||||
assert_xhtml_equal <<EOX1, <<EOX2.strip
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head><title>SCRIPT</title><script src="http://example.com"/></head>
|
||||
<body>
|
||||
<script>1 < 2 & 3</script>
|
||||
</body></html>
|
||||
EOX1
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head><title>SCRIPT</title><script src="http://example.com"></script></head>
|
||||
<body>
|
||||
<script>1 < 2 & 3</script>
|
||||
</body></html>
|
||||
EOX2
|
||||
end
|
||||
|
||||
def test_title
|
||||
assert_xhtml_equal <<EOX
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head><title>1 < 2 & 3</title></head>
|
||||
<body>
|
||||
</body></html>
|
||||
EOX
|
||||
end
|
||||
|
||||
def test_prolog
|
||||
assert_xhtml_equal <<EOX1, <<EOX2.strip
|
||||
<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head><title>PROLOG</title></head>
|
||||
<body>
|
||||
</body></html>
|
||||
EOX1
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head><title>PROLOG</title></head>
|
||||
<body>
|
||||
</body></html>
|
||||
EOX2
|
||||
end
|
||||
|
||||
def test_tagsoup
|
||||
assert_xhtml_equal <<EOX1, <<EOX2.strip
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head><title>TAGSOUP</title></head>
|
||||
<body>
|
||||
<u><blockquote><p></u>
|
||||
</body></html>
|
||||
EOX1
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head><title>TAGSOUP</title></head>
|
||||
<body>
|
||||
<u/><blockquote><u/><p><u/>
|
||||
</p></blockquote></body></html>
|
||||
EOX2
|
||||
end
|
||||
|
||||
end
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue