diff --git a/app/controllers/application.rb b/app/controllers/application.rb
index 1f085d20..c328ef58 100644
--- a/app/controllers/application.rb
+++ b/app/controllers/application.rb
@@ -152,8 +152,7 @@ class ApplicationController < ActionController::Base
elsif %w(tex).include?(action_name)
response.headers['Content-Type'] = 'text/plain; charset=UTF-8'
elsif request.env['HTTP_USER_AGENT'] =~ /Validator/ or request.env.include?('HTTP_ACCEPT') &&
- Mime::Type.parse(request.env["HTTP_ACCEPT"]).include?(Mime::XHTML) &&
- !(request.env['HTTP_USER_AGENT'] =~ /Safari/ and %w(s5).include?(action_name))
+ Mime::Type.parse(request.env["HTTP_ACCEPT"]).include?(Mime::XHTML)
response.headers['Content-Type'] = 'application/xhtml+xml; charset=UTF-8'
elsif request.env['HTTP_USER_AGENT'] =~ /MathPlayer/
response.headers['Content-Type'] = 'application/xhtml+xml'
diff --git a/app/views/wiki/atom.rxml b/app/views/wiki/atom.rxml
index e356d9ae..c1273c7f 100644
--- a/app/views/wiki/atom.rxml
+++ b/app/views/wiki/atom.rxml
@@ -18,7 +18,7 @@ xml.feed('xmlns' => "http://www.w3.org/2005/Atom", "xml:lang" => 'en') do
xml.name(page.author)
end
if @hide_description
- xml.summary('Content suppressed.', 'type' => 'text')
+ xml.summary("Updated by #{page.author} on #{page.updated_at.getgm.strftime("%Y-%m-%d")} at #{page.updated_at.getgm.strftime("%H:%M:%SZ")}.", 'type' => 'text')
else
xml.content('type' => 'xhtml', 'xml:base' => url_for(:only_path => false, :web => @web_name, :action => @link_action, :id => page.name) ) do
xml.div('xmlns' => 'http://www.w3.org/1999/xhtml' ) do
diff --git a/app/views/wiki/tex.rhtml b/app/views/wiki/tex.rhtml
index 2f2e5e52..9edf7a7b 100644
--- a/app/views/wiki/tex.rhtml
+++ b/app/views/wiki/tex.rhtml
@@ -11,6 +11,16 @@
%----Macros----------
\newcommand{\gt}{>}
\newcommand{\lt}{<}
+\newcommand{\darr}{\downarrow}
+\newcommand{\nearr}{\nearrow}
+\newcommand{\nwarr}{\nwarrow}
+\newcommand{\searr}{\searrow}
+\newcommand{\swarr}{\swarrow}
+\renewcommand{\iff}{\Longleftrightarrow} % \iff is predefined by LaTeX, so it must be redefined rather than newly defined
+\newcommand{\impliedby}{\Leftarrow}
+\newcommand{\map}{\mapsto}
+\newcommand{\embedsin}{\hookrightarrow}
+\newcommand{\implies}{\Rightarrow}
\newcommand{\qed}{\blacksquare}
%-------------------------------------------------------------------
diff --git a/lib/chunks/category.rb b/lib/chunks/category.rb
index d08d8636..33e51bc9 100644
--- a/lib/chunks/category.rb
+++ b/lib/chunks/category.rb
@@ -16,7 +16,7 @@ class Category < Chunk::Abstract
def initialize(match_data, content)
super(match_data, content)
@hidden = match_data[1]
- @list = match_data[2].split(',').map { |c| c.strip }
+ @list = match_data[2].split(',').map { |c| html_escape(c.strip) }
@unmask_text = ''
if @hidden
@unmask_text = ''
diff --git a/lib/chunks/chunk.rb b/lib/chunks/chunk.rb
index 18de7d0c..f3384943 100644
--- a/lib/chunks/chunk.rb
+++ b/lib/chunks/chunk.rb
@@ -74,6 +74,13 @@ module Chunk
@content.delete_chunk(self)
end
+    def html_escape(string) # escapes &, <, > and " as HTML entities; & goes first so the entities produced afterwards are not re-escaped
+      string.gsub( /&/, "&amp;" ).
+             gsub( /</, "&lt;" ).
+             gsub( />/, "&gt;" ).
+             gsub( /"/, "&quot;" )
+    end
+
end
end
diff --git a/lib/sanitize.rb b/lib/sanitize.rb
index c36e7583..b9fa2449 100644
--- a/lib/sanitize.rb
+++ b/lib/sanitize.rb
@@ -25,14 +25,14 @@
module Sanitize
- require 'html5lib/html5parser'
- require 'html5lib/liberalxmlparser'
- require 'html5lib/treewalkers'
- require 'html5lib/treebuilders'
- require 'html5lib/serializer'
- require 'html5lib/sanitizer'
+ require 'html5/html5parser'
+ require 'html5/liberalxmlparser'
+ require 'html5/treewalkers'
+ require 'html5/treebuilders'
+ require 'html5/serializer'
+ require 'html5/sanitizer'
- include HTML5lib
+ include HTML5
# Sanitize a string, parsed using XHTML parsing rules.
#
@@ -52,12 +52,12 @@ module Sanitize
options.each do |name, value|
next unless %w(encoding treebuilder to_tree).include? name.to_s
if name.to_s == 'treebuilder'
- @treebuilder = HTML5lib::TreeBuilders.getTreeBuilder(value)
+ @treebuilder = HTML5::TreeBuilders.get_tree_builder(value) # namespace follows the HTML5lib -> HTML5 rename of the required library
else
instance_variable_set("@#{name}", value)
end
end
- parsed = XHTMLParser.parseFragment(html.to_ncr, {:tokenizer => HTMLSanitizer,
+ parsed = XHTMLParser.parse_fragment(html.to_ncr, {:tokenizer => HTMLSanitizer,
:encoding => @encoding, :tree => @treebuilder })
return parsed if @to_tree
return parsed.to_s
@@ -81,12 +81,12 @@ module Sanitize
options.each do |name, value|
next unless %w(encoding treebuilder to_tree).include? name.to_s
if name.to_s == 'treebuilder'
- @treebuilder = HTML5lib::TreeBuilders.getTreeBuilder(value)
+ @treebuilder = HTML5::TreeBuilders.get_tree_builder(value) # namespace follows the HTML5lib -> HTML5 rename of the required library
else
instance_variable_set("@#{name}", value)
end
end
- parsed = HTMLParser.parseFragment(html.to_ncr, {:tokenizer => HTMLSanitizer,
+ parsed = HTMLParser.parse_fragment(html.to_ncr, {:tokenizer => HTMLSanitizer,
:encoding => @encoding, :tree => @treebuilder })
return parsed if @to_tree
return parsed.to_s
@@ -98,13 +98,9 @@ module Sanitize
# sanitize_rexml(tree) -> string
#
def sanitize_rexml(tree)
- tokens = TreeWalkers.getTreeWalker('rexml').new(tree.to_ncr)
- HTMLSerializer.serialize(tokens, {:encoding=>'utf-8',
- :quote_attr_values => true,
- :minimize_boolean_attributes => false,
- :use_trailing_solidus => true,
+ tokens = TreeWalkers.get_tree_walker('rexml').new(tree.to_ncr)
+ XHTMLSerializer.serialize(tokens, {:encoding=>'utf-8',
:space_before_trailing_solidus => true,
- :omit_optional_tags => false,
:inject_meta_charset => false,
:sanitize => true})
end
diff --git a/public/s5/ui/default/math.css b/public/s5/ui/default/math.css
index 15927aaf..b03bcbb1 100644
--- a/public/s5/ui/default/math.css
+++ b/public/s5/ui/default/math.css
@@ -16,4 +16,4 @@ table.plaintable {
text-align:center;
margin-left:30px;
}
-
+.noborder td, .noborder th {border:0}
diff --git a/public/s5/ui/default/pretty.css b/public/s5/ui/default/pretty.css
index 536d1d6b..9f9d0cb9 100644
--- a/public/s5/ui/default/pretty.css
+++ b/public/s5/ui/default/pretty.css
@@ -1,6 +1,6 @@
/* Following are the presentation styles -- edit away! */
-body {background: #FFF; color: #000; font-size: 2em;}
+body {background: #FFF; color: #000; font-size: 1.6em;}
:link, :visited {text-decoration: none; color: #00C;}
#controls :active {color: #8A8 !important;}
#controls :focus {outline: 1px dotted #272;}
diff --git a/public/s5/ui/default/slides.js b/public/s5/ui/default/slides.js
index bdae97d6..704d29cd 100644
--- a/public/s5/ui/default/slides.js
+++ b/public/s5/ui/default/slides.js
@@ -1,4 +1,5 @@
-// S5 v1.2a1 slides.js -- released into the Public Domain
+// S5 v1.2a2 slides.js -- released into the Public Domain
+// Many modifications by Jacques Distler to allow operation as real XHTML.
//
// Please see http://www.meyerweb.com/eric/tools/s5/credits.html for information
// about all the wonderful and talented contributors to this code!
@@ -30,6 +31,7 @@ var countdown = {
var isIE = navigator.appName == 'Microsoft Internet Explorer' && navigator.userAgent.indexOf('Opera') < 1 ? 1 : 0;
var isOp = navigator.userAgent.indexOf('Opera') > -1 ? 1 : 0;
+var isSa = navigator.userAgent.indexOf('Safari') > -1 ? 1 : 0;
var isGe = navigator.userAgent.indexOf('Gecko') > -1 && navigator.userAgent.indexOf('Safari') < 1 ? 1 : 0;
function hasClass(object, className) {
@@ -111,7 +113,14 @@ function slideLabel() {
for (var o = 0; o < menunodes.length; o++) {
otext += nodeValue(menunodes[o]);
}
- list.options[list.length] = new Option(n + ' : ' + otext, n);
+ if (isSa) {
+ var option = createElement('option');
+ option.setAttribute('value', n);
+ option.appendChild(document.createTextNode(n + ' : ' + otext) );
+ list.appendChild(option);
+ } else {
+ list.options[list.length] = new Option(n + ' : ' + otext, n);
+ }
}
}
@@ -122,12 +131,12 @@ function currentSlide() {
} else {
cs = document.currentSlide;
}
- var plink = document.createElement('a');
+ var plink = createElement('a');
plink.id = 'plink';
plink.setAttribute('href', '');
- var csHere = document.createElement('span');
- var csSep = document.createElement('span');
- var csTotal = document.createElement('span');
+ var csHere = createElement('span');
+ var csSep = createElement('span');
+ var csTotal = createElement('span');
csHere.id = 'csHere';
csSep.id = 'csSep';
csTotal.id = 'csTotal';
@@ -376,7 +385,7 @@ function slideJump() {
function fixLinks() {
var thisUri = window.location.href;
thisUri = thisUri.slice(0, thisUri.length - window.location.hash.length);
- var aelements = document.getElementsByTagName('A');
+ var aelements = document.getElementsByTagName('a');
for (var i = 0; i < aelements.length; i++) {
var a = aelements[i].href;
var slideID = a.match('\#slide[0-9]{1,2}');
@@ -418,43 +427,43 @@ function permaLink() {
function createControls() {
var controlsDiv = document.getElementById("controls");
if (!controlsDiv) return;
- var controlForm = document.createElement('form');
+ var controlForm = createElement('form');
controlForm.id = 'controlForm';
controlForm.setAttribute('action', '#');
if (controlVis == 'hidden') {
controlForm.setAttribute('onmouseover', 'showHide(\'s\');');
controlForm.setAttribute('onmouseout', 'showHide(\'h\');');
}
- var navLinks = document.createElement('div');
+ var navLinks = createElement('div');
navLinks.id = 'navLinks';
- var showNotes = document.createElement('a');
+ var showNotes = createElement('a');
showNotes.id = 'show-notes';
showNotes.setAttribute('accesskey', 'n');
showNotes.setAttribute('href', 'javascript:createNotesWindow();');
showNotes.setAttribute('title', 'Show Notes');
showNotes.appendChild(document.createTextNode('\u2261'));
- var toggle = document.createElement('a');
+ var toggle = createElement('a');
toggle.id = 'toggle';
toggle.setAttribute('accesskey', 't');
toggle.setAttribute('href', 'javascript:toggle();');
toggle.appendChild(document.createTextNode('\u00D8'));
- var prev = document.createElement('a');
+ var prev = createElement('a');
prev.id = 'prev';
prev.setAttribute('accesskey', 'z');
prev.setAttribute('href', 'javascript:go(-1);');
prev.appendChild(document.createTextNode('\u00AB'));
- var next = document.createElement('a');
+ var next = createElement('a');
next.id = 'next';
next.setAttribute('accesskey', 'x');
next.setAttribute('href', 'javascript:go(1);');
next.appendChild(document.createTextNode('\u00BB'));
- var navList = document.createElement('div');
+ var navList = createElement('div');
navList.id = 'navList';
if (controlVis != 'hidden') {
navList.setAttribute('onmouseover', 'showHide(\'s\');');
navList.setAttribute('onmouseout', 'showHide(\'h\');');
}
- var jumplist = document.createElement('select');
+ var jumplist = createElement('select');
jumplist.id = 'jumplist';
jumplist.setAttribute('onchange', 'go(\'j\');');
navList.appendChild(jumplist);
@@ -503,7 +512,7 @@ function fontScale() { // causes layout problems in FireFox that get fixed if b
function fontSize(value) {
if (!(s5ss = document.getElementById('s5ss'))) {
if (!document.createStyleSheet) {
- document.getElementsByTagName('head')[0].appendChild(s5ss = document.createElement('style'));
+ document.getElementsByTagName('head')[0].appendChild(s5ss = createElement('style'));
s5ss.setAttribute('media','screen, projection');
s5ss.setAttribute('id','s5ss');
} else {
@@ -784,6 +793,14 @@ function readTime(val) {
}
}
+function createElement(element) { // creates the node in the XHTML namespace when the DOM offers createElementNS
+  var xhtmlNS = 'http://www.w3.org/1999/xhtml';
+  if (typeof document.createElementNS == 'undefined') {
+    return document.createElement(element);
+  }
+  return document.createElementNS(xhtmlNS, element);
+}
+
function windowChange() {
fontScale();
}
diff --git a/public/s5/ui/s5-notes.xhtml b/public/s5/ui/s5-notes.xhtml
new file mode 100644
index 00000000..3d8cc136
--- /dev/null
+++ b/public/s5/ui/s5-notes.xhtml
@@ -0,0 +1,64 @@
+
+
+
+
+
+Notes
+
+
+
+
+
+
+
+
+
+
+-
+
Presentation
+00:00:00
+
+-
+
Current Slide
+00:00:00
+
+
+
+
+
+
+
+
+-
+00:00:00
++
+
+
+
+
+...
+
+
+...
+
+
+
+
diff --git a/vendor/plugins/HTML5lib/History.txt b/vendor/plugins/HTML5lib/History.txt
new file mode 100644
index 00000000..d64c86c3
--- /dev/null
+++ b/vendor/plugins/HTML5lib/History.txt
@@ -0,0 +1,5 @@
+== 0.1.0 / 2007-08-07
+
+* 1 major enhancement
+ * Birthday!
+
diff --git a/vendor/plugins/HTML5lib/Manifest.txt b/vendor/plugins/HTML5lib/Manifest.txt
new file mode 100644
index 00000000..8a8a1bca
--- /dev/null
+++ b/vendor/plugins/HTML5lib/Manifest.txt
@@ -0,0 +1,59 @@
+History.txt
+Manifest.txt
+README
+Rakefile.rb
+lib/html5.rb
+lib/html5/constants.rb
+lib/html5/filters/base.rb
+lib/html5/filters/inject_meta_charset.rb
+lib/html5/filters/optionaltags.rb
+lib/html5/filters/sanitizer.rb
+lib/html5/filters/whitespace.rb
+lib/html5/html5parser.rb
+lib/html5/html5parser/after_body_phase.rb
+lib/html5/html5parser/after_frameset_phase.rb
+lib/html5/html5parser/after_head_phase.rb
+lib/html5/html5parser/before_head_phase.rb
+lib/html5/html5parser/in_body_phase.rb
+lib/html5/html5parser/in_caption_phase.rb
+lib/html5/html5parser/in_cell_phase.rb
+lib/html5/html5parser/in_column_group_phase.rb
+lib/html5/html5parser/in_frameset_phase.rb
+lib/html5/html5parser/in_head_phase.rb
+lib/html5/html5parser/in_row_phase.rb
+lib/html5/html5parser/in_select_phase.rb
+lib/html5/html5parser/in_table_body_phase.rb
+lib/html5/html5parser/in_table_phase.rb
+lib/html5/html5parser/initial_phase.rb
+lib/html5/html5parser/phase.rb
+lib/html5/html5parser/root_element_phase.rb
+lib/html5/html5parser/trailing_end_phase.rb
+lib/html5/inputstream.rb
+lib/html5/liberalxmlparser.rb
+lib/html5/sanitizer.rb
+lib/html5/serializer.rb
+lib/html5/serializer/htmlserializer.rb
+lib/html5/serializer/xhtmlserializer.rb
+lib/html5/tokenizer.rb
+lib/html5/treebuilders.rb
+lib/html5/treebuilders/base.rb
+lib/html5/treebuilders/hpricot.rb
+lib/html5/treebuilders/rexml.rb
+lib/html5/treebuilders/simpletree.rb
+lib/html5/treewalkers.rb
+lib/html5/treewalkers/base.rb
+lib/html5/treewalkers/hpricot.rb
+lib/html5/treewalkers/rexml.rb
+lib/html5/treewalkers/simpletree.rb
+lib/html5/version.rb
+parse.rb
+tests/preamble.rb
+tests/test_encoding.rb
+tests/test_lxp.rb
+tests/test_parser.rb
+tests/test_sanitizer.rb
+tests/test_serializer.rb
+tests/test_stream.rb
+tests/test_tokenizer.rb
+tests/test_treewalkers.rb
+tests/tokenizer_test_parser.rb
diff --git a/vendor/plugins/HTML5lib/README b/vendor/plugins/HTML5lib/README
index c9b3304d..f1d9991a 100644
--- a/vendor/plugins/HTML5lib/README
+++ b/vendor/plugins/HTML5lib/README
@@ -1,9 +1,45 @@
-= HTML5lib
+html5
+ by Ryan King, et al
+ http://code.google.com/p/html5lib
-== Basic Usage
+== DESCRIPTION:
- require 'html5lib'
+A Ruby implementation of the parsing algorithm in HTML5.
- doc = HTML5lib.parse('...')
- doc.class # REXML::Document
\ No newline at end of file
+== FEATURES/PROBLEMS:
+
+
+
+== SYNOPSIS:
+
+ TODO
+
+== REQUIREMENTS:
+
+* chardet, only tested with 0.9.0
+
+== INSTALL:
+
+* sudo gem install html5
+
+== LICENSE:
+
+Copyright (c) 2006-2007 The Authors
+
+Contributors:
+James Graham - jg307@cam.ac.uk
+Anne van Kesteren - annevankesteren@gmail.com
+Lachlan Hunt - lachlan.hunt@lachy.id.au
+Matt McDonald - kanashii@kanashii.ca
+Sam Ruby - rubys@intertwingly.net
+Ian Hickson (Google) - ian@hixie.ch
+Thomas Broyer - t.broyer@ltgt.net
+Jacques Distler - distler@golem.ph.utexas.edu
+Ryan King - ryan@theryanking.com
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/vendor/plugins/HTML5lib/Rakefile.rb b/vendor/plugins/HTML5lib/Rakefile.rb
index 90a72824..65b20295 100644
--- a/vendor/plugins/HTML5lib/Rakefile.rb
+++ b/vendor/plugins/HTML5lib/Rakefile.rb
@@ -1,7 +1,33 @@
require 'rake'
-require 'rake/testtask'
+require 'hoe'
+require 'lib/html5/version'
-Rake::TestTask.new do |task|
- task.pattern = 'tests/test_*.rb'
- task.verbose = true
+Hoe.new("html5", HTML5::VERSION) do |p|
+ p.name = "html5"
+ p.description = p.paragraphs_of('README', 2..5).join("\n\n")
+ p.summary = "HTML5 parser/tokenizer."
+
+ p.author = ['Ryan King'] # TODO: add more names
+ p.email = 'ryan@theryanking.com'
+ p.url = 'http://code.google.com/p/html5lib'
+ p.need_zip = true
+
+ p.extra_deps << ['chardet', '>= 0.9.0']
+ p.changes = p.paragraphs_of('History.txt', 0..1).join("\n\n")
end
+
+require 'rcov/rcovtask'
+
+namespace :test do
+ namespace :coverage do
+ desc "Delete aggregate coverage data."
+ task(:clean) { rm_f "coverage.data" }
+ end
+ desc 'Aggregate code coverage for unit, functional and integration tests'
+ Rcov::RcovTask.new(:coverage => "test:coverage:clean") do |t|
+ t.libs << "tests"
+ t.test_files = FileList["tests/test_*.rb"]
+ t.output_dir = "tests/coverage/"
+ t.verbose = true
+ end
+end
\ No newline at end of file
diff --git a/vendor/plugins/HTML5lib/bin/html5 b/vendor/plugins/HTML5lib/bin/html5
new file mode 100755
index 00000000..2680aea3
--- /dev/null
+++ b/vendor/plugins/HTML5lib/bin/html5
@@ -0,0 +1,215 @@
+#!/usr/bin/env ruby
+
+$:.unshift File.dirname(__FILE__), File.join(File.dirname(__FILE__), '..', 'lib') # resolve lib/ relative to this script, not the current directory
+
+def parse(opts, args)
+ encoding = nil
+
+ f = args[-1]
+ if f
+ begin
+ if f[0..6] == 'http://'
+ require 'open-uri'
+ f = URI.parse(f).open
+ encoding = f.charset
+ elsif f == '-'
+ f = $stdin
+ else
+ f = open(f)
+ end
+ rescue
+ end
+ else
+ $stderr.write("No filename provided. Use -h for help\n")
+ exit(1)
+ end
+
+ require 'html5/treebuilders'
+ treebuilder = HTML5::TreeBuilders[opts.treebuilder]
+
+ if opts.output == :xml
+ require 'html5/liberalxmlparser'
+ p = HTML5::XMLParser.new(:tree=>treebuilder)
+ else
+ require 'html5/html5parser'
+ p = HTML5::HTMLParser.new(:tree=>treebuilder)
+ end
+
+ if opts.parsemethod == :parse
+ args = [f, encoding]
+ else
+ args = [f, 'div', encoding]
+ end
+
+ if opts.profile
+ require 'profiler'
+ Profiler__::start_profile
+ p.send(opts.parsemethod, *args)
+ Profiler__::stop_profile
+ Profiler__::print_profile($stderr)
+ elsif opts.time
+ require 'time' # TODO: switch to benchmark
+ t0 = Time.new
+ document = p.send(opts.parsemethod, *args)
+ t1 = Time.new
+ print_output(p, document, opts)
+ t2 = Time.new
+ puts "\n\nRun took: %fs (plus %fs to print the output)"%[t1-t0, t2-t1]
+ else
+ document = p.send(opts.parsemethod, *args)
+ print_output(p, document, opts)
+ end
+end
+
+def print_output(parser, document, opts)
+ puts "Encoding: #{parser.tokenizer.stream.char_encoding}" if opts.encoding
+
+ case opts.output
+ when :xml
+ print document
+ when :html
+ require 'html5/treewalkers'
+ tokens = HTML5::TreeWalkers[opts.treebuilder].new(document)
+ require 'html5/serializer'
+ puts HTML5::HTMLSerializer.serialize(tokens, opts.serializer)
+ when :hilite
+ print document.hilite
+ when :tree
+ document = [document] unless document.respond_to?(:each)
+ document.each {|fragment| puts parser.tree.testSerializer(fragment)}
+ end
+
+ if opts.error
+ errList=[]
+ for pos, message in parser.errors
+ errList << ("Line %i Col %i"%pos + " " + message)
+ end
+ $stdout.write("\nParse errors:\n" + errList.join("\n")+"\n")
+ end
+end
+
+require 'ostruct'
+options = OpenStruct.new
+options.profile = false
+options.time = false
+options.output = :html
+options.treebuilder = 'simpletree'
+options.error = false
+options.encoding = false
+options.parsemethod = :parse
+options.serializer = {
+ :encoding => 'utf-8',
+ :omit_optional_tags => false,
+ :inject_meta_charset => false
+}
+
+require 'optparse'
+opts = OptionParser.new do |opts|
+ opts.separator ""
+ opts.separator "Parse Options:"
+
+ opts.on("-b", "--treebuilder NAME") do |treebuilder|
+ options.treebuilder = treebuilder
+ end
+
+ opts.on("-f", "--fragment", "Parse as a fragment") do |parse|
+ options.parsemethod = :parse_fragment
+ end
+
+ opts.separator ""
+ opts.separator "Filter Options:"
+
+  opts.on("--[no-]inject-meta-charset", "inject <meta charset>") do |inject| # toggles the serializer's :inject_meta_charset option
+    options.serializer[:inject_meta_charset] = inject
+  end
+
+ opts.on("--[no-]strip-whitespace", "strip unnecessary whitespace") do |strip|
+ options.serializer[:strip_whitespace] = strip
+ end
+
+ opts.on("--[no-]sanitize", "escape unsafe tags") do |sanitize|
+ options.serializer[:sanitize] = sanitize
+ end
+
+ opts.separator ""
+ opts.separator "Output Options:"
+
+ opts.on("--tree", "output as debug tree") do |tree|
+ options.output = :tree
+ end
+
+ opts.on("-x", "--xml", "output as xml") do |xml|
+ options.output = :xml
+ options.treebuilder = "rexml"
+ end
+
+ opts.on("--[no-]html", "Output as html") do |html|
+ options.output = (html ? :html : nil)
+ end
+
+ opts.on("--hilite", "Output as formatted highlighted code.") do |hilite|
+ options.output = :hilite
+ end
+
+ opts.on("-e", "--error", "Print a list of parse errors") do |error|
+ options.error = error
+ end
+
+ opts.separator ""
+ opts.separator "Serialization Options:"
+
+ opts.on("--[no-]omit-optional-tags", "Omit optional tags") do |omit|
+ options.serializer[:omit_optional_tags] = omit
+ end
+
+ opts.on("--[no-]quote-attr-values", "Quote attribute values") do |quote|
+ options.serializer[:quote_attr_values] = quote
+ end
+
+ opts.on("--[no-]use-best-quote-char", "Use best quote character") do |best|
+ options.serializer[:use_best_quote_char] = best
+ end
+
+ opts.on("--quote-char C", "Use specified quote character") do |c|
+ options.serializer[:quote_char] = c
+ end
+
+ opts.on("--[no-]minimize-boolean-attributes", "Minimize boolean attributes") do |min|
+ options.serializer[:minimize_boolean_attributes] = min
+ end
+
+ opts.on("--[no-]use-trailing-solidus", "Use trailing solidus") do |slash|
+ options.serializer[:use_trailing_solidus] = slash
+ end
+
+ opts.on("--[no-]escape-lt-in-attrs", "Escape less than signs in attribute values") do |lt|
+ options.serializer[:escape_lt_in_attrs] = lt
+ end
+
+ opts.on("--[no-]escape-rcdata", "Escape rcdata element values") do |rcdata|
+ options.serializer[:escape_rcdata] = rcdata
+ end
+
+ opts.separator ""
+ opts.separator "Other Options:"
+
+ opts.on("-p", "--[no-]profile", "Profile the run") do |profile|
+ options.profile = profile
+ end
+
+ opts.on("-t", "--[no-]time", "Time the run") do |time|
+ options.time = time
+ end
+
+ opts.on("-c", "--[no-]encoding", "Print character encoding used") do |encoding|
+ options.encoding = encoding
+ end
+
+ opts.on_tail("-h", "--help", "Show this message") do
+ puts opts
+ exit
+ end
+end
+
+opts.parse!(ARGV)
+parse options, ARGV
diff --git a/vendor/plugins/HTML5lib/lib/html5.rb b/vendor/plugins/HTML5lib/lib/html5.rb
new file mode 100644
index 00000000..7ca2ee61
--- /dev/null
+++ b/vendor/plugins/HTML5lib/lib/html5.rb
@@ -0,0 +1,18 @@
+require 'html5/html5parser'
+require 'html5/version'
+
+# Module-level convenience entry points that delegate to HTMLParser.
+module HTML5
+
+  # Delegates to HTMLParser.parse, passing +stream+ and +options+ through unchanged.
+  def self.parse(stream, options={})
+    HTMLParser.parse(stream, options)
+  end
+
+  # Delegates to HTMLParser.parse_fragment, passing +stream+ and +options+
+  # through unchanged. (Previously this called HTMLParser.parse, so the
+  # fragment entry point never actually parsed a fragment.)
+  def self.parse_fragment(stream, options={})
+    HTMLParser.parse_fragment(stream, options)
+  end
+end
diff --git a/vendor/plugins/HTML5lib/lib/html5/constants.rb b/vendor/plugins/HTML5lib/lib/html5/constants.rb
new file mode 100755
index 00000000..8ccaf66d
--- /dev/null
+++ b/vendor/plugins/HTML5lib/lib/html5/constants.rb
@@ -0,0 +1,818 @@
+module HTML5
+
+ class EOF < Exception; end
+
+ CONTENT_MODEL_FLAGS = [
+ :PCDATA,
+ :RCDATA,
+ :CDATA,
+ :PLAINTEXT
+ ]
+
+ SCOPING_ELEMENTS = %w[
+ button
+ caption
+ html
+ marquee
+ object
+ table
+ td
+ th
+ ]
+
+ FORMATTING_ELEMENTS = %w[
+ a
+ b
+ big
+ em
+ font
+ i
+ nobr
+ s
+ small
+ strike
+ strong
+ tt
+ u
+ ]
+
+ SPECIAL_ELEMENTS = %w[
+ address
+ area
+ base
+ basefont
+ bgsound
+ blockquote
+ body
+ br
+ center
+ col
+ colgroup
+ dd
+ dir
+ div
+ dl
+ dt
+ embed
+ fieldset
+ form
+ frame
+ frameset
+ h1
+ h2
+ h3
+ h4
+ h5
+ h6
+ head
+ hr
+ iframe
+ image
+ img
+ input
+ isindex
+ li
+ link
+ listing
+ menu
+ meta
+ noembed
+ noframes
+ noscript
+ ol
+ optgroup
+ option
+ p
+ param
+ plaintext
+ pre
+ script
+ select
+ spacer
+ style
+ tbody
+ textarea
+ tfoot
+ thead
+ title
+ tr
+ ul
+ wbr
+ ]
+
+ SPACE_CHARACTERS = %W[
+ \t
+ \n
+ \x0B
+ \x0C
+ \x20
+ \r
+ ]
+
+ TABLE_INSERT_MODE_ELEMENTS = %w[
+ table
+ tbody
+ tfoot
+ thead
+ tr
+ ]
+
+ ASCII_LOWERCASE = ('a'..'z').to_a.join('')
+ ASCII_UPPERCASE = ('A'..'Z').to_a.join('')
+ ASCII_LETTERS = ASCII_LOWERCASE + ASCII_UPPERCASE
+ DIGITS = '0'..'9'
+ HEX_DIGITS = DIGITS.to_a + ('a'..'f').to_a + ('A'..'F').to_a
+
+ # Heading elements need to be ordered
+ HEADING_ELEMENTS = %w[
+ h1
+ h2
+ h3
+ h4
+ h5
+ h6
+ ]
+
+ # XXX What about event-source and command?
+ VOID_ELEMENTS = %w[
+ base
+ link
+ meta
+ hr
+ br
+ img
+ embed
+ param
+ area
+ col
+ input
+ ]
+
+ CDATA_ELEMENTS = %w[title textarea]
+
+ RCDATA_ELEMENTS = %w[
+ style
+ script
+ xmp
+ iframe
+ noembed
+ noframes
+ noscript
+ ]
+
+ BOOLEAN_ATTRIBUTES = {
+ :global => %w[irrelevant],
+ 'style' => %w[scoped],
+ 'img' => %w[ismap],
+ 'audio' => %w[autoplay controls],
+ 'video' => %w[autoplay controls],
+ 'script' => %w[defer async],
+ 'details' => %w[open],
+ 'datagrid' => %w[multiple disabled],
+ 'command' => %w[hidden disabled checked default],
+ 'menu' => %w[autosubmit],
+ 'fieldset' => %w[disabled readonly],
+ 'option' => %w[disabled readonly selected],
+ 'optgroup' => %w[disabled readonly],
+ 'button' => %w[disabled autofocus],
+ 'input' => %w[disabled readonly required autofocus checked ismap],
+ 'select' => %w[disabled readonly autofocus multiple],
+ 'output' => %w[disabled readonly]
+
+ }
+
+ # entitiesWindows1252 has to be _ordered_ and needs to have an index.
+ ENTITIES_WINDOWS1252 = [
+ 8364, # 0x80 0x20AC EURO SIGN
+ 65533, # 0x81 UNDEFINED
+ 8218, # 0x82 0x201A SINGLE LOW-9 QUOTATION MARK
+ 402, # 0x83 0x0192 LATIN SMALL LETTER F WITH HOOK
+ 8222, # 0x84 0x201E DOUBLE LOW-9 QUOTATION MARK
+ 8230, # 0x85 0x2026 HORIZONTAL ELLIPSIS
+ 8224, # 0x86 0x2020 DAGGER
+ 8225, # 0x87 0x2021 DOUBLE DAGGER
+ 710, # 0x88 0x02C6 MODIFIER LETTER CIRCUMFLEX ACCENT
+ 8240, # 0x89 0x2030 PER MILLE SIGN
+ 352, # 0x8A 0x0160 LATIN CAPITAL LETTER S WITH CARON
+ 8249, # 0x8B 0x2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+ 338, # 0x8C 0x0152 LATIN CAPITAL LIGATURE OE
+ 65533, # 0x8D UNDEFINED
+ 381, # 0x8E 0x017D LATIN CAPITAL LETTER Z WITH CARON
+ 65533, # 0x8F UNDEFINED
+ 65533, # 0x90 UNDEFINED
+ 8216, # 0x91 0x2018 LEFT SINGLE QUOTATION MARK
+ 8217, # 0x92 0x2019 RIGHT SINGLE QUOTATION MARK
+ 8220, # 0x93 0x201C LEFT DOUBLE QUOTATION MARK
+ 8221, # 0x94 0x201D RIGHT DOUBLE QUOTATION MARK
+ 8226, # 0x95 0x2022 BULLET
+ 8211, # 0x96 0x2013 EN DASH
+ 8212, # 0x97 0x2014 EM DASH
+ 732, # 0x98 0x02DC SMALL TILDE
+ 8482, # 0x99 0x2122 TRADE MARK SIGN
+ 353, # 0x9A 0x0161 LATIN SMALL LETTER S WITH CARON
+ 8250, # 0x9B 0x203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+ 339, # 0x9C 0x0153 LATIN SMALL LIGATURE OE
+ 65533, # 0x9D UNDEFINED
+ 382, # 0x9E 0x017E LATIN SMALL LETTER Z WITH CARON
+ 376 # 0x9F 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS
+ ]
+
+ # ENTITIES was generated from Python using the following code:
+ #
+ # import constants
+ # entities = constants.entities.items()
+ # entities.sort()
+ # list = [ ' '.join([repr(entity), '=>', ord(value)<128 and
+ # repr(str(value)) or repr(value.encode('utf-8')).replace("'",'"')])
+ # for entity, value in entities]
+ # print ' ENTITIES = {\n ' + ',\n '.join(list) + '\n }'
+
+ ENTITIES = {
+ 'AElig' => "\xc3\x86",
+ 'AElig;' => "\xc3\x86",
+ 'AMP' => '&',
+ 'AMP;' => '&',
+ 'Aacute' => "\xc3\x81",
+ 'Aacute;' => "\xc3\x81",
+ 'Acirc' => "\xc3\x82",
+ 'Acirc;' => "\xc3\x82",
+ 'Agrave' => "\xc3\x80",
+ 'Agrave;' => "\xc3\x80",
+ 'Alpha;' => "\xce\x91",
+ 'Aring' => "\xc3\x85",
+ 'Aring;' => "\xc3\x85",
+ 'Atilde' => "\xc3\x83",
+ 'Atilde;' => "\xc3\x83",
+ 'Auml' => "\xc3\x84",
+ 'Auml;' => "\xc3\x84",
+ 'Beta;' => "\xce\x92",
+ 'COPY' => "\xc2\xa9",
+ 'COPY;' => "\xc2\xa9",
+ 'Ccedil' => "\xc3\x87",
+ 'Ccedil;' => "\xc3\x87",
+ 'Chi;' => "\xce\xa7",
+ 'Dagger;' => "\xe2\x80\xa1",
+ 'Delta;' => "\xce\x94",
+ 'ETH' => "\xc3\x90",
+ 'ETH;' => "\xc3\x90",
+ 'Eacute' => "\xc3\x89",
+ 'Eacute;' => "\xc3\x89",
+ 'Ecirc' => "\xc3\x8a",
+ 'Ecirc;' => "\xc3\x8a",
+ 'Egrave' => "\xc3\x88",
+ 'Egrave;' => "\xc3\x88",
+ 'Epsilon;' => "\xce\x95",
+ 'Eta;' => "\xce\x97",
+ 'Euml' => "\xc3\x8b",
+ 'Euml;' => "\xc3\x8b",
+ 'GT' => '>',
+ 'GT;' => '>',
+ 'Gamma;' => "\xce\x93",
+ 'Iacute' => "\xc3\x8d",
+ 'Iacute;' => "\xc3\x8d",
+ 'Icirc' => "\xc3\x8e",
+ 'Icirc;' => "\xc3\x8e",
+ 'Igrave' => "\xc3\x8c",
+ 'Igrave;' => "\xc3\x8c",
+ 'Iota;' => "\xce\x99",
+ 'Iuml' => "\xc3\x8f",
+ 'Iuml;' => "\xc3\x8f",
+ 'Kappa;' => "\xce\x9a",
+ 'LT' => '<',
+ 'LT;' => '<',
+ 'Lambda;' => "\xce\x9b",
+ 'Mu;' => "\xce\x9c",
+ 'Ntilde' => "\xc3\x91",
+ 'Ntilde;' => "\xc3\x91",
+ 'Nu;' => "\xce\x9d",
+ 'OElig;' => "\xc5\x92",
+ 'Oacute' => "\xc3\x93",
+ 'Oacute;' => "\xc3\x93",
+ 'Ocirc' => "\xc3\x94",
+ 'Ocirc;' => "\xc3\x94",
+ 'Ograve' => "\xc3\x92",
+ 'Ograve;' => "\xc3\x92",
+ 'Omega;' => "\xce\xa9",
+ 'Omicron;' => "\xce\x9f",
+ 'Oslash' => "\xc3\x98",
+ 'Oslash;' => "\xc3\x98",
+ 'Otilde' => "\xc3\x95",
+ 'Otilde;' => "\xc3\x95",
+ 'Ouml' => "\xc3\x96",
+ 'Ouml;' => "\xc3\x96",
+ 'Phi;' => "\xce\xa6",
+ 'Pi;' => "\xce\xa0",
+ 'Prime;' => "\xe2\x80\xb3",
+ 'Psi;' => "\xce\xa8",
+ 'QUOT' => '"',
+ 'QUOT;' => '"',
+ 'REG' => "\xc2\xae",
+ 'REG;' => "\xc2\xae",
+ 'Rho;' => "\xce\xa1",
+ 'Scaron;' => "\xc5\xa0",
+ 'Sigma;' => "\xce\xa3",
+ 'THORN' => "\xc3\x9e",
+ 'THORN;' => "\xc3\x9e",
+ 'TRADE;' => "\xe2\x84\xa2",
+ 'Tau;' => "\xce\xa4",
+ 'Theta;' => "\xce\x98",
+ 'Uacute' => "\xc3\x9a",
+ 'Uacute;' => "\xc3\x9a",
+ 'Ucirc' => "\xc3\x9b",
+ 'Ucirc;' => "\xc3\x9b",
+ 'Ugrave' => "\xc3\x99",
+ 'Ugrave;' => "\xc3\x99",
+ 'Upsilon;' => "\xce\xa5",
+ 'Uuml' => "\xc3\x9c",
+ 'Uuml;' => "\xc3\x9c",
+ 'Xi;' => "\xce\x9e",
+ 'Yacute' => "\xc3\x9d",
+ 'Yacute;' => "\xc3\x9d",
+ 'Yuml;' => "\xc5\xb8",
+ 'Zeta;' => "\xce\x96",
+ 'aacute' => "\xc3\xa1",
+ 'aacute;' => "\xc3\xa1",
+ 'acirc' => "\xc3\xa2",
+ 'acirc;' => "\xc3\xa2",
+ 'acute' => "\xc2\xb4",
+ 'acute;' => "\xc2\xb4",
+ 'aelig' => "\xc3\xa6",
+ 'aelig;' => "\xc3\xa6",
+ 'agrave' => "\xc3\xa0",
+ 'agrave;' => "\xc3\xa0",
+ 'alefsym;' => "\xe2\x84\xb5",
+ 'alpha;' => "\xce\xb1",
+ 'amp' => '&',
+ 'amp;' => '&',
+ 'and;' => "\xe2\x88\xa7",
+ 'ang;' => "\xe2\x88\xa0",
+ 'apos;' => "'",
+ 'aring' => "\xc3\xa5",
+ 'aring;' => "\xc3\xa5",
+ 'asymp;' => "\xe2\x89\x88",
+ 'atilde' => "\xc3\xa3",
+ 'atilde;' => "\xc3\xa3",
+ 'auml' => "\xc3\xa4",
+ 'auml;' => "\xc3\xa4",
+ 'bdquo;' => "\xe2\x80\x9e",
+ 'beta;' => "\xce\xb2",
+ 'brvbar' => "\xc2\xa6",
+ 'brvbar;' => "\xc2\xa6",
+ 'bull;' => "\xe2\x80\xa2",
+ 'cap;' => "\xe2\x88\xa9",
+ 'ccedil' => "\xc3\xa7",
+ 'ccedil;' => "\xc3\xa7",
+ 'cedil' => "\xc2\xb8",
+ 'cedil;' => "\xc2\xb8",
+ 'cent' => "\xc2\xa2",
+ 'cent;' => "\xc2\xa2",
+ 'chi;' => "\xcf\x87",
+ 'circ;' => "\xcb\x86",
+ 'clubs;' => "\xe2\x99\xa3",
+ 'cong;' => "\xe2\x89\x85",
+ 'copy' => "\xc2\xa9",
+ 'copy;' => "\xc2\xa9",
+ 'crarr;' => "\xe2\x86\xb5",
+ 'cup;' => "\xe2\x88\xaa",
+ 'curren' => "\xc2\xa4",
+ 'curren;' => "\xc2\xa4",
+ 'dArr;' => "\xe2\x87\x93",
+ 'dagger;' => "\xe2\x80\xa0",
+ 'darr;' => "\xe2\x86\x93",
+ 'deg' => "\xc2\xb0",
+ 'deg;' => "\xc2\xb0",
+ 'delta;' => "\xce\xb4",
+ 'diams;' => "\xe2\x99\xa6",
+ 'divide' => "\xc3\xb7",
+ 'divide;' => "\xc3\xb7",
+ 'eacute' => "\xc3\xa9",
+ 'eacute;' => "\xc3\xa9",
+ 'ecirc' => "\xc3\xaa",
+ 'ecirc;' => "\xc3\xaa",
+ 'egrave' => "\xc3\xa8",
+ 'egrave;' => "\xc3\xa8",
+ 'empty;' => "\xe2\x88\x85",
+ 'emsp;' => "\xe2\x80\x83",
+ 'ensp;' => "\xe2\x80\x82",
+ 'epsilon;' => "\xce\xb5",
+ 'equiv;' => "\xe2\x89\xa1",
+ 'eta;' => "\xce\xb7",
+ 'eth' => "\xc3\xb0",
+ 'eth;' => "\xc3\xb0",
+ 'euml' => "\xc3\xab",
+ 'euml;' => "\xc3\xab",
+ 'euro;' => "\xe2\x82\xac",
+ 'exist;' => "\xe2\x88\x83",
+ 'fnof;' => "\xc6\x92",
+ 'forall;' => "\xe2\x88\x80",
+ 'frac12' => "\xc2\xbd",
+ 'frac12;' => "\xc2\xbd",
+ 'frac14' => "\xc2\xbc",
+ 'frac14;' => "\xc2\xbc",
+ 'frac34' => "\xc2\xbe",
+ 'frac34;' => "\xc2\xbe",
+ 'frasl;' => "\xe2\x81\x84",
+ 'gamma;' => "\xce\xb3",
+ 'ge;' => "\xe2\x89\xa5",
+ 'gt' => '>',
+ 'gt;' => '>',
+ 'hArr;' => "\xe2\x87\x94",
+ 'harr;' => "\xe2\x86\x94",
+ 'hearts;' => "\xe2\x99\xa5",
+ 'hellip;' => "\xe2\x80\xa6",
+ 'iacute' => "\xc3\xad",
+ 'iacute;' => "\xc3\xad",
+ 'icirc' => "\xc3\xae",
+ 'icirc;' => "\xc3\xae",
+ 'iexcl' => "\xc2\xa1",
+ 'iexcl;' => "\xc2\xa1",
+ 'igrave' => "\xc3\xac",
+ 'igrave;' => "\xc3\xac",
+ 'image;' => "\xe2\x84\x91",
+ 'infin;' => "\xe2\x88\x9e",
+ 'int;' => "\xe2\x88\xab",
+ 'iota;' => "\xce\xb9",
+ 'iquest' => "\xc2\xbf",
+ 'iquest;' => "\xc2\xbf",
+ 'isin;' => "\xe2\x88\x88",
+ 'iuml' => "\xc3\xaf",
+ 'iuml;' => "\xc3\xaf",
+ 'kappa;' => "\xce\xba",
+ 'lArr;' => "\xe2\x87\x90",
+ 'lambda;' => "\xce\xbb",
+ 'lang;' => "\xe3\x80\x88",
+ 'laquo' => "\xc2\xab",
+ 'laquo;' => "\xc2\xab",
+ 'larr;' => "\xe2\x86\x90",
+ 'lceil;' => "\xe2\x8c\x88",
+ 'ldquo;' => "\xe2\x80\x9c",
+ 'le;' => "\xe2\x89\xa4",
+ 'lfloor;' => "\xe2\x8c\x8a",
+ 'lowast;' => "\xe2\x88\x97",
+ 'loz;' => "\xe2\x97\x8a",
+ 'lrm;' => "\xe2\x80\x8e",
+ 'lsaquo;' => "\xe2\x80\xb9",
+ 'lsquo;' => "\xe2\x80\x98",
+ 'lt' => '<',
+ 'lt;' => '<',
+ 'macr' => "\xc2\xaf",
+ 'macr;' => "\xc2\xaf",
+ 'mdash;' => "\xe2\x80\x94",
+ 'micro' => "\xc2\xb5",
+ 'micro;' => "\xc2\xb5",
+ 'middot' => "\xc2\xb7",
+ 'middot;' => "\xc2\xb7",
+ 'minus;' => "\xe2\x88\x92",
+ 'mu;' => "\xce\xbc",
+ 'nabla;' => "\xe2\x88\x87",
+ 'nbsp' => "\xc2\xa0",
+ 'nbsp;' => "\xc2\xa0",
+ 'ndash;' => "\xe2\x80\x93",
+ 'ne;' => "\xe2\x89\xa0",
+ 'ni;' => "\xe2\x88\x8b",
+ 'not' => "\xc2\xac",
+ 'not;' => "\xc2\xac",
+ 'notin;' => "\xe2\x88\x89",
+ 'nsub;' => "\xe2\x8a\x84",
+ 'ntilde' => "\xc3\xb1",
+ 'ntilde;' => "\xc3\xb1",
+ 'nu;' => "\xce\xbd",
+ 'oacute' => "\xc3\xb3",
+ 'oacute;' => "\xc3\xb3",
+ 'ocirc' => "\xc3\xb4",
+ 'ocirc;' => "\xc3\xb4",
+ 'oelig;' => "\xc5\x93",
+ 'ograve' => "\xc3\xb2",
+ 'ograve;' => "\xc3\xb2",
+ 'oline;' => "\xe2\x80\xbe",
+ 'omega;' => "\xcf\x89",
+ 'omicron;' => "\xce\xbf",
+ 'oplus;' => "\xe2\x8a\x95",
+ 'or;' => "\xe2\x88\xa8",
+ 'ordf' => "\xc2\xaa",
+ 'ordf;' => "\xc2\xaa",
+ 'ordm' => "\xc2\xba",
+ 'ordm;' => "\xc2\xba",
+ 'oslash' => "\xc3\xb8",
+ 'oslash;' => "\xc3\xb8",
+ 'otilde' => "\xc3\xb5",
+ 'otilde;' => "\xc3\xb5",
+ 'otimes;' => "\xe2\x8a\x97",
+ 'ouml' => "\xc3\xb6",
+ 'ouml;' => "\xc3\xb6",
+ 'para' => "\xc2\xb6",
+ 'para;' => "\xc2\xb6",
+ 'part;' => "\xe2\x88\x82",
+ 'permil;' => "\xe2\x80\xb0",
+ 'perp;' => "\xe2\x8a\xa5",
+ 'phi;' => "\xcf\x86",
+ 'pi;' => "\xcf\x80",
+ 'piv;' => "\xcf\x96",
+ 'plusmn' => "\xc2\xb1",
+ 'plusmn;' => "\xc2\xb1",
+ 'pound' => "\xc2\xa3",
+ 'pound;' => "\xc2\xa3",
+ 'prime;' => "\xe2\x80\xb2",
+ 'prod;' => "\xe2\x88\x8f",
+ 'prop;' => "\xe2\x88\x9d",
+ 'psi;' => "\xcf\x88",
+ 'quot' => '"',
+ 'quot;' => '"',
+ 'rArr;' => "\xe2\x87\x92",
+ 'radic;' => "\xe2\x88\x9a",
+ 'rang;' => "\xe3\x80\x89",
+ 'raquo' => "\xc2\xbb",
+ 'raquo;' => "\xc2\xbb",
+ 'rarr;' => "\xe2\x86\x92",
+ 'rceil;' => "\xe2\x8c\x89",
+ 'rdquo;' => "\xe2\x80\x9d",
+ 'real;' => "\xe2\x84\x9c",
+ 'reg' => "\xc2\xae",
+ 'reg;' => "\xc2\xae",
+ 'rfloor;' => "\xe2\x8c\x8b",
+ 'rho;' => "\xcf\x81",
+ 'rlm;' => "\xe2\x80\x8f",
+ 'rsaquo;' => "\xe2\x80\xba",
+ 'rsquo;' => "\xe2\x80\x99",
+ 'sbquo;' => "\xe2\x80\x9a",
+ 'scaron;' => "\xc5\xa1",
+ 'sdot;' => "\xe2\x8b\x85",
+ 'sect' => "\xc2\xa7",
+ 'sect;' => "\xc2\xa7",
+ 'shy' => "\xc2\xad",
+ 'shy;' => "\xc2\xad",
+ 'sigma;' => "\xcf\x83",
+ 'sigmaf;' => "\xcf\x82",
+ 'sim;' => "\xe2\x88\xbc",
+ 'spades;' => "\xe2\x99\xa0",
+ 'sub;' => "\xe2\x8a\x82",
+ 'sube;' => "\xe2\x8a\x86",
+ 'sum;' => "\xe2\x88\x91",
+ 'sup1' => "\xc2\xb9",
+ 'sup1;' => "\xc2\xb9",
+ 'sup2' => "\xc2\xb2",
+ 'sup2;' => "\xc2\xb2",
+ 'sup3' => "\xc2\xb3",
+ 'sup3;' => "\xc2\xb3",
+ 'sup;' => "\xe2\x8a\x83",
+ 'supe;' => "\xe2\x8a\x87",
+ 'szlig' => "\xc3\x9f",
+ 'szlig;' => "\xc3\x9f",
+ 'tau;' => "\xcf\x84",
+ 'there4;' => "\xe2\x88\xb4",
+ 'theta;' => "\xce\xb8",
+ 'thetasym;' => "\xcf\x91",
+ 'thinsp;' => "\xe2\x80\x89",
+ 'thorn' => "\xc3\xbe",
+ 'thorn;' => "\xc3\xbe",
+ 'tilde;' => "\xcb\x9c",
+ 'times' => "\xc3\x97",
+ 'times;' => "\xc3\x97",
+ 'trade;' => "\xe2\x84\xa2",
+ 'uArr;' => "\xe2\x87\x91",
+ 'uacute' => "\xc3\xba",
+ 'uacute;' => "\xc3\xba",
+ 'uarr;' => "\xe2\x86\x91",
+ 'ucirc' => "\xc3\xbb",
+ 'ucirc;' => "\xc3\xbb",
+ 'ugrave' => "\xc3\xb9",
+ 'ugrave;' => "\xc3\xb9",
+ 'uml' => "\xc2\xa8",
+ 'uml;' => "\xc2\xa8",
+ 'upsih;' => "\xcf\x92",
+ 'upsilon;' => "\xcf\x85",
+ 'uuml' => "\xc3\xbc",
+ 'uuml;' => "\xc3\xbc",
+ 'weierp;' => "\xe2\x84\x98",
+ 'xi;' => "\xce\xbe",
+ 'yacute' => "\xc3\xbd",
+ 'yacute;' => "\xc3\xbd",
+ 'yen' => "\xc2\xa5",
+ 'yen;' => "\xc2\xa5",
+ 'yuml' => "\xc3\xbf",
+ 'yuml;' => "\xc3\xbf",
+ 'zeta;' => "\xce\xb6",
+ 'zwj;' => "\xe2\x80\x8d",
+ 'zwnj;' => "\xe2\x80\x8c"
+ }
+
+ ENCODINGS = %w[
+ ansi_x3.4-1968
+ iso-ir-6
+ ansi_x3.4-1986
+ iso_646.irv:1991
+ ascii
+ iso646-us
+ us-ascii
+ us
+ ibm367
+ cp367
+ csascii
+ ks_c_5601-1987
+ korean
+ iso-2022-kr
+ csiso2022kr
+ euc-kr
+ iso-2022-jp
+ csiso2022jp
+ iso-2022-jp-2
+ iso-ir-58
+ chinese
+ csiso58gb231280
+ iso_8859-1:1987
+ iso-ir-100
+ iso_8859-1
+ iso-8859-1
+ latin1
+ l1
+ ibm819
+ cp819
+ csisolatin1
+ iso_8859-2:1987
+ iso-ir-101
+ iso_8859-2
+ iso-8859-2
+ latin2
+ l2
+ csisolatin2
+ iso_8859-3:1988
+ iso-ir-109
+ iso_8859-3
+ iso-8859-3
+ latin3
+ l3
+ csisolatin3
+ iso_8859-4:1988
+ iso-ir-110
+ iso_8859-4
+ iso-8859-4
+ latin4
+ l4
+ csisolatin4
+ iso_8859-6:1987
+ iso-ir-127
+ iso_8859-6
+ iso-8859-6
+ ecma-114
+ asmo-708
+ arabic
+ csisolatinarabic
+ iso_8859-7:1987
+ iso-ir-126
+ iso_8859-7
+ iso-8859-7
+ elot_928
+ ecma-118
+ greek
+ greek8
+ csisolatingreek
+ iso_8859-8:1988
+ iso-ir-138
+ iso_8859-8
+ iso-8859-8
+ hebrew
+ csisolatinhebrew
+ iso_8859-5:1988
+ iso-ir-144
+ iso_8859-5
+ iso-8859-5
+ cyrillic
+ csisolatincyrillic
+ iso_8859-9:1989
+ iso-ir-148
+ iso_8859-9
+ iso-8859-9
+ latin5
+ l5
+ csisolatin5
+ iso-8859-10
+ iso-ir-157
+ l6
+ iso_8859-10:1992
+ csisolatin6
+ latin6
+ hp-roman8
+ roman8
+ r8
+ ibm037
+ cp037
+ csibm037
+ ibm424
+ cp424
+ csibm424
+ ibm437
+ cp437
+ 437
+ cspc8codepage437
+ ibm500
+ cp500
+ csibm500
+ ibm775
+ cp775
+ cspc775baltic
+ ibm850
+ cp850
+ 850
+ cspc850multilingual
+ ibm852
+ cp852
+ 852
+ cspcp852
+ ibm855
+ cp855
+ 855
+ csibm855
+ ibm857
+ cp857
+ 857
+ csibm857
+ ibm860
+ cp860
+ 860
+ csibm860
+ ibm861
+ cp861
+ 861
+ cp-is
+ csibm861
+ ibm862
+ cp862
+ 862
+ cspc862latinhebrew
+ ibm863
+ cp863
+ 863
+ csibm863
+ ibm864
+ cp864
+ csibm864
+ ibm865
+ cp865
+ 865
+ csibm865
+ ibm866
+ cp866
+ 866
+ csibm866
+ ibm869
+ cp869
+ 869
+ cp-gr
+ csibm869
+ ibm1026
+ cp1026
+ csibm1026
+ koi8-r
+ cskoi8r
+ koi8-u
+ big5-hkscs
+ ptcp154
+ csptcp154
+ pt154
+ cp154
+ utf-7
+ utf-16be
+ utf-16le
+ utf-16
+ utf-8
+ iso-8859-13
+ iso-8859-14
+ iso-ir-199
+ iso_8859-14:1998
+ iso_8859-14
+ latin8
+ iso-celtic
+ l8
+ iso-8859-15
+ iso_8859-15
+ iso-8859-16
+ iso-ir-226
+ iso_8859-16:2001
+ iso_8859-16
+ latin10
+ l10
+ gbk
+ cp936
+ ms936
+ gb18030
+ shift_jis
+ ms_kanji
+ csshiftjis
+ euc-jp
+ gb2312
+ big5
+ csbig5
+ windows-1250
+ windows-1251
+ windows-1252
+ windows-1253
+ windows-1254
+ windows-1255
+ windows-1256
+ windows-1257
+ windows-1258
+ tis-620
+ hz-gb-2312
+ ]
+
+end
diff --git a/vendor/plugins/HTML5lib/lib/html5/filters.rb b/vendor/plugins/HTML5lib/lib/html5/filters.rb
new file mode 100644
index 00000000..74c7f0e0
--- /dev/null
+++ b/vendor/plugins/HTML5lib/lib/html5/filters.rb
@@ -0,0 +1 @@
+require 'html5/filters/optionaltags'
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/filters/base.rb b/vendor/plugins/HTML5lib/lib/html5/filters/base.rb
similarity index 89%
rename from vendor/plugins/HTML5lib/lib/html5lib/filters/base.rb
rename to vendor/plugins/HTML5lib/lib/html5/filters/base.rb
index c1a5c660..0cb023d2 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/filters/base.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/filters/base.rb
@@ -1,7 +1,7 @@
require 'delegate'
require 'enumerator'
-module HTML5lib
+module HTML5
module Filters
class Base < SimpleDelegator
include Enumerable
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/filters/inject_meta_charset.rb b/vendor/plugins/HTML5lib/lib/html5/filters/inject_meta_charset.rb
similarity index 65%
rename from vendor/plugins/HTML5lib/lib/html5lib/filters/inject_meta_charset.rb
rename to vendor/plugins/HTML5lib/lib/html5/filters/inject_meta_charset.rb
index 00dc980d..c998bf9c 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/filters/inject_meta_charset.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/filters/inject_meta_charset.rb
@@ -1,6 +1,6 @@
-require 'html5lib/filters/base'
+require 'html5/filters/base'
-module HTML5lib
+module HTML5
module Filters
class InjectMetaCharset < Base
def initialize(source, encoding)
@@ -21,9 +21,9 @@ module HTML5lib
when :EmptyTag
if token[:name].downcase == "meta"
# replace charset with actual encoding
- token[:data].each_with_index do |(name,value),index|
+ token[:data].each_with_index do |(name, value), index|
if name == 'charset'
- token[:data][index][1]=@encoding
+ token[:data][index][1] = @encoding
meta_found = true
end
end
@@ -31,7 +31,7 @@ module HTML5lib
# replace charset with actual encoding
has_http_equiv_content_type = false
content_index = -1
- token[:data].each_with_index do |(name,value),i|
+ token[:data].each_with_index do |(name, value), i|
if name.downcase == 'charset'
token[:data][i] = ['charset', @encoding]
meta_found = true
@@ -43,30 +43,27 @@ module HTML5lib
end
end
- if not meta_found
- if has_http_equiv_content_type and content_index >= 0
- token[:data][content_index][1] =
- 'text/html; charset=%s' % @encoding
+ if !meta_found
+ if has_http_equiv_content_type && content_index >= 0
+ token[:data][content_index][1] = 'text/html; charset=%s' % @encoding
meta_found = true
end
end
- elsif token[:name].downcase == "head" and not meta_found
+ elsif token[:name].downcase == "head" && !meta_found
# insert meta into empty head
- yield(:type => :StartTag, :name => "head", :data => token[:data])
- yield(:type => :EmptyTag, :name => "meta",
- :data => [["charset", @encoding]])
- yield(:type => :EndTag, :name => "head")
+ yield :type => :StartTag, :name => "head", :data => token[:data]
+ yield :type => :EmptyTag, :name => "meta", :data => [["charset", @encoding]]
+ yield :type => :EndTag, :name => "head"
meta_found = true
next
end
when :EndTag
- if token[:name].downcase == "head" and pending.any?
+ if token[:name].downcase == "head" && pending.any?
# insert meta into head (if necessary) and flush pending queue
yield pending.shift
- yield(:type => :EmptyTag, :name => "meta",
- :data => [["charset", @encoding]]) if not meta_found
+ yield :type => :EmptyTag, :name => "meta", :data => [["charset", @encoding]] if !meta_found
yield pending.shift while pending.any?
meta_found = true
state = :post_head
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/filters/optionaltags.rb b/vendor/plugins/HTML5lib/lib/html5/filters/optionaltags.rb
similarity index 97%
rename from vendor/plugins/HTML5lib/lib/html5lib/filters/optionaltags.rb
rename to vendor/plugins/HTML5lib/lib/html5/filters/optionaltags.rb
index aacf3b73..ba9a11b0 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/filters/optionaltags.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/filters/optionaltags.rb
@@ -1,7 +1,7 @@
-require 'html5lib/constants'
-require 'html5lib/filters/base'
+require 'html5/constants'
+require 'html5/filters/base'
-module HTML5lib
+module HTML5
module Filters
class OptionalTagFilter < Base
@@ -75,8 +75,7 @@ module HTML5lib
if type == :StartTag
# omit the thead and tfoot elements' end tag when they are
# immediately followed by a tbody element. See is_optional_end.
- if previous and previous[:type] == :EndTag and \
- %w(tbody thead tfoot).include?(previous[:name])
+ if previous and previous[:type] == :EndTag && %w(tbody thead tfoot).include?(previous[:name])
return false
end
@@ -85,7 +84,7 @@ module HTML5lib
return false
end
end
- return false
+ return false
end
def is_optional_end(tagname, nexttok)
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/filters/sanitizer.rb b/vendor/plugins/HTML5lib/lib/html5/filters/sanitizer.rb
similarity index 73%
rename from vendor/plugins/HTML5lib/lib/html5lib/filters/sanitizer.rb
rename to vendor/plugins/HTML5lib/lib/html5/filters/sanitizer.rb
index db9a12e0..8e25f594 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/filters/sanitizer.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/filters/sanitizer.rb
@@ -1,7 +1,7 @@
-require 'html5lib/filters/base'
-require 'html5lib/sanitizer'
+require 'html5/filters/base'
+require 'html5/sanitizer'
-module HTML5lib
+module HTML5
module Filters
class HTMLSanitizeFilter < Base
include HTMLSanitizeModule
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/filters/whitespace.rb b/vendor/plugins/HTML5lib/lib/html5/filters/whitespace.rb
similarity index 84%
rename from vendor/plugins/HTML5lib/lib/html5lib/filters/whitespace.rb
rename to vendor/plugins/HTML5lib/lib/html5/filters/whitespace.rb
index 3b85fd7b..18b07b59 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/filters/whitespace.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/filters/whitespace.rb
@@ -1,7 +1,7 @@
-require 'html5lib/constants'
-require 'html5lib/filters/base'
+require 'html5/constants'
+require 'html5/filters/base'
-module HTML5lib
+module HTML5
module Filters
class WhitespaceFilter < Base
@@ -21,7 +21,7 @@ module HTML5lib
preserve -= 1 if preserve > 0
when :SpaceCharacters
- next if preserve == 0
+ token[:data] = " " if preserve == 0 && token[:data]
when :Characters
token[:data] = token[:data].sub(SPACES,' ') if preserve == 0
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/html5parser.rb b/vendor/plugins/HTML5lib/lib/html5/html5parser.rb
similarity index 63%
rename from vendor/plugins/HTML5lib/lib/html5lib/html5parser.rb
rename to vendor/plugins/HTML5lib/lib/html5/html5parser.rb
index bf48930a..b20238b8 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/html5parser.rb
@@ -1,246 +1,248 @@
-require 'html5lib/constants'
-require 'html5lib/tokenizer'
-require 'html5lib/treebuilders/rexml'
-
-Dir.glob(File.join(File.dirname(__FILE__), 'html5parser', '*_phase.rb')).each do |path|
- require 'html5lib/html5parser/' + File.basename(path)
-end
-
-module HTML5lib
-
- # Error in parsed document
- class ParseError < Exception; end
- class AssertionError < Exception; end
-
- # HTML parser. Generates a tree structure from a stream of (possibly malformed) HTML
- #
- class HTMLParser
-
- attr_accessor :phase, :firstStartTag, :innerHTML, :lastPhase, :insertFromTable
-
- attr_reader :phases, :tokenizer, :tree, :errors
-
- def self.parse(stream, options = {})
- encoding = options.delete(:encoding)
- new(options).parse(stream,encoding)
- end
-
- def self.parseFragment(stream, options = {})
- container = options.delete(:container) || 'div'
- encoding = options.delete(:encoding)
- new(options).parseFragment(stream,container,encoding)
- end
-
- @@phases = %w( initial rootElement beforeHead inHead afterHead inBody inTable inCaption
- inColumnGroup inTableBody inRow inCell inSelect afterBody inFrameset afterFrameset trailingEnd )
-
- # :strict - raise an exception when a parse error is encountered
- # :tree - a treebuilder class controlling the type of tree that will be
- # returned. Built in treebuilders can be accessed through
- # HTML5lib::TreeBuilders[treeType]
- def initialize(options = {})
- @strict = false
- @errors = []
-
- @tokenizer = HTMLTokenizer
- @tree = TreeBuilders::REXML::TreeBuilder
-
- options.each { |name, value| instance_variable_set("@#{name}", value) }
-
- @tree = @tree.new
-
- @phases = @@phases.inject({}) do |phases, phase_name|
- phase_class_name = phase_name.sub(/(.)/) { $1.upcase } + 'Phase'
- phases[phase_name.to_sym] = HTML5lib.const_get(phase_class_name).new(self, @tree)
- phases
- end
- end
-
- def _parse(stream, innerHTML, encoding, container = 'div')
- @tree.reset
- @firstStartTag = false
- @errors = []
-
- @tokenizer = @tokenizer.class unless Class === @tokenizer
- @tokenizer = @tokenizer.new(stream, :encoding => encoding,
- :parseMeta => !innerHTML)
-
- if innerHTML
- case @innerHTML = container.downcase
- when 'title', 'textarea'
- @tokenizer.contentModelFlag = :RCDATA
- when 'style', 'script', 'xmp', 'iframe', 'noembed', 'noframes', 'noscript'
- @tokenizer.contentModelFlag = :CDATA
- when 'plaintext'
- @tokenizer.contentModelFlag = :PLAINTEXT
- else
- # contentModelFlag already is PCDATA
- #@tokenizer.contentModelFlag = :PCDATA
- end
-
- @phase = @phases[:rootElement]
- @phase.insertHtmlElement
- resetInsertionMode
- else
- @innerHTML = false
- @phase = @phases[:initial]
- end
-
- # We only seem to have InBodyPhase testcases where the following is
- # relevant ... need others too
- @lastPhase = nil
-
- # XXX This is temporary for the moment so there isn't any other
- # changes needed for the parser to work with the iterable tokenizer
- @tokenizer.each do |token|
- token = normalizeToken(token)
-
- method = 'process%s' % token[:type]
-
- case token[:type]
- when :Characters, :SpaceCharacters, :Comment
- @phase.send method, token[:data]
- when :StartTag
- @phase.send method, token[:name], token[:data]
- when :EndTag
- @phase.send method, token[:name]
- when :Doctype
- @phase.send method, token[:name], token[:publicId],
- token[:systemId], token[:correct]
- else
- parseError(token[:data])
- end
- end
-
- # When the loop finishes it's EOF
- @phase.processEOF
- end
-
- # Parse a HTML document into a well-formed tree
- #
- # stream - a filelike object or string containing the HTML to be parsed
- #
- # The optional encoding parameter must be a string that indicates
- # the encoding. If specified, that encoding will be used,
- # regardless of any BOM or later declaration (such as in a meta
- # element)
- def parse(stream, encoding=nil)
- _parse(stream, false, encoding)
- return @tree.getDocument
- end
-
- # Parse a HTML fragment into a well-formed tree fragment
-
- # container - name of the element we're setting the innerHTML property
- # if set to nil, default to 'div'
- #
- # stream - a filelike object or string containing the HTML to be parsed
- #
- # The optional encoding parameter must be a string that indicates
- # the encoding. If specified, that encoding will be used,
- # regardless of any BOM or later declaration (such as in a meta
- # element)
- def parseFragment(stream, container='div', encoding=nil)
- _parse(stream, true, encoding, container)
- return @tree.getFragment
- end
-
- def parseError(data = 'XXX ERROR MESSAGE NEEDED')
- # XXX The idea is to make data mandatory.
- @errors.push([@tokenizer.stream.position, data])
- raise ParseError if @strict
- end
-
- # HTML5 specific normalizations to the token stream
- def normalizeToken(token)
-
- if token[:type] == :EmptyTag
- # When a solidus (/) is encountered within a tag name what happens
- # depends on whether the current tag name matches that of a void
- # element. If it matches a void element atheists did the wrong
- # thing and if it doesn't it's wrong for everyone.
-
- unless VOID_ELEMENTS.include?(token[:name])
- parseError(_('Solidus (/) incorrectly placed in tag.'))
- end
-
- token[:type] = :StartTag
- end
-
- if token[:type] == :StartTag
- token[:name] = token[:name].tr(ASCII_UPPERCASE,ASCII_LOWERCASE)
-
- # We need to remove the duplicate attributes and convert attributes
- # to a dict so that [["x", "y"], ["x", "z"]] becomes {"x": "y"}
-
- unless token[:data].empty?
- data = token[:data].reverse.map { |attr, value| [attr.tr(ASCII_UPPERCASE, ASCII_LOWERCASE), value] }
- token[:data] = Hash[*data.flatten]
- end
-
- elsif token[:type] == :EndTag
- parseError(_('End tag contains unexpected attributes.')) unless token[:data].empty?
- token[:name] = token[:name].downcase
- end
-
- return token
- end
-
- @@new_modes = {
- 'select' => :inSelect,
- 'td' => :inCell,
- 'th' => :inCell,
- 'tr' => :inRow,
- 'tbody' => :inTableBody,
- 'thead' => :inTableBody,
- 'tfoot' => :inTableBody,
- 'caption' => :inCaption,
- 'colgroup' => :inColumnGroup,
- 'table' => :inTable,
- 'head' => :inBody,
- 'body' => :inBody,
- 'frameset' => :inFrameset
- }
-
- def resetInsertionMode
- # The name of this method is mostly historical. (It's also used in the
- # specification.)
- last = false
-
- @tree.openElements.reverse.each do |node|
- nodeName = node.name
-
- if node == @tree.openElements[0]
- last = true
- unless ['td', 'th'].include?(nodeName)
- # XXX
- # assert @innerHTML
- nodeName = @innerHTML
- end
- end
-
- # Check for conditions that should only happen in the innerHTML
- # case
- if ['select', 'colgroup', 'head', 'frameset'].include?(nodeName)
- # XXX
- # assert @innerHTML
- end
-
- if @@new_modes.has_key?(nodeName)
- @phase = @phases[@@new_modes[nodeName]]
- elsif nodeName == 'html'
- @phase = @phases[@tree.headPointer.nil?? :beforeHead : :afterHead]
- elsif last
- @phase = @phases[:inBody]
- else
- next
- end
-
- break
- end
- end
-
- def _(string); string; end
- end
-
-end
+require 'html5/constants'
+require 'html5/tokenizer'
+require 'html5/treebuilders/rexml'
+
+Dir.glob(File.join(File.dirname(__FILE__), 'html5parser', '*_phase.rb')).each do |path|
+ require 'html5/html5parser/' + File.basename(path)
+end
+
+module HTML5
+
+ # Error in parsed document
+ class ParseError < Exception; end
+ class AssertionError < Exception; end
+
+ # HTML parser. Generates a tree structure from a stream of (possibly malformed) HTML
+ #
+ class HTMLParser
+
+ attr_accessor :phase, :first_start_tag, :inner_html, :last_phase, :insert_from_table
+
+ attr_reader :phases, :tokenizer, :tree, :errors
+
+ def self.parse(stream, options = {})
+ encoding = options.delete(:encoding)
+ new(options).parse(stream,encoding)
+ end
+
+ def self.parse_fragment(stream, options = {})
+ container = options.delete(:container) || 'div'
+ encoding = options.delete(:encoding)
+ new(options).parse_fragment(stream, container, encoding)
+ end
+
+ @@phases = %w( initial rootElement beforeHead inHead afterHead inBody inTable inCaption
+ inColumnGroup inTableBody inRow inCell inSelect afterBody inFrameset afterFrameset trailingEnd )
+
+ # :strict - raise an exception when a parse error is encountered
+ # :tree - a treebuilder class controlling the type of tree that will be
+ # returned. Built in treebuilders can be accessed through
+ # HTML5::TreeBuilders[treeType]
+ def initialize(options = {})
+ @strict = false
+ @errors = []
+
+ @tokenizer = HTMLTokenizer
+ @tree = TreeBuilders::REXML::TreeBuilder
+
+ options.each {|name, value| instance_variable_set("@#{name}", value) }
+ @lowercase_attr_name = nil unless instance_variables.include?("@lowercase_attr_name")
+ @lowercase_element_name = nil unless instance_variables.include?("@lowercase_element_name")
+
+ @tree = @tree.new
+
+ @phases = @@phases.inject({}) do |phases, phase_name|
+ phase_class_name = phase_name.sub(/(.)/) { $1.upcase } + 'Phase'
+ phases[phase_name.to_sym] = HTML5.const_get(phase_class_name).new(self, @tree)
+ phases
+ end
+ end
+
+ def _parse(stream, inner_html, encoding, container = 'div')
+ @tree.reset
+ @first_start_tag = false
+ @errors = []
+
+ @tokenizer = @tokenizer.class unless Class === @tokenizer
+ @tokenizer = @tokenizer.new(stream, :encoding => encoding,
+ :parseMeta => !inner_html, :lowercase_attr_name => @lowercase_attr_name, :lowercase_element_name => @lowercase_element_name)
+
+ if inner_html
+ case @inner_html = container.downcase
+ when 'title', 'textarea'
+ @tokenizer.content_model_flag = :RCDATA
+ when 'style', 'script', 'xmp', 'iframe', 'noembed', 'noframes', 'noscript'
+ @tokenizer.content_model_flag = :CDATA
+ when 'plaintext'
+ @tokenizer.content_model_flag = :PLAINTEXT
+ else
+ # content_model_flag already is PCDATA
+ #@tokenizer.content_model_flag = :PCDATA
+ end
+
+ @phase = @phases[:rootElement]
+ @phase.insert_html_element
+ reset_insertion_mode
+ else
+ @inner_html = false
+ @phase = @phases[:initial]
+ end
+
+ # We only seem to have InBodyPhase testcases where the following is
+ # relevant ... need others too
+ @last_phase = nil
+
+ # XXX This is temporary for the moment so there aren't any other
+ # changes needed for the parser to work with the iterable tokenizer
+ @tokenizer.each do |token|
+ token = normalize_token(token)
+
+ method = 'process%s' % token[:type]
+
+ case token[:type]
+ when :Characters, :SpaceCharacters, :Comment
+ @phase.send method, token[:data]
+ when :StartTag
+ @phase.send method, token[:name], token[:data]
+ when :EndTag
+ @phase.send method, token[:name]
+ when :Doctype
+ @phase.send method, token[:name], token[:publicId],
+ token[:systemId], token[:correct]
+ else
+ parse_error(token[:data])
+ end
+ end
+
+ # When the loop finishes it's EOF
+ @phase.process_eof
+ end
+
+ # Parse an HTML document into a well-formed tree
+ #
+ # stream - a filelike object or string containing the HTML to be parsed
+ #
+ # The optional encoding parameter must be a string that indicates
+ # the encoding. If specified, that encoding will be used,
+ # regardless of any BOM or later declaration (such as in a meta
+ # element)
+ def parse(stream, encoding=nil)
+ _parse(stream, false, encoding)
+ @tree.get_document
+ end
+
+ # Parse an HTML fragment into a well-formed tree fragment
+ #
+ # container - name of the element we're setting the inner_html property
+ # if set to nil, default to 'div'
+ #
+ # stream - a filelike object or string containing the HTML to be parsed
+ #
+ # The optional encoding parameter must be a string that indicates
+ # the encoding. If specified, that encoding will be used,
+ # regardless of any BOM or later declaration (such as in a meta
+ # element)
+ def parse_fragment(stream, container='div', encoding=nil)
+ _parse(stream, true, encoding, container)
+ @tree.get_fragment
+ end
+
+ def parse_error(data = 'XXX ERROR MESSAGE NEEDED')
+ # XXX The idea is to make data mandatory.
+ @errors.push([@tokenizer.stream.position, data])
+ raise ParseError if @strict
+ end
+
+ # HTML5 specific normalizations to the token stream
+ def normalize_token(token)
+
+ if token[:type] == :EmptyTag
+ # When a solidus (/) is encountered within a tag name what happens
+ # depends on whether the current tag name matches that of a void
+ # element. If it does not, the solidus is reported as a parse error.
+ # In both cases the token is then treated as an ordinary start tag.
+
+ unless VOID_ELEMENTS.include?(token[:name])
+ parse_error(_('Solidus (/) incorrectly placed in tag.'))
+ end
+
+ token[:type] = :StartTag
+ end
+
+ if token[:type] == :StartTag
+ token[:name] = token[:name].downcase
+
+ # We need to remove the duplicate attributes and convert attributes
+ # to a Hash so that [["x", "y"], ["x", "z"]] becomes {"x" => "y"}
+
+ unless token[:data].empty?
+ data = token[:data].reverse.map {|attr, value| [attr.downcase, value] }
+ token[:data] = Hash[*data.flatten]
+ end
+
+ elsif token[:type] == :EndTag
+ parse_error(_('End tag contains unexpected attributes.')) unless token[:data].empty?
+ token[:name] = token[:name].downcase
+ end
+
+ token
+ end
+
+ @@new_modes = {
+ 'select' => :inSelect,
+ 'td' => :inCell,
+ 'th' => :inCell,
+ 'tr' => :inRow,
+ 'tbody' => :inTableBody,
+ 'thead' => :inTableBody,
+ 'tfoot' => :inTableBody,
+ 'caption' => :inCaption,
+ 'colgroup' => :inColumnGroup,
+ 'table' => :inTable,
+ 'head' => :inBody,
+ 'body' => :inBody,
+ 'frameset' => :inFrameset
+ }
+
+ def reset_insertion_mode
+ # The name of this method is mostly historical. (It's also used in the
+ # specification.)
+ last = false
+
+ @tree.open_elements.reverse.each do |node|
+ node_name = node.name
+
+ if node == @tree.open_elements.first
+ last = true
+ unless ['td', 'th'].include?(node_name)
+ # XXX
+ # assert @inner_html
+ node_name = @inner_html
+ end
+ end
+
+ # Check for conditions that should only happen in the inner_html
+ # case
+ if ['select', 'colgroup', 'head', 'frameset'].include?(node_name)
+ # XXX
+ # assert @inner_html
+ end
+
+ if @@new_modes.has_key?(node_name)
+ @phase = @phases[@@new_modes[node_name]]
+ elsif node_name == 'html'
+ @phase = @phases[@tree.head_pointer.nil?? :beforeHead : :afterHead]
+ elsif last
+ @phase = @phases[:inBody]
+ else
+ next
+ end
+
+ break
+ end
+ end
+
+ def _(string); string; end
+ end
+
+end
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_body_phase.rb b/vendor/plugins/HTML5lib/lib/html5/html5parser/after_body_phase.rb
similarity index 59%
rename from vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_body_phase.rb
rename to vendor/plugins/HTML5lib/lib/html5/html5parser/after_body_phase.rb
index 27778ef1..5d535423 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_body_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/html5parser/after_body_phase.rb
@@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'
-module HTML5lib
+module HTML5
class AfterBodyPhase < Phase
handle_end 'html'
@@ -8,36 +8,36 @@ module HTML5lib
def processComment(data)
# This is needed because data is to be appended to the element
# here and not to whatever is currently open.
- @tree.insertComment(data, @tree.openElements[0])
+ @tree.insert_comment(data, @tree.open_elements.first)
end
def processCharacters(data)
- @parser.parseError(_('Unexpected non-space characters in the after body phase.'))
+ parse_error(_('Unexpected non-space characters in the after body phase.'))
@parser.phase = @parser.phases[:inBody]
@parser.phase.processCharacters(data)
end
def processStartTag(name, attributes)
- @parser.parseError(_("Unexpected start tag token (#{name}) in the after body phase."))
+ parse_error(_("Unexpected start tag token (#{name}) in the after body phase."))
@parser.phase = @parser.phases[:inBody]
@parser.phase.processStartTag(name, attributes)
end
def endTagHtml(name)
- if @parser.innerHTML
- @parser.parseError
+ if @parser.inner_html
+ parse_error
else
# XXX: This may need to be done, not sure
- # Don't set lastPhase to the current phase but to the inBody phase
+ # Don't set last_phase to the current phase but to the inBody phase
# instead. No need for extra parse errors if there's something after .
# Try "X