diff --git a/app/controllers/application.rb b/app/controllers/application.rb index 1f085d20..c328ef58 100644 --- a/app/controllers/application.rb +++ b/app/controllers/application.rb @@ -152,8 +152,7 @@ class ApplicationController < ActionController::Base elsif %w(tex).include?(action_name) response.headers['Content-Type'] = 'text/plain; charset=UTF-8' elsif request.env['HTTP_USER_AGENT'] =~ /Validator/ or request.env.include?('HTTP_ACCEPT') && - Mime::Type.parse(request.env["HTTP_ACCEPT"]).include?(Mime::XHTML) && - !(request.env['HTTP_USER_AGENT'] =~ /Safari/ and %w(s5).include?(action_name)) + Mime::Type.parse(request.env["HTTP_ACCEPT"]).include?(Mime::XHTML) response.headers['Content-Type'] = 'application/xhtml+xml; charset=UTF-8' elsif request.env['HTTP_USER_AGENT'] =~ /MathPlayer/ response.headers['Content-Type'] = 'application/xhtml+xml' diff --git a/app/views/wiki/atom.rxml b/app/views/wiki/atom.rxml index e356d9ae..c1273c7f 100644 --- a/app/views/wiki/atom.rxml +++ b/app/views/wiki/atom.rxml @@ -18,7 +18,7 @@ xml.feed('xmlns' => "http://www.w3.org/2005/Atom", "xml:lang" => 'en') do xml.name(page.author) end if @hide_description - xml.summary('Content suppressed.', 'type' => 'text') + xml.summary("Updated by #{page.author} on #{page.updated_at.getgm.strftime("%Y-%m-%d")} at #{page.updated_at.getgm.strftime("%H:%M:%SZ")}.", 'type' => 'text') else xml.content('type' => 'xhtml', 'xml:base' => url_for(:only_path => false, :web => @web_name, :action => @link_action, :id => page.name) ) do xml.div('xmlns' => 'http://www.w3.org/1999/xhtml' ) do diff --git a/app/views/wiki/tex.rhtml b/app/views/wiki/tex.rhtml index 2f2e5e52..9edf7a7b 100644 --- a/app/views/wiki/tex.rhtml +++ b/app/views/wiki/tex.rhtml @@ -11,6 +11,16 @@ %----Macros---------- \newcommand{\gt}{>} \newcommand{\lt}{<} +\newcommand{\darr}{\downarrow} +\newcommand{\nearr}{\nearrow} +\newcommand{\nwarr}{\nwarrow} +\newcommand{\searr}{\searrow} +\newcommand{\swarr}{\swarrow} 
+\newcommand{\iff}{\Longleftrightarrow} +\newcommand{\impliedby}{\Leftarrow} +\newcommand{\map}{\mapsto} +\newcommand{\embedsin}{\hookrightarrow} +\newcommand{\implies}{\Rightarrow} \newcommand{\qed}{\blacksquare} %------------------------------------------------------------------- diff --git a/lib/chunks/category.rb b/lib/chunks/category.rb index d08d8636..33e51bc9 100644 --- a/lib/chunks/category.rb +++ b/lib/chunks/category.rb @@ -16,7 +16,7 @@ class Category < Chunk::Abstract def initialize(match_data, content) super(match_data, content) @hidden = match_data[1] - @list = match_data[2].split(',').map { |c| c.strip } + @list = match_data[2].split(',').map { |c| html_escape(c.strip) } @unmask_text = '' if @hidden @unmask_text = '' diff --git a/lib/chunks/chunk.rb b/lib/chunks/chunk.rb index 18de7d0c..f3384943 100644 --- a/lib/chunks/chunk.rb +++ b/lib/chunks/chunk.rb @@ -74,6 +74,13 @@ module Chunk @content.delete_chunk(self) end + def html_escape(string) + string.gsub( /&/, "&amp;" ). + gsub( /</, "&lt;" ). + gsub( />/, "&gt;" ). + gsub( /"/, "&quot;" ) + end + end end diff --git a/lib/sanitize.rb b/lib/sanitize.rb index c36e7583..b9fa2449 100644 --- a/lib/sanitize.rb +++ b/lib/sanitize.rb @@ -25,14 +25,14 @@ module Sanitize - require 'html5lib/html5parser' - require 'html5lib/liberalxmlparser' - require 'html5lib/treewalkers' - require 'html5lib/treebuilders' - require 'html5lib/serializer' - require 'html5lib/sanitizer' + require 'html5/html5parser' + require 'html5/liberalxmlparser' + require 'html5/treewalkers' + require 'html5/treebuilders' + require 'html5/serializer' + require 'html5/sanitizer' - include HTML5lib + include HTML5 # Sanitize a string, parsed using XHTML parsing rules. # @@ -52,12 +52,12 @@ module Sanitize options.each do |name, value| next unless %w(encoding treebuilder to_tree).include? 
name.to_s if name.to_s == 'treebuilder' - @treebuilder = HTML5lib::TreeBuilders.getTreeBuilder(value) + @treebuilder = HTML5::TreeBuilders.get_tree_builder(value) else instance_variable_set("@#{name}", value) end end - parsed = XHTMLParser.parseFragment(html.to_ncr, {:tokenizer => HTMLSanitizer, + parsed = XHTMLParser.parse_fragment(html.to_ncr, {:tokenizer => HTMLSanitizer, :encoding => @encoding, :tree => @treebuilder }) return parsed if @to_tree return parsed.to_s @@ -81,12 +81,12 @@ module Sanitize options.each do |name, value| next unless %w(encoding treebuilder to_tree).include? name.to_s if name.to_s == 'treebuilder' - @treebuilder = HTML5lib::TreeBuilders.getTreeBuilder(value) + @treebuilder = HTML5::TreeBuilders.get_tree_builder(value) else instance_variable_set("@#{name}", value) end end - parsed = HTMLParser.parseFragment(html.to_ncr, {:tokenizer => HTMLSanitizer, + parsed = HTMLParser.parse_fragment(html.to_ncr, {:tokenizer => HTMLSanitizer, :encoding => @encoding, :tree => @treebuilder }) return parsed if @to_tree return parsed.to_s @@ -98,13 +98,9 @@ module Sanitize # sanitize_rexml(tree) -> string # def sanitize_rexml(tree) - tokens = TreeWalkers.getTreeWalker('rexml').new(tree.to_ncr) - HTMLSerializer.serialize(tokens, {:encoding=>'utf-8', - :quote_attr_values => true, - :minimize_boolean_attributes => false, - :use_trailing_solidus => true, + tokens = TreeWalkers.get_tree_walker('rexml').new(tree.to_ncr) + XHTMLSerializer.serialize(tokens, {:encoding=>'utf-8', :space_before_trailing_solidus => true, - :omit_optional_tags => false, :inject_meta_charset => false, :sanitize => true}) end diff --git a/public/s5/ui/default/math.css b/public/s5/ui/default/math.css index 15927aaf..b03bcbb1 100644 --- a/public/s5/ui/default/math.css +++ b/public/s5/ui/default/math.css @@ -16,4 +16,4 @@ table.plaintable { text-align:center; margin-left:30px; } - +.noborder td, .noborder th {border:0} diff --git a/public/s5/ui/default/pretty.css 
b/public/s5/ui/default/pretty.css index 536d1d6b..9f9d0cb9 100644 --- a/public/s5/ui/default/pretty.css +++ b/public/s5/ui/default/pretty.css @@ -1,6 +1,6 @@ /* Following are the presentation styles -- edit away! */ -body {background: #FFF; color: #000; font-size: 2em;} +body {background: #FFF; color: #000; font-size: 1.6em;} :link, :visited {text-decoration: none; color: #00C;} #controls :active {color: #8A8 !important;} #controls :focus {outline: 1px dotted #272;} diff --git a/public/s5/ui/default/slides.js b/public/s5/ui/default/slides.js index bdae97d6..704d29cd 100644 --- a/public/s5/ui/default/slides.js +++ b/public/s5/ui/default/slides.js @@ -1,4 +1,5 @@ -// S5 v1.2a1 slides.js -- released into the Public Domain +// S5 v1.2a2 slides.js -- released into the Public Domain +// Many modifications by Jacques Distler to allow operation as real XHTML. // // Please see http://www.meyerweb.com/eric/tools/s5/credits.html for information // about all the wonderful and talented contributors to this code! @@ -30,6 +31,7 @@ var countdown = { var isIE = navigator.appName == 'Microsoft Internet Explorer' && navigator.userAgent.indexOf('Opera') < 1 ? 1 : 0; var isOp = navigator.userAgent.indexOf('Opera') > -1 ? 1 : 0; +var isSa = navigator.userAgent.indexOf('Safari') > -1 ? 1 : 0; var isGe = navigator.userAgent.indexOf('Gecko') > -1 && navigator.userAgent.indexOf('Safari') < 1 ? 
1 : 0; function hasClass(object, className) { @@ -111,7 +113,14 @@ function slideLabel() { for (var o = 0; o < menunodes.length; o++) { otext += nodeValue(menunodes[o]); } - list.options[list.length] = new Option(n + ' : ' + otext, n); + if (isSa) { + var option = createElement('option'); + option.setAttribute('value', n); + option.appendChild(document.createTextNode(n + ' : ' + otext) ); + list.appendChild(option); + } else { + list.options[list.length] = new Option(n + ' : ' + otext, n); + } } } @@ -122,12 +131,12 @@ function currentSlide() { } else { cs = document.currentSlide; } - var plink = document.createElement('a'); + var plink = createElement('a'); plink.id = 'plink'; plink.setAttribute('href', ''); - var csHere = document.createElement('span'); - var csSep = document.createElement('span'); - var csTotal = document.createElement('span'); + var csHere = createElement('span'); + var csSep = createElement('span'); + var csTotal = createElement('span'); csHere.id = 'csHere'; csSep.id = 'csSep'; csTotal.id = 'csTotal'; @@ -376,7 +385,7 @@ function slideJump() { function fixLinks() { var thisUri = window.location.href; thisUri = thisUri.slice(0, thisUri.length - window.location.hash.length); - var aelements = document.getElementsByTagName('A'); + var aelements = document.getElementsByTagName('a'); for (var i = 0; i < aelements.length; i++) { var a = aelements[i].href; var slideID = a.match('\#slide[0-9]{1,2}'); @@ -418,43 +427,43 @@ function permaLink() { function createControls() { var controlsDiv = document.getElementById("controls"); if (!controlsDiv) return; - var controlForm = document.createElement('form'); + var controlForm = createElement('form'); controlForm.id = 'controlForm'; controlForm.setAttribute('action', '#'); if (controlVis == 'hidden') { controlForm.setAttribute('onmouseover', 'showHide(\'s\');'); controlForm.setAttribute('onmouseout', 'showHide(\'h\');'); } - var navLinks = document.createElement('div'); + var navLinks = 
createElement('div'); navLinks.id = 'navLinks'; - var showNotes = document.createElement('a'); + var showNotes = createElement('a'); showNotes.id = 'show-notes'; showNotes.setAttribute('accesskey', 'n'); showNotes.setAttribute('href', 'javascript:createNotesWindow();'); showNotes.setAttribute('title', 'Show Notes'); showNotes.appendChild(document.createTextNode('\u2261')); - var toggle = document.createElement('a'); + var toggle = createElement('a'); toggle.id = 'toggle'; toggle.setAttribute('accesskey', 't'); toggle.setAttribute('href', 'javascript:toggle();'); toggle.appendChild(document.createTextNode('\u00D8')); - var prev = document.createElement('a'); + var prev = createElement('a'); prev.id = 'prev'; prev.setAttribute('accesskey', 'z'); prev.setAttribute('href', 'javascript:go(-1);'); prev.appendChild(document.createTextNode('\u00AB')); - var next = document.createElement('a'); + var next = createElement('a'); next.id = 'next'; next.setAttribute('accesskey', 'x'); next.setAttribute('href', 'javascript:go(1);'); next.appendChild(document.createTextNode('\u00BB')); - var navList = document.createElement('div'); + var navList = createElement('div'); navList.id = 'navList'; if (controlVis != 'hidden') { navList.setAttribute('onmouseover', 'showHide(\'s\');'); navList.setAttribute('onmouseout', 'showHide(\'h\');'); } - var jumplist = document.createElement('select'); + var jumplist = createElement('select'); jumplist.id = 'jumplist'; jumplist.setAttribute('onchange', 'go(\'j\');'); navList.appendChild(jumplist); @@ -503,7 +512,7 @@ function fontScale() { // causes layout problems in FireFox that get fixed if b function fontSize(value) { if (!(s5ss = document.getElementById('s5ss'))) { if (!document.createStyleSheet) { - document.getElementsByTagName('head')[0].appendChild(s5ss = document.createElement('style')); + document.getElementsByTagName('head')[0].appendChild(s5ss = createElement('style')); s5ss.setAttribute('media','screen, projection'); 
s5ss.setAttribute('id','s5ss'); } else { @@ -784,6 +793,14 @@ function readTime(val) { } } +function createElement(element) { + if (typeof document.createElementNS != 'undefined') { + return document.createElementNS('http://www.w3.org/1999/xhtml', element); + } else { + return document.createElement(element); + } +} + function windowChange() { fontScale(); } diff --git a/public/s5/ui/s5-notes.xhtml b/public/s5/ui/s5-notes.xhtml new file mode 100644 index 00000000..3d8cc136 --- /dev/null +++ b/public/s5/ui/s5-notes.xhtml @@ -0,0 +1,64 @@ + + + + + +Notes + + + + + + + +
+

+Elapsed Time +

+ +
+|← +
+
+ +
+

+Remaining Time +

+

+- +00:00:00 ++ +

+
+
+ +|| +|← +
+
+
+ +

...

+
+ +

...

+
+ + + diff --git a/vendor/plugins/HTML5lib/History.txt b/vendor/plugins/HTML5lib/History.txt new file mode 100644 index 00000000..d64c86c3 --- /dev/null +++ b/vendor/plugins/HTML5lib/History.txt @@ -0,0 +1,5 @@ +== 0.1.0 / 2007-08-07 + +* 1 major enhancement + * Birthday! + diff --git a/vendor/plugins/HTML5lib/Manifest.txt b/vendor/plugins/HTML5lib/Manifest.txt new file mode 100644 index 00000000..8a8a1bca --- /dev/null +++ b/vendor/plugins/HTML5lib/Manifest.txt @@ -0,0 +1,59 @@ +History.txt +Manifest.txt +README +Rakefile.rb +lib/html5.rb +lib/html5/constants.rb +lib/html5/filters/base.rb +lib/html5/filters/inject_meta_charset.rb +lib/html5/filters/optionaltags.rb +lib/html5/filters/sanitizer.rb +lib/html5/filters/whitespace.rb +lib/html5/html5parser.rb +lib/html5/html5parser/after_body_phase.rb +lib/html5/html5parser/after_frameset_phase.rb +lib/html5/html5parser/after_head_phase.rb +lib/html5/html5parser/before_head_phase.rb +lib/html5/html5parser/in_body_phase.rb +lib/html5/html5parser/in_caption_phase.rb +lib/html5/html5parser/in_cell_phase.rb +lib/html5/html5parser/in_column_group_phase.rb +lib/html5/html5parser/in_frameset_phase.rb +lib/html5/html5parser/in_head_phase.rb +lib/html5/html5parser/in_row_phase.rb +lib/html5/html5parser/in_select_phase.rb +lib/html5/html5parser/in_table_body_phase.rb +lib/html5/html5parser/in_table_phase.rb +lib/html5/html5parser/initial_phase.rb +lib/html5/html5parser/phase.rb +lib/html5/html5parser/root_element_phase.rb +lib/html5/html5parser/trailing_end_phase.rb +lib/html5/inputstream.rb +lib/html5/liberalxmlparser.rb +lib/html5/sanitizer.rb +lib/html5/serializer.rb +lib/html5/serializer/htmlserializer.rb +lib/html5/serializer/xhtmlserializer.rb +lib/html5/tokenizer.rb +lib/html5/treebuilders.rb +lib/html5/treebuilders/base.rb +lib/html5/treebuilders/hpricot.rb +lib/html5/treebuilders/rexml.rb +lib/html5/treebuilders/simpletree.rb +lib/html5/treewalkers.rb +lib/html5/treewalkers/base.rb +lib/html5/treewalkers/hpricot.rb 
+lib/html5/treewalkers/rexml.rb +lib/html5/treewalkers/simpletree.rb +lib/html5/version.rb +parse.rb +tests/preamble.rb +tests/test_encoding.rb +tests/test_lxp.rb +tests/test_parser.rb +tests/test_sanitizer.rb +tests/test_serializer.rb +tests/test_stream.rb +tests/test_tokenizer.rb +tests/test_treewalkers.rb +tests/tokenizer_test_parser.rb diff --git a/vendor/plugins/HTML5lib/README b/vendor/plugins/HTML5lib/README index c9b3304d..f1d9991a 100644 --- a/vendor/plugins/HTML5lib/README +++ b/vendor/plugins/HTML5lib/README @@ -1,9 +1,45 @@ -= HTML5lib +html5 + by Ryan King, et al + http://code.google.com/p/html5lib -== Basic Usage +== DESCRIPTION: - require 'html5lib' +A ruby implementation of the parsing algorithm in HTML5. - doc = HTML5lib.parse('...') - doc.class # REXML::Document \ No newline at end of file +== FEATURES/PROBLEMS: + + + +== SYNOPSIS: + + TODO + +== REQUIREMENTS: + +* chardet, only tested with 0.9.0 + +== INSTALL: + +* sudo gem install html5 + +== LICENSE: + +Copyright (c) 2006-2007 The Authors + +Contributors: +James Graham - jg307@cam.ac.uk +Anne van Kesteren - annevankesteren@gmail.com +Lachlan Hunt - lachlan.hunt@lachy.id.au +Matt McDonald - kanashii@kanashii.ca +Sam Ruby - rubys@intertwingly.net +Ian Hickson (Google) - ian@hixie.ch +Thomas Broyer - t.broyer@ltgt.net +Jacques Distler - distler@golem.ph.utexas.edu +Ryan King - ryan@theryanking.com + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/vendor/plugins/HTML5lib/Rakefile.rb b/vendor/plugins/HTML5lib/Rakefile.rb index 90a72824..65b20295 100644 --- a/vendor/plugins/HTML5lib/Rakefile.rb +++ b/vendor/plugins/HTML5lib/Rakefile.rb @@ -1,7 +1,33 @@ require 'rake' -require 'rake/testtask' +require 'hoe' +require 'lib/html5/version' -Rake::TestTask.new do |task| - task.pattern = 'tests/test_*.rb' - task.verbose = true +Hoe.new("html5", HTML5::VERSION) do |p| + p.name = "html5" + p.description = p.paragraphs_of('README', 2..5).join("\n\n") + p.summary = "HTML5 parser/tokenizer." + + p.author = ['Ryan King'] # TODO: add more names + p.email = 'ryan@theryanking.com' + p.url = 'http://code.google.com/p/html5lib' + p.need_zip = true + + p.extra_deps << ['chardet', '>= 0.9.0'] + p.changes = p.paragraphs_of('History.txt', 0..1).join("\n\n") end + +require 'rcov/rcovtask' + +namespace :test do + namespace :coverage do + desc "Delete aggregate coverage data." 
+ task(:clean) { rm_f "coverage.data" } + end + desc 'Aggregate code coverage for unit, functional and integration tests' + Rcov::RcovTask.new(:coverage => "test:coverage:clean") do |t| + t.libs << "tests" + t.test_files = FileList["tests/test_*.rb"] + t.output_dir = "tests/coverage/" + t.verbose = true + end +end \ No newline at end of file diff --git a/vendor/plugins/HTML5lib/bin/html5 b/vendor/plugins/HTML5lib/bin/html5 new file mode 100755 index 00000000..2680aea3 --- /dev/null +++ b/vendor/plugins/HTML5lib/bin/html5 @@ -0,0 +1,215 @@ +#!/usr/bin/env ruby + +$:.unshift File.dirname(__FILE__), 'lib' + +def parse(opts, args) + encoding = nil + + f = args[-1] + if f + begin + if f[0..6] == 'http://' + require 'open-uri' + f = URI.parse(f).open + encoding = f.charset + elsif f == '-' + f = $stdin + else + f = open(f) + end + rescue + end + else + $stderr.write("No filename provided. Use -h for help\n") + exit(1) + end + + require 'html5/treebuilders' + treebuilder = HTML5::TreeBuilders[opts.treebuilder] + + if opts.output == :xml + require 'html5/liberalxmlparser' + p = HTML5::XMLParser.new(:tree=>treebuilder) + else + require 'html5/html5parser' + p = HTML5::HTMLParser.new(:tree=>treebuilder) + end + + if opts.parsemethod == :parse + args = [f, encoding] + else + args = [f, 'div', encoding] + end + + if opts.profile + require 'profiler' + Profiler__::start_profile + p.send(opts.parsemethod, *args) + Profiler__::stop_profile + Profiler__::print_profile($stderr) + elsif opts.time + require 'time' # TODO: switch to benchmark + t0 = Time.new + document = p.send(opts.parsemethod, *args) + t1 = Time.new + print_output(p, document, opts) + t2 = Time.new + puts "\n\nRun took: %fs (plus %fs to print the output)"%[t1-t0, t2-t1] + else + document = p.send(opts.parsemethod, *args) + print_output(p, document, opts) + end +end + +def print_output(parser, document, opts) + puts "Encoding: #{parser.tokenizer.stream.char_encoding}" if opts.encoding + + case opts.output + when :xml 
+ print document + when :html + require 'html5/treewalkers' + tokens = HTML5::TreeWalkers[opts.treebuilder].new(document) + require 'html5/serializer' + puts HTML5::HTMLSerializer.serialize(tokens, opts.serializer) + when :hilite + print document.hilite + when :tree + document = [document] unless document.respond_to?(:each) + document.each {|fragment| puts parser.tree.testSerializer(fragment)} + end + + if opts.error + errList=[] + for pos, message in parser.errors + errList << ("Line %i Col %i"%pos + " " + message) + end + $stdout.write("\nParse errors:\n" + errList.join("\n")+"\n") + end +end + +require 'ostruct' +options = OpenStruct.new +options.profile = false +options.time = false +options.output = :html +options.treebuilder = 'simpletree' +options.error = false +options.encoding = false +options.parsemethod = :parse +options.serializer = { + :encoding => 'utf-8', + :omit_optional_tags => false, + :inject_meta_charset => false +} + +require 'optparse' +opts = OptionParser.new do |opts| + opts.separator "" + opts.separator "Parse Options:" + + opts.on("-b", "--treebuilder NAME") do |treebuilder| + options.treebuilder = treebuilder + end + + opts.on("-f", "--fragment", "Parse as a fragment") do |parse| + options.parsemethod = :parse_fragment + end + + opts.separator "" + opts.separator "Filter Options:" + + opts.on("--[no-]inject-meta-charset", "inject <meta charset>") do |inject| + options.serializer[:inject_meta_charset] = inject + end + + opts.on("--[no-]strip-whitespace", "strip unnecessary whitespace") do |strip| + options.serializer[:strip_whitespace] = strip + end + + opts.on("--[no-]sanitize", "escape unsafe tags") do |sanitize| + options.serializer[:sanitize] = sanitize + end + + opts.separator "" + opts.separator "Output Options:" + + opts.on("--tree", "output as debug tree") do |tree| + options.output = :tree + end + + opts.on("-x", "--xml", "output as xml") do |xml| + options.output = :xml + options.treebuilder = "rexml" + end + + opts.on("--[no-]html", "Output 
as html") do |html| + options.output = (html ? :html : nil) + end + + opts.on("--hilite", "Output as formatted highlighted code.") do |hilite| + options.output = :hilite + end + + opts.on("-e", "--error", "Print a list of parse errors") do |error| + options.error = error + end + + opts.separator "" + opts.separator "Serialization Options:" + + opts.on("--[no-]omit-optional-tags", "Omit optional tags") do |omit| + options.serializer[:omit_optional_tags] = omit + end + + opts.on("--[no-]quote-attr-values", "Quote attribute values") do |quote| + options.serializer[:quote_attr_values] = quote + end + + opts.on("--[no-]use-best-quote-char", "Use best quote character") do |best| + options.serializer[:use_best_quote_char] = best + end + + opts.on("--quote-char C", "Use specified quote character") do |c| + options.serializer[:quote_char] = c + end + + opts.on("--[no-]minimize-boolean-attributes", "Minimize boolean attributes") do |min| + options.serializer[:minimize_boolean_attributes] = min + end + + opts.on("--[no-]use-trailing-solidus", "Use trailing solidus") do |slash| + options.serializer[:use_trailing_solidus] = slash + end + + opts.on("--[no-]escape-lt-in-attrs", "Escape less than signs in attribute values") do |lt| + options.serializer[:escape_lt_in_attrs] = lt + end + + opts.on("--[no-]escape-rcdata", "Escape rcdata element values") do |rcdata| + options.serializer[:escape_rcdata] = rcdata + end + + opts.separator "" + opts.separator "Other Options:" + + opts.on("-p", "--[no-]profile", "Profile the run") do |profile| + options.profile = profile + end + + opts.on("-t", "--[no-]time", "Time the run") do |time| + options.time = time + end + + opts.on("-c", "--[no-]encoding", "Print character encoding used") do |encoding| + options.encoding = encoding + end + + opts.on_tail("-h", "--help", "Show this message") do + puts opts + exit + end +end + +opts.parse!(ARGV) +parse options, ARGV diff --git a/vendor/plugins/HTML5lib/lib/html5.rb 
b/vendor/plugins/HTML5lib/lib/html5.rb new file mode 100644 index 00000000..7ca2ee61 --- /dev/null +++ b/vendor/plugins/HTML5lib/lib/html5.rb @@ -0,0 +1,13 @@ +require 'html5/html5parser' +require 'html5/version' + +module HTML5 + + def self.parse(stream, options={}) + HTMLParser.parse(stream, options) + end + + def self.parse_fragment(stream, options={}) + HTMLParser.parse_fragment(stream, options) + end +end diff --git a/vendor/plugins/HTML5lib/lib/html5/constants.rb b/vendor/plugins/HTML5lib/lib/html5/constants.rb new file mode 100755 index 00000000..8ccaf66d --- /dev/null +++ b/vendor/plugins/HTML5lib/lib/html5/constants.rb @@ -0,0 +1,818 @@ +module HTML5 + + class EOF < Exception; end + + CONTENT_MODEL_FLAGS = [ + :PCDATA, + :RCDATA, + :CDATA, + :PLAINTEXT + ] + + SCOPING_ELEMENTS = %w[ + button + caption + html + marquee + object + table + td + th + ] + + FORMATTING_ELEMENTS = %w[ + a + b + big + em + font + i + nobr + s + small + strike + strong + tt + u + ] + + SPECIAL_ELEMENTS = %w[ + address + area + base + basefont + bgsound + blockquote + body + br + center + col + colgroup + dd + dir + div + dl + dt + embed + fieldset + form + frame + frameset + h1 + h2 + h3 + h4 + h5 + h6 + head + hr + iframe + image + img + input + isindex + li + link + listing + menu + meta + noembed + noframes + noscript + ol + optgroup + option + p + param + plaintext + pre + script + select + spacer + style + tbody + textarea + tfoot + thead + title + tr + ul + wbr + ] + + SPACE_CHARACTERS = %W[ + \t + \n + \x0B + \x0C + \x20 + \r + ] + + TABLE_INSERT_MODE_ELEMENTS = %w[ + table + tbody + tfoot + thead + tr + ] + + ASCII_LOWERCASE = ('a'..'z').to_a.join('') + ASCII_UPPERCASE = ('A'..'Z').to_a.join('') + ASCII_LETTERS = ASCII_LOWERCASE + ASCII_UPPERCASE + DIGITS = '0'..'9' + HEX_DIGITS = DIGITS.to_a + ('a'..'f').to_a + ('A'..'F').to_a + + # Heading elements need to be ordered + HEADING_ELEMENTS = %w[ + h1 + h2 + h3 + h4 + h5 + h6 + ] + + # XXX What about event-source and command? 
+ VOID_ELEMENTS = %w[ + base + link + meta + hr + br + img + embed + param + area + col + input + ] + + CDATA_ELEMENTS = %w[title textarea] + + RCDATA_ELEMENTS = %w[ + style + script + xmp + iframe + noembed + noframes + noscript + ] + + BOOLEAN_ATTRIBUTES = { + :global => %w[irrelevant], + 'style' => %w[scoped], + 'img' => %w[ismap], + 'audio' => %w[autoplay controls], + 'video' => %w[autoplay controls], + 'script' => %w[defer async], + 'details' => %w[open], + 'datagrid' => %w[multiple disabled], + 'command' => %w[hidden disabled checked default], + 'menu' => %w[autosubmit], + 'fieldset' => %w[disabled readonly], + 'option' => %w[disabled readonly selected], + 'optgroup' => %w[disabled readonly], + 'button' => %w[disabled autofocus], + 'input' => %w[disabled readonly required autofocus checked ismap], + 'select' => %w[disabled readonly autofocus multiple], + 'output' => %w[disabled readonly] + + } + + # entitiesWindows1252 has to be _ordered_ and needs to have an index. + ENTITIES_WINDOWS1252 = [ + 8364, # 0x80 0x20AC EURO SIGN + 65533, # 0x81 UNDEFINED + 8218, # 0x82 0x201A SINGLE LOW-9 QUOTATION MARK + 402, # 0x83 0x0192 LATIN SMALL LETTER F WITH HOOK + 8222, # 0x84 0x201E DOUBLE LOW-9 QUOTATION MARK + 8230, # 0x85 0x2026 HORIZONTAL ELLIPSIS + 8224, # 0x86 0x2020 DAGGER + 8225, # 0x87 0x2021 DOUBLE DAGGER + 710, # 0x88 0x02C6 MODIFIER LETTER CIRCUMFLEX ACCENT + 8240, # 0x89 0x2030 PER MILLE SIGN + 352, # 0x8A 0x0160 LATIN CAPITAL LETTER S WITH CARON + 8249, # 0x8B 0x2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 338, # 0x8C 0x0152 LATIN CAPITAL LIGATURE OE + 65533, # 0x8D UNDEFINED + 381, # 0x8E 0x017D LATIN CAPITAL LETTER Z WITH CARON + 65533, # 0x8F UNDEFINED + 65533, # 0x90 UNDEFINED + 8216, # 0x91 0x2018 LEFT SINGLE QUOTATION MARK + 8217, # 0x92 0x2019 RIGHT SINGLE QUOTATION MARK + 8220, # 0x93 0x201C LEFT DOUBLE QUOTATION MARK + 8221, # 0x94 0x201D RIGHT DOUBLE QUOTATION MARK + 8226, # 0x95 0x2022 BULLET + 8211, # 0x96 0x2013 EN DASH + 8212, # 0x97 
0x2014 EM DASH + 732, # 0x98 0x02DC SMALL TILDE + 8482, # 0x99 0x2122 TRADE MARK SIGN + 353, # 0x9A 0x0161 LATIN SMALL LETTER S WITH CARON + 8250, # 0x9B 0x203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 339, # 0x9C 0x0153 LATIN SMALL LIGATURE OE + 65533, # 0x9D UNDEFINED + 382, # 0x9E 0x017E LATIN SMALL LETTER Z WITH CARON + 376 # 0x9F 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS + ] + + # ENTITIES was generated from Python using the following code: + # + # import constants + # entities = constants.entities.items() + # entities.sort() + # list = [ ' '.join([repr(entity), '=>', ord(value)<128 and + # repr(str(value)) or repr(value.encode('utf-8')).replace("'",'"')]) + # for entity, value in entities] + # print ' ENTITIES = {\n ' + ',\n '.join(list) + '\n }' + + ENTITIES = { + 'AElig' => "\xc3\x86", + 'AElig;' => "\xc3\x86", + 'AMP' => '&', + 'AMP;' => '&', + 'Aacute' => "\xc3\x81", + 'Aacute;' => "\xc3\x81", + 'Acirc' => "\xc3\x82", + 'Acirc;' => "\xc3\x82", + 'Agrave' => "\xc3\x80", + 'Agrave;' => "\xc3\x80", + 'Alpha;' => "\xce\x91", + 'Aring' => "\xc3\x85", + 'Aring;' => "\xc3\x85", + 'Atilde' => "\xc3\x83", + 'Atilde;' => "\xc3\x83", + 'Auml' => "\xc3\x84", + 'Auml;' => "\xc3\x84", + 'Beta;' => "\xce\x92", + 'COPY' => "\xc2\xa9", + 'COPY;' => "\xc2\xa9", + 'Ccedil' => "\xc3\x87", + 'Ccedil;' => "\xc3\x87", + 'Chi;' => "\xce\xa7", + 'Dagger;' => "\xe2\x80\xa1", + 'Delta;' => "\xce\x94", + 'ETH' => "\xc3\x90", + 'ETH;' => "\xc3\x90", + 'Eacute' => "\xc3\x89", + 'Eacute;' => "\xc3\x89", + 'Ecirc' => "\xc3\x8a", + 'Ecirc;' => "\xc3\x8a", + 'Egrave' => "\xc3\x88", + 'Egrave;' => "\xc3\x88", + 'Epsilon;' => "\xce\x95", + 'Eta;' => "\xce\x97", + 'Euml' => "\xc3\x8b", + 'Euml;' => "\xc3\x8b", + 'GT' => '>', + 'GT;' => '>', + 'Gamma;' => "\xce\x93", + 'Iacute' => "\xc3\x8d", + 'Iacute;' => "\xc3\x8d", + 'Icirc' => "\xc3\x8e", + 'Icirc;' => "\xc3\x8e", + 'Igrave' => "\xc3\x8c", + 'Igrave;' => "\xc3\x8c", + 'Iota;' => "\xce\x99", + 'Iuml' => "\xc3\x8f", + 'Iuml;' => 
"\xc3\x8f", + 'Kappa;' => "\xce\x9a", + 'LT' => '<', + 'LT;' => '<', + 'Lambda;' => "\xce\x9b", + 'Mu;' => "\xce\x9c", + 'Ntilde' => "\xc3\x91", + 'Ntilde;' => "\xc3\x91", + 'Nu;' => "\xce\x9d", + 'OElig;' => "\xc5\x92", + 'Oacute' => "\xc3\x93", + 'Oacute;' => "\xc3\x93", + 'Ocirc' => "\xc3\x94", + 'Ocirc;' => "\xc3\x94", + 'Ograve' => "\xc3\x92", + 'Ograve;' => "\xc3\x92", + 'Omega;' => "\xce\xa9", + 'Omicron;' => "\xce\x9f", + 'Oslash' => "\xc3\x98", + 'Oslash;' => "\xc3\x98", + 'Otilde' => "\xc3\x95", + 'Otilde;' => "\xc3\x95", + 'Ouml' => "\xc3\x96", + 'Ouml;' => "\xc3\x96", + 'Phi;' => "\xce\xa6", + 'Pi;' => "\xce\xa0", + 'Prime;' => "\xe2\x80\xb3", + 'Psi;' => "\xce\xa8", + 'QUOT' => '"', + 'QUOT;' => '"', + 'REG' => "\xc2\xae", + 'REG;' => "\xc2\xae", + 'Rho;' => "\xce\xa1", + 'Scaron;' => "\xc5\xa0", + 'Sigma;' => "\xce\xa3", + 'THORN' => "\xc3\x9e", + 'THORN;' => "\xc3\x9e", + 'TRADE;' => "\xe2\x84\xa2", + 'Tau;' => "\xce\xa4", + 'Theta;' => "\xce\x98", + 'Uacute' => "\xc3\x9a", + 'Uacute;' => "\xc3\x9a", + 'Ucirc' => "\xc3\x9b", + 'Ucirc;' => "\xc3\x9b", + 'Ugrave' => "\xc3\x99", + 'Ugrave;' => "\xc3\x99", + 'Upsilon;' => "\xce\xa5", + 'Uuml' => "\xc3\x9c", + 'Uuml;' => "\xc3\x9c", + 'Xi;' => "\xce\x9e", + 'Yacute' => "\xc3\x9d", + 'Yacute;' => "\xc3\x9d", + 'Yuml;' => "\xc5\xb8", + 'Zeta;' => "\xce\x96", + 'aacute' => "\xc3\xa1", + 'aacute;' => "\xc3\xa1", + 'acirc' => "\xc3\xa2", + 'acirc;' => "\xc3\xa2", + 'acute' => "\xc2\xb4", + 'acute;' => "\xc2\xb4", + 'aelig' => "\xc3\xa6", + 'aelig;' => "\xc3\xa6", + 'agrave' => "\xc3\xa0", + 'agrave;' => "\xc3\xa0", + 'alefsym;' => "\xe2\x84\xb5", + 'alpha;' => "\xce\xb1", + 'amp' => '&', + 'amp;' => '&', + 'and;' => "\xe2\x88\xa7", + 'ang;' => "\xe2\x88\xa0", + 'apos;' => "'", + 'aring' => "\xc3\xa5", + 'aring;' => "\xc3\xa5", + 'asymp;' => "\xe2\x89\x88", + 'atilde' => "\xc3\xa3", + 'atilde;' => "\xc3\xa3", + 'auml' => "\xc3\xa4", + 'auml;' => "\xc3\xa4", + 'bdquo;' => "\xe2\x80\x9e", + 'beta;' => "\xce\xb2", 
+ 'brvbar' => "\xc2\xa6", + 'brvbar;' => "\xc2\xa6", + 'bull;' => "\xe2\x80\xa2", + 'cap;' => "\xe2\x88\xa9", + 'ccedil' => "\xc3\xa7", + 'ccedil;' => "\xc3\xa7", + 'cedil' => "\xc2\xb8", + 'cedil;' => "\xc2\xb8", + 'cent' => "\xc2\xa2", + 'cent;' => "\xc2\xa2", + 'chi;' => "\xcf\x87", + 'circ;' => "\xcb\x86", + 'clubs;' => "\xe2\x99\xa3", + 'cong;' => "\xe2\x89\x85", + 'copy' => "\xc2\xa9", + 'copy;' => "\xc2\xa9", + 'crarr;' => "\xe2\x86\xb5", + 'cup;' => "\xe2\x88\xaa", + 'curren' => "\xc2\xa4", + 'curren;' => "\xc2\xa4", + 'dArr;' => "\xe2\x87\x93", + 'dagger;' => "\xe2\x80\xa0", + 'darr;' => "\xe2\x86\x93", + 'deg' => "\xc2\xb0", + 'deg;' => "\xc2\xb0", + 'delta;' => "\xce\xb4", + 'diams;' => "\xe2\x99\xa6", + 'divide' => "\xc3\xb7", + 'divide;' => "\xc3\xb7", + 'eacute' => "\xc3\xa9", + 'eacute;' => "\xc3\xa9", + 'ecirc' => "\xc3\xaa", + 'ecirc;' => "\xc3\xaa", + 'egrave' => "\xc3\xa8", + 'egrave;' => "\xc3\xa8", + 'empty;' => "\xe2\x88\x85", + 'emsp;' => "\xe2\x80\x83", + 'ensp;' => "\xe2\x80\x82", + 'epsilon;' => "\xce\xb5", + 'equiv;' => "\xe2\x89\xa1", + 'eta;' => "\xce\xb7", + 'eth' => "\xc3\xb0", + 'eth;' => "\xc3\xb0", + 'euml' => "\xc3\xab", + 'euml;' => "\xc3\xab", + 'euro;' => "\xe2\x82\xac", + 'exist;' => "\xe2\x88\x83", + 'fnof;' => "\xc6\x92", + 'forall;' => "\xe2\x88\x80", + 'frac12' => "\xc2\xbd", + 'frac12;' => "\xc2\xbd", + 'frac14' => "\xc2\xbc", + 'frac14;' => "\xc2\xbc", + 'frac34' => "\xc2\xbe", + 'frac34;' => "\xc2\xbe", + 'frasl;' => "\xe2\x81\x84", + 'gamma;' => "\xce\xb3", + 'ge;' => "\xe2\x89\xa5", + 'gt' => '>', + 'gt;' => '>', + 'hArr;' => "\xe2\x87\x94", + 'harr;' => "\xe2\x86\x94", + 'hearts;' => "\xe2\x99\xa5", + 'hellip;' => "\xe2\x80\xa6", + 'iacute' => "\xc3\xad", + 'iacute;' => "\xc3\xad", + 'icirc' => "\xc3\xae", + 'icirc;' => "\xc3\xae", + 'iexcl' => "\xc2\xa1", + 'iexcl;' => "\xc2\xa1", + 'igrave' => "\xc3\xac", + 'igrave;' => "\xc3\xac", + 'image;' => "\xe2\x84\x91", + 'infin;' => "\xe2\x88\x9e", + 'int;' => 
"\xe2\x88\xab", + 'iota;' => "\xce\xb9", + 'iquest' => "\xc2\xbf", + 'iquest;' => "\xc2\xbf", + 'isin;' => "\xe2\x88\x88", + 'iuml' => "\xc3\xaf", + 'iuml;' => "\xc3\xaf", + 'kappa;' => "\xce\xba", + 'lArr;' => "\xe2\x87\x90", + 'lambda;' => "\xce\xbb", + 'lang;' => "\xe3\x80\x88", + 'laquo' => "\xc2\xab", + 'laquo;' => "\xc2\xab", + 'larr;' => "\xe2\x86\x90", + 'lceil;' => "\xe2\x8c\x88", + 'ldquo;' => "\xe2\x80\x9c", + 'le;' => "\xe2\x89\xa4", + 'lfloor;' => "\xe2\x8c\x8a", + 'lowast;' => "\xe2\x88\x97", + 'loz;' => "\xe2\x97\x8a", + 'lrm;' => "\xe2\x80\x8e", + 'lsaquo;' => "\xe2\x80\xb9", + 'lsquo;' => "\xe2\x80\x98", + 'lt' => '<', + 'lt;' => '<', + 'macr' => "\xc2\xaf", + 'macr;' => "\xc2\xaf", + 'mdash;' => "\xe2\x80\x94", + 'micro' => "\xc2\xb5", + 'micro;' => "\xc2\xb5", + 'middot' => "\xc2\xb7", + 'middot;' => "\xc2\xb7", + 'minus;' => "\xe2\x88\x92", + 'mu;' => "\xce\xbc", + 'nabla;' => "\xe2\x88\x87", + 'nbsp' => "\xc2\xa0", + 'nbsp;' => "\xc2\xa0", + 'ndash;' => "\xe2\x80\x93", + 'ne;' => "\xe2\x89\xa0", + 'ni;' => "\xe2\x88\x8b", + 'not' => "\xc2\xac", + 'not;' => "\xc2\xac", + 'notin;' => "\xe2\x88\x89", + 'nsub;' => "\xe2\x8a\x84", + 'ntilde' => "\xc3\xb1", + 'ntilde;' => "\xc3\xb1", + 'nu;' => "\xce\xbd", + 'oacute' => "\xc3\xb3", + 'oacute;' => "\xc3\xb3", + 'ocirc' => "\xc3\xb4", + 'ocirc;' => "\xc3\xb4", + 'oelig;' => "\xc5\x93", + 'ograve' => "\xc3\xb2", + 'ograve;' => "\xc3\xb2", + 'oline;' => "\xe2\x80\xbe", + 'omega;' => "\xcf\x89", + 'omicron;' => "\xce\xbf", + 'oplus;' => "\xe2\x8a\x95", + 'or;' => "\xe2\x88\xa8", + 'ordf' => "\xc2\xaa", + 'ordf;' => "\xc2\xaa", + 'ordm' => "\xc2\xba", + 'ordm;' => "\xc2\xba", + 'oslash' => "\xc3\xb8", + 'oslash;' => "\xc3\xb8", + 'otilde' => "\xc3\xb5", + 'otilde;' => "\xc3\xb5", + 'otimes;' => "\xe2\x8a\x97", + 'ouml' => "\xc3\xb6", + 'ouml;' => "\xc3\xb6", + 'para' => "\xc2\xb6", + 'para;' => "\xc2\xb6", + 'part;' => "\xe2\x88\x82", + 'permil;' => "\xe2\x80\xb0", + 'perp;' => "\xe2\x8a\xa5", + 'phi;' => 
"\xcf\x86", + 'pi;' => "\xcf\x80", + 'piv;' => "\xcf\x96", + 'plusmn' => "\xc2\xb1", + 'plusmn;' => "\xc2\xb1", + 'pound' => "\xc2\xa3", + 'pound;' => "\xc2\xa3", + 'prime;' => "\xe2\x80\xb2", + 'prod;' => "\xe2\x88\x8f", + 'prop;' => "\xe2\x88\x9d", + 'psi;' => "\xcf\x88", + 'quot' => '"', + 'quot;' => '"', + 'rArr;' => "\xe2\x87\x92", + 'radic;' => "\xe2\x88\x9a", + 'rang;' => "\xe3\x80\x89", + 'raquo' => "\xc2\xbb", + 'raquo;' => "\xc2\xbb", + 'rarr;' => "\xe2\x86\x92", + 'rceil;' => "\xe2\x8c\x89", + 'rdquo;' => "\xe2\x80\x9d", + 'real;' => "\xe2\x84\x9c", + 'reg' => "\xc2\xae", + 'reg;' => "\xc2\xae", + 'rfloor;' => "\xe2\x8c\x8b", + 'rho;' => "\xcf\x81", + 'rlm;' => "\xe2\x80\x8f", + 'rsaquo;' => "\xe2\x80\xba", + 'rsquo;' => "\xe2\x80\x99", + 'sbquo;' => "\xe2\x80\x9a", + 'scaron;' => "\xc5\xa1", + 'sdot;' => "\xe2\x8b\x85", + 'sect' => "\xc2\xa7", + 'sect;' => "\xc2\xa7", + 'shy' => "\xc2\xad", + 'shy;' => "\xc2\xad", + 'sigma;' => "\xcf\x83", + 'sigmaf;' => "\xcf\x82", + 'sim;' => "\xe2\x88\xbc", + 'spades;' => "\xe2\x99\xa0", + 'sub;' => "\xe2\x8a\x82", + 'sube;' => "\xe2\x8a\x86", + 'sum;' => "\xe2\x88\x91", + 'sup1' => "\xc2\xb9", + 'sup1;' => "\xc2\xb9", + 'sup2' => "\xc2\xb2", + 'sup2;' => "\xc2\xb2", + 'sup3' => "\xc2\xb3", + 'sup3;' => "\xc2\xb3", + 'sup;' => "\xe2\x8a\x83", + 'supe;' => "\xe2\x8a\x87", + 'szlig' => "\xc3\x9f", + 'szlig;' => "\xc3\x9f", + 'tau;' => "\xcf\x84", + 'there4;' => "\xe2\x88\xb4", + 'theta;' => "\xce\xb8", + 'thetasym;' => "\xcf\x91", + 'thinsp;' => "\xe2\x80\x89", + 'thorn' => "\xc3\xbe", + 'thorn;' => "\xc3\xbe", + 'tilde;' => "\xcb\x9c", + 'times' => "\xc3\x97", + 'times;' => "\xc3\x97", + 'trade;' => "\xe2\x84\xa2", + 'uArr;' => "\xe2\x87\x91", + 'uacute' => "\xc3\xba", + 'uacute;' => "\xc3\xba", + 'uarr;' => "\xe2\x86\x91", + 'ucirc' => "\xc3\xbb", + 'ucirc;' => "\xc3\xbb", + 'ugrave' => "\xc3\xb9", + 'ugrave;' => "\xc3\xb9", + 'uml' => "\xc2\xa8", + 'uml;' => "\xc2\xa8", + 'upsih;' => "\xcf\x92", + 'upsilon;' => 
"\xcf\x85", + 'uuml' => "\xc3\xbc", + 'uuml;' => "\xc3\xbc", + 'weierp;' => "\xe2\x84\x98", + 'xi;' => "\xce\xbe", + 'yacute' => "\xc3\xbd", + 'yacute;' => "\xc3\xbd", + 'yen' => "\xc2\xa5", + 'yen;' => "\xc2\xa5", + 'yuml' => "\xc3\xbf", + 'yuml;' => "\xc3\xbf", + 'zeta;' => "\xce\xb6", + 'zwj;' => "\xe2\x80\x8d", + 'zwnj;' => "\xe2\x80\x8c" + } + + ENCODINGS = %w[ + ansi_x3.4-1968 + iso-ir-6 + ansi_x3.4-1986 + iso_646.irv:1991 + ascii + iso646-us + us-ascii + us + ibm367 + cp367 + csascii + ks_c_5601-1987 + korean + iso-2022-kr + csiso2022kr + euc-kr + iso-2022-jp + csiso2022jp + iso-2022-jp-2 + iso-ir-58 + chinese + csiso58gb231280 + iso_8859-1:1987 + iso-ir-100 + iso_8859-1 + iso-8859-1 + latin1 + l1 + ibm819 + cp819 + csisolatin1 + iso_8859-2:1987 + iso-ir-101 + iso_8859-2 + iso-8859-2 + latin2 + l2 + csisolatin2 + iso_8859-3:1988 + iso-ir-109 + iso_8859-3 + iso-8859-3 + latin3 + l3 + csisolatin3 + iso_8859-4:1988 + iso-ir-110 + iso_8859-4 + iso-8859-4 + latin4 + l4 + csisolatin4 + iso_8859-6:1987 + iso-ir-127 + iso_8859-6 + iso-8859-6 + ecma-114 + asmo-708 + arabic + csisolatinarabic + iso_8859-7:1987 + iso-ir-126 + iso_8859-7 + iso-8859-7 + elot_928 + ecma-118 + greek + greek8 + csisolatingreek + iso_8859-8:1988 + iso-ir-138 + iso_8859-8 + iso-8859-8 + hebrew + csisolatinhebrew + iso_8859-5:1988 + iso-ir-144 + iso_8859-5 + iso-8859-5 + cyrillic + csisolatincyrillic + iso_8859-9:1989 + iso-ir-148 + iso_8859-9 + iso-8859-9 + latin5 + l5 + csisolatin5 + iso-8859-10 + iso-ir-157 + l6 + iso_8859-10:1992 + csisolatin6 + latin6 + hp-roman8 + roman8 + r8 + ibm037 + cp037 + csibm037 + ibm424 + cp424 + csibm424 + ibm437 + cp437 + 437 + cspc8codepage437 + ibm500 + cp500 + csibm500 + ibm775 + cp775 + cspc775baltic + ibm850 + cp850 + 850 + cspc850multilingual + ibm852 + cp852 + 852 + cspcp852 + ibm855 + cp855 + 855 + csibm855 + ibm857 + cp857 + 857 + csibm857 + ibm860 + cp860 + 860 + csibm860 + ibm861 + cp861 + 861 + cp-is + csibm861 + ibm862 + cp862 + 862 + 
cspc862latinhebrew + ibm863 + cp863 + 863 + csibm863 + ibm864 + cp864 + csibm864 + ibm865 + cp865 + 865 + csibm865 + ibm866 + cp866 + 866 + csibm866 + ibm869 + cp869 + 869 + cp-gr + csibm869 + ibm1026 + cp1026 + csibm1026 + koi8-r + cskoi8r + koi8-u + big5-hkscs + ptcp154 + csptcp154 + pt154 + cp154 + utf-7 + utf-16be + utf-16le + utf-16 + utf-8 + iso-8859-13 + iso-8859-14 + iso-ir-199 + iso_8859-14:1998 + iso_8859-14 + latin8 + iso-celtic + l8 + iso-8859-15 + iso_8859-15 + iso-8859-16 + iso-ir-226 + iso_8859-16:2001 + iso_8859-16 + latin10 + l10 + gbk + cp936 + ms936 + gb18030 + shift_jis + ms_kanji + csshiftjis + euc-jp + gb2312 + big5 + csbig5 + windows-1250 + windows-1251 + windows-1252 + windows-1253 + windows-1254 + windows-1255 + windows-1256 + windows-1257 + windows-1258 + tis-620 + hz-gb-2312 + ] + +end diff --git a/vendor/plugins/HTML5lib/lib/html5/filters.rb b/vendor/plugins/HTML5lib/lib/html5/filters.rb new file mode 100644 index 00000000..74c7f0e0 --- /dev/null +++ b/vendor/plugins/HTML5lib/lib/html5/filters.rb @@ -0,0 +1 @@ +require 'html5/filters/optionaltags' diff --git a/vendor/plugins/HTML5lib/lib/html5lib/filters/base.rb b/vendor/plugins/HTML5lib/lib/html5/filters/base.rb similarity index 89% rename from vendor/plugins/HTML5lib/lib/html5lib/filters/base.rb rename to vendor/plugins/HTML5lib/lib/html5/filters/base.rb index c1a5c660..0cb023d2 100644 --- a/vendor/plugins/HTML5lib/lib/html5lib/filters/base.rb +++ b/vendor/plugins/HTML5lib/lib/html5/filters/base.rb @@ -1,7 +1,7 @@ require 'delegate' require 'enumerator' -module HTML5lib +module HTML5 module Filters class Base < SimpleDelegator include Enumerable diff --git a/vendor/plugins/HTML5lib/lib/html5lib/filters/inject_meta_charset.rb b/vendor/plugins/HTML5lib/lib/html5/filters/inject_meta_charset.rb similarity index 65% rename from vendor/plugins/HTML5lib/lib/html5lib/filters/inject_meta_charset.rb rename to vendor/plugins/HTML5lib/lib/html5/filters/inject_meta_charset.rb index 
00dc980d..c998bf9c 100644 --- a/vendor/plugins/HTML5lib/lib/html5lib/filters/inject_meta_charset.rb +++ b/vendor/plugins/HTML5lib/lib/html5/filters/inject_meta_charset.rb @@ -1,6 +1,6 @@ -require 'html5lib/filters/base' +require 'html5/filters/base' -module HTML5lib +module HTML5 module Filters class InjectMetaCharset < Base def initialize(source, encoding) @@ -21,9 +21,9 @@ module HTML5lib when :EmptyTag if token[:name].downcase == "meta" # replace charset with actual encoding - token[:data].each_with_index do |(name,value),index| + token[:data].each_with_index do |(name, value), index| if name == 'charset' - token[:data][index][1]=@encoding + token[:data][index][1] = @encoding meta_found = true end end @@ -31,7 +31,7 @@ module HTML5lib # replace charset with actual encoding has_http_equiv_content_type = false content_index = -1 - token[:data].each_with_index do |(name,value),i| + token[:data].each_with_index do |(name, value), i| if name.downcase == 'charset' token[:data][i] = ['charset', @encoding] meta_found = true @@ -43,30 +43,27 @@ module HTML5lib end end - if not meta_found - if has_http_equiv_content_type and content_index >= 0 - token[:data][content_index][1] = - 'text/html; charset=%s' % @encoding + if !meta_found + if has_http_equiv_content_type && content_index >= 0 + token[:data][content_index][1] = 'text/html; charset=%s' % @encoding meta_found = true end end - elsif token[:name].downcase == "head" and not meta_found + elsif token[:name].downcase == "head" && !meta_found # insert meta into empty head - yield(:type => :StartTag, :name => "head", :data => token[:data]) - yield(:type => :EmptyTag, :name => "meta", - :data => [["charset", @encoding]]) - yield(:type => :EndTag, :name => "head") + yield :type => :StartTag, :name => "head", :data => token[:data] + yield :type => :EmptyTag, :name => "meta", :data => [["charset", @encoding]] + yield :type => :EndTag, :name => "head" meta_found = true next end when :EndTag - if token[:name].downcase == "head" 
and pending.any? + if token[:name].downcase == "head" && pending.any? # insert meta into head (if necessary) and flush pending queue yield pending.shift - yield(:type => :EmptyTag, :name => "meta", - :data => [["charset", @encoding]]) if not meta_found + yield :type => :EmptyTag, :name => "meta", :data => [["charset", @encoding]] if !meta_found yield pending.shift while pending.any? meta_found = true state = :post_head diff --git a/vendor/plugins/HTML5lib/lib/html5lib/filters/optionaltags.rb b/vendor/plugins/HTML5lib/lib/html5/filters/optionaltags.rb similarity index 97% rename from vendor/plugins/HTML5lib/lib/html5lib/filters/optionaltags.rb rename to vendor/plugins/HTML5lib/lib/html5/filters/optionaltags.rb index aacf3b73..ba9a11b0 100644 --- a/vendor/plugins/HTML5lib/lib/html5lib/filters/optionaltags.rb +++ b/vendor/plugins/HTML5lib/lib/html5/filters/optionaltags.rb @@ -1,7 +1,7 @@ -require 'html5lib/constants' -require 'html5lib/filters/base' +require 'html5/constants' +require 'html5/filters/base' -module HTML5lib +module HTML5 module Filters class OptionalTagFilter < Base @@ -75,8 +75,7 @@ module HTML5lib if type == :StartTag # omit the thead and tfoot elements' end tag when they are # immediately followed by a tbody element. See is_optional_end. 
- if previous and previous[:type] == :EndTag and \ - %w(tbody thead tfoot).include?(previous[:name]) + if previous and previous[:type] == :EndTag && %w(tbody thead tfoot).include?(previous[:name]) return false end @@ -85,7 +84,7 @@ module HTML5lib return false end end - return false + return false end def is_optional_end(tagname, nexttok) diff --git a/vendor/plugins/HTML5lib/lib/html5lib/filters/sanitizer.rb b/vendor/plugins/HTML5lib/lib/html5/filters/sanitizer.rb similarity index 73% rename from vendor/plugins/HTML5lib/lib/html5lib/filters/sanitizer.rb rename to vendor/plugins/HTML5lib/lib/html5/filters/sanitizer.rb index db9a12e0..8e25f594 100644 --- a/vendor/plugins/HTML5lib/lib/html5lib/filters/sanitizer.rb +++ b/vendor/plugins/HTML5lib/lib/html5/filters/sanitizer.rb @@ -1,7 +1,7 @@ -require 'html5lib/filters/base' -require 'html5lib/sanitizer' +require 'html5/filters/base' +require 'html5/sanitizer' -module HTML5lib +module HTML5 module Filters class HTMLSanitizeFilter < Base include HTMLSanitizeModule diff --git a/vendor/plugins/HTML5lib/lib/html5lib/filters/whitespace.rb b/vendor/plugins/HTML5lib/lib/html5/filters/whitespace.rb similarity index 84% rename from vendor/plugins/HTML5lib/lib/html5lib/filters/whitespace.rb rename to vendor/plugins/HTML5lib/lib/html5/filters/whitespace.rb index 3b85fd7b..18b07b59 100644 --- a/vendor/plugins/HTML5lib/lib/html5lib/filters/whitespace.rb +++ b/vendor/plugins/HTML5lib/lib/html5/filters/whitespace.rb @@ -1,7 +1,7 @@ -require 'html5lib/constants' -require 'html5lib/filters/base' +require 'html5/constants' +require 'html5/filters/base' -module HTML5lib +module HTML5 module Filters class WhitespaceFilter < Base @@ -21,7 +21,7 @@ module HTML5lib preserve -= 1 if preserve > 0 when :SpaceCharacters - next if preserve == 0 + token[:data] = " " if preserve == 0 && token[:data] when :Characters token[:data] = token[:data].sub(SPACES,' ') if preserve == 0 diff --git a/vendor/plugins/HTML5lib/lib/html5lib/html5parser.rb 
b/vendor/plugins/HTML5lib/lib/html5/html5parser.rb similarity index 63% rename from vendor/plugins/HTML5lib/lib/html5lib/html5parser.rb rename to vendor/plugins/HTML5lib/lib/html5/html5parser.rb index bf48930a..b20238b8 100644 --- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser.rb +++ b/vendor/plugins/HTML5lib/lib/html5/html5parser.rb @@ -1,246 +1,248 @@ -require 'html5lib/constants' -require 'html5lib/tokenizer' -require 'html5lib/treebuilders/rexml' - -Dir.glob(File.join(File.dirname(__FILE__), 'html5parser', '*_phase.rb')).each do |path| - require 'html5lib/html5parser/' + File.basename(path) -end - -module HTML5lib - - # Error in parsed document - class ParseError < Exception; end - class AssertionError < Exception; end - - # HTML parser. Generates a tree structure from a stream of (possibly malformed) HTML - # - class HTMLParser - - attr_accessor :phase, :firstStartTag, :innerHTML, :lastPhase, :insertFromTable - - attr_reader :phases, :tokenizer, :tree, :errors - - def self.parse(stream, options = {}) - encoding = options.delete(:encoding) - new(options).parse(stream,encoding) - end - - def self.parseFragment(stream, options = {}) - container = options.delete(:container) || 'div' - encoding = options.delete(:encoding) - new(options).parseFragment(stream,container,encoding) - end - - @@phases = %w( initial rootElement beforeHead inHead afterHead inBody inTable inCaption - inColumnGroup inTableBody inRow inCell inSelect afterBody inFrameset afterFrameset trailingEnd ) - - # :strict - raise an exception when a parse error is encountered - # :tree - a treebuilder class controlling the type of tree that will be - # returned. 
Built in treebuilders can be accessed through - # HTML5lib::TreeBuilders[treeType] - def initialize(options = {}) - @strict = false - @errors = [] - - @tokenizer = HTMLTokenizer - @tree = TreeBuilders::REXML::TreeBuilder - - options.each { |name, value| instance_variable_set("@#{name}", value) } - - @tree = @tree.new - - @phases = @@phases.inject({}) do |phases, phase_name| - phase_class_name = phase_name.sub(/(.)/) { $1.upcase } + 'Phase' - phases[phase_name.to_sym] = HTML5lib.const_get(phase_class_name).new(self, @tree) - phases - end - end - - def _parse(stream, innerHTML, encoding, container = 'div') - @tree.reset - @firstStartTag = false - @errors = [] - - @tokenizer = @tokenizer.class unless Class === @tokenizer - @tokenizer = @tokenizer.new(stream, :encoding => encoding, - :parseMeta => !innerHTML) - - if innerHTML - case @innerHTML = container.downcase - when 'title', 'textarea' - @tokenizer.contentModelFlag = :RCDATA - when 'style', 'script', 'xmp', 'iframe', 'noembed', 'noframes', 'noscript' - @tokenizer.contentModelFlag = :CDATA - when 'plaintext' - @tokenizer.contentModelFlag = :PLAINTEXT - else - # contentModelFlag already is PCDATA - #@tokenizer.contentModelFlag = :PCDATA - end - - @phase = @phases[:rootElement] - @phase.insertHtmlElement - resetInsertionMode - else - @innerHTML = false - @phase = @phases[:initial] - end - - # We only seem to have InBodyPhase testcases where the following is - # relevant ... 
need others too - @lastPhase = nil - - # XXX This is temporary for the moment so there isn't any other - # changes needed for the parser to work with the iterable tokenizer - @tokenizer.each do |token| - token = normalizeToken(token) - - method = 'process%s' % token[:type] - - case token[:type] - when :Characters, :SpaceCharacters, :Comment - @phase.send method, token[:data] - when :StartTag - @phase.send method, token[:name], token[:data] - when :EndTag - @phase.send method, token[:name] - when :Doctype - @phase.send method, token[:name], token[:publicId], - token[:systemId], token[:correct] - else - parseError(token[:data]) - end - end - - # When the loop finishes it's EOF - @phase.processEOF - end - - # Parse a HTML document into a well-formed tree - # - # stream - a filelike object or string containing the HTML to be parsed - # - # The optional encoding parameter must be a string that indicates - # the encoding. If specified, that encoding will be used, - # regardless of any BOM or later declaration (such as in a meta - # element) - def parse(stream, encoding=nil) - _parse(stream, false, encoding) - return @tree.getDocument - end - - # Parse a HTML fragment into a well-formed tree fragment - - # container - name of the element we're setting the innerHTML property - # if set to nil, default to 'div' - # - # stream - a filelike object or string containing the HTML to be parsed - # - # The optional encoding parameter must be a string that indicates - # the encoding. If specified, that encoding will be used, - # regardless of any BOM or later declaration (such as in a meta - # element) - def parseFragment(stream, container='div', encoding=nil) - _parse(stream, true, encoding, container) - return @tree.getFragment - end - - def parseError(data = 'XXX ERROR MESSAGE NEEDED') - # XXX The idea is to make data mandatory. 
- @errors.push([@tokenizer.stream.position, data]) - raise ParseError if @strict - end - - # HTML5 specific normalizations to the token stream - def normalizeToken(token) - - if token[:type] == :EmptyTag - # When a solidus (/) is encountered within a tag name what happens - # depends on whether the current tag name matches that of a void - # element. If it matches a void element atheists did the wrong - # thing and if it doesn't it's wrong for everyone. - - unless VOID_ELEMENTS.include?(token[:name]) - parseError(_('Solidus (/) incorrectly placed in tag.')) - end - - token[:type] = :StartTag - end - - if token[:type] == :StartTag - token[:name] = token[:name].tr(ASCII_UPPERCASE,ASCII_LOWERCASE) - - # We need to remove the duplicate attributes and convert attributes - # to a dict so that [["x", "y"], ["x", "z"]] becomes {"x": "y"} - - unless token[:data].empty? - data = token[:data].reverse.map { |attr, value| [attr.tr(ASCII_UPPERCASE, ASCII_LOWERCASE), value] } - token[:data] = Hash[*data.flatten] - end - - elsif token[:type] == :EndTag - parseError(_('End tag contains unexpected attributes.')) unless token[:data].empty? - token[:name] = token[:name].downcase - end - - return token - end - - @@new_modes = { - 'select' => :inSelect, - 'td' => :inCell, - 'th' => :inCell, - 'tr' => :inRow, - 'tbody' => :inTableBody, - 'thead' => :inTableBody, - 'tfoot' => :inTableBody, - 'caption' => :inCaption, - 'colgroup' => :inColumnGroup, - 'table' => :inTable, - 'head' => :inBody, - 'body' => :inBody, - 'frameset' => :inFrameset - } - - def resetInsertionMode - # The name of this method is mostly historical. (It's also used in the - # specification.) 
- last = false - - @tree.openElements.reverse.each do |node| - nodeName = node.name - - if node == @tree.openElements[0] - last = true - unless ['td', 'th'].include?(nodeName) - # XXX - # assert @innerHTML - nodeName = @innerHTML - end - end - - # Check for conditions that should only happen in the innerHTML - # case - if ['select', 'colgroup', 'head', 'frameset'].include?(nodeName) - # XXX - # assert @innerHTML - end - - if @@new_modes.has_key?(nodeName) - @phase = @phases[@@new_modes[nodeName]] - elsif nodeName == 'html' - @phase = @phases[@tree.headPointer.nil?? :beforeHead : :afterHead] - elsif last - @phase = @phases[:inBody] - else - next - end - - break - end - end - - def _(string); string; end - end - -end +require 'html5/constants' +require 'html5/tokenizer' +require 'html5/treebuilders/rexml' + +Dir.glob(File.join(File.dirname(__FILE__), 'html5parser', '*_phase.rb')).each do |path| + require 'html5/html5parser/' + File.basename(path) +end + +module HTML5 + + # Error in parsed document + class ParseError < Exception; end + class AssertionError < Exception; end + + # HTML parser. 
Generates a tree structure from a stream of (possibly malformed) HTML + # + class HTMLParser + + attr_accessor :phase, :first_start_tag, :inner_html, :last_phase, :insert_from_table + + attr_reader :phases, :tokenizer, :tree, :errors + + def self.parse(stream, options = {}) + encoding = options.delete(:encoding) + new(options).parse(stream,encoding) + end + + def self.parse_fragment(stream, options = {}) + container = options.delete(:container) || 'div' + encoding = options.delete(:encoding) + new(options).parse_fragment(stream, container, encoding) + end + + @@phases = %w( initial rootElement beforeHead inHead afterHead inBody inTable inCaption + inColumnGroup inTableBody inRow inCell inSelect afterBody inFrameset afterFrameset trailingEnd ) + + # :strict - raise an exception when a parse error is encountered + # :tree - a treebuilder class controlling the type of tree that will be + # returned. Built in treebuilders can be accessed through + # HTML5::TreeBuilders[treeType] + def initialize(options = {}) + @strict = false + @errors = [] + + @tokenizer = HTMLTokenizer + @tree = TreeBuilders::REXML::TreeBuilder + + options.each {|name, value| instance_variable_set("@#{name}", value) } + @lowercase_attr_name = nil unless instance_variables.include?("@lowercase_attr_name") + @lowercase_element_name = nil unless instance_variables.include?("@lowercase_element_name") + + @tree = @tree.new + + @phases = @@phases.inject({}) do |phases, phase_name| + phase_class_name = phase_name.sub(/(.)/) { $1.upcase } + 'Phase' + phases[phase_name.to_sym] = HTML5.const_get(phase_class_name).new(self, @tree) + phases + end + end + + def _parse(stream, inner_html, encoding, container = 'div') + @tree.reset + @first_start_tag = false + @errors = [] + + @tokenizer = @tokenizer.class unless Class === @tokenizer + @tokenizer = @tokenizer.new(stream, :encoding => encoding, + :parseMeta => !inner_html, :lowercase_attr_name => @lowercase_attr_name, :lowercase_element_name => 
@lowercase_element_name) + + if inner_html + case @inner_html = container.downcase + when 'title', 'textarea' + @tokenizer.content_model_flag = :RCDATA + when 'style', 'script', 'xmp', 'iframe', 'noembed', 'noframes', 'noscript' + @tokenizer.content_model_flag = :CDATA + when 'plaintext' + @tokenizer.content_model_flag = :PLAINTEXT + else + # content_model_flag already is PCDATA + #@tokenizer.content_model_flag = :PCDATA + end + + @phase = @phases[:rootElement] + @phase.insert_html_element + reset_insertion_mode + else + @inner_html = false + @phase = @phases[:initial] + end + + # We only seem to have InBodyPhase testcases where the following is + # relevant ... need others too + @last_phase = nil + + # XXX This is temporary for the moment so there isn't any other + # changes needed for the parser to work with the iterable tokenizer + @tokenizer.each do |token| + token = normalize_token(token) + + method = 'process%s' % token[:type] + + case token[:type] + when :Characters, :SpaceCharacters, :Comment + @phase.send method, token[:data] + when :StartTag + @phase.send method, token[:name], token[:data] + when :EndTag + @phase.send method, token[:name] + when :Doctype + @phase.send method, token[:name], token[:publicId], + token[:systemId], token[:correct] + else + parse_error(token[:data]) + end + end + + # When the loop finishes it's EOF + @phase.process_eof + end + + # Parse a HTML document into a well-formed tree + # + # stream - a filelike object or string containing the HTML to be parsed + # + # The optional encoding parameter must be a string that indicates + # the encoding. 
If specified, that encoding will be used, + # regardless of any BOM or later declaration (such as in a meta + # element) + def parse(stream, encoding=nil) + _parse(stream, false, encoding) + @tree.get_document + end + + # Parse a HTML fragment into a well-formed tree fragment + + # container - name of the element we're setting the inner_html property + # if set to nil, default to 'div' + # + # stream - a filelike object or string containing the HTML to be parsed + # + # The optional encoding parameter must be a string that indicates + # the encoding. If specified, that encoding will be used, + # regardless of any BOM or later declaration (such as in a meta + # element) + def parse_fragment(stream, container='div', encoding=nil) + _parse(stream, true, encoding, container) + @tree.get_fragment + end + + def parse_error(data = 'XXX ERROR MESSAGE NEEDED') + # XXX The idea is to make data mandatory. + @errors.push([@tokenizer.stream.position, data]) + raise ParseError if @strict + end + + # HTML5 specific normalizations to the token stream + def normalize_token(token) + + if token[:type] == :EmptyTag + # When a solidus (/) is encountered within a tag name what happens + # depends on whether the current tag name matches that of a void + # element. If it matches a void element atheists did the wrong + # thing and if it doesn't it's wrong for everyone. + + unless VOID_ELEMENTS.include?(token[:name]) + parse_error(_('Solidus (/) incorrectly placed in tag.')) + end + + token[:type] = :StartTag + end + + if token[:type] == :StartTag + token[:name] = token[:name].downcase + + # We need to remove the duplicate attributes and convert attributes + # to a dict so that [["x", "y"], ["x", "z"]] becomes {"x": "y"} + + unless token[:data].empty? + data = token[:data].reverse.map {|attr, value| [attr.downcase, value] } + token[:data] = Hash[*data.flatten] + end + + elsif token[:type] == :EndTag + parse_error(_('End tag contains unexpected attributes.')) unless token[:data].empty? 
+ token[:name] = token[:name].downcase + end + + token + end + + @@new_modes = { + 'select' => :inSelect, + 'td' => :inCell, + 'th' => :inCell, + 'tr' => :inRow, + 'tbody' => :inTableBody, + 'thead' => :inTableBody, + 'tfoot' => :inTableBody, + 'caption' => :inCaption, + 'colgroup' => :inColumnGroup, + 'table' => :inTable, + 'head' => :inBody, + 'body' => :inBody, + 'frameset' => :inFrameset + } + + def reset_insertion_mode + # The name of this method is mostly historical. (It's also used in the + # specification.) + last = false + + @tree.open_elements.reverse.each do |node| + node_name = node.name + + if node == @tree.open_elements.first + last = true + unless ['td', 'th'].include?(node_name) + # XXX + # assert @inner_html + node_name = @inner_html + end + end + + # Check for conditions that should only happen in the inner_html + # case + if ['select', 'colgroup', 'head', 'frameset'].include?(node_name) + # XXX + # assert @inner_html + end + + if @@new_modes.has_key?(node_name) + @phase = @phases[@@new_modes[node_name]] + elsif node_name == 'html' + @phase = @phases[@tree.head_pointer.nil?? 
:beforeHead : :afterHead] + elsif last + @phase = @phases[:inBody] + else + next + end + + break + end + end + + def _(string); string; end + end + +end diff --git a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_body_phase.rb b/vendor/plugins/HTML5lib/lib/html5/html5parser/after_body_phase.rb similarity index 59% rename from vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_body_phase.rb rename to vendor/plugins/HTML5lib/lib/html5/html5parser/after_body_phase.rb index 27778ef1..5d535423 100644 --- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_body_phase.rb +++ b/vendor/plugins/HTML5lib/lib/html5/html5parser/after_body_phase.rb @@ -1,6 +1,6 @@ -require 'html5lib/html5parser/phase' +require 'html5/html5parser/phase' -module HTML5lib +module HTML5 class AfterBodyPhase < Phase handle_end 'html' @@ -8,36 +8,36 @@ module HTML5lib def processComment(data) # This is needed because data is to be appended to the element # here and not to whatever is currently open. - @tree.insertComment(data, @tree.openElements[0]) + @tree.insert_comment(data, @tree.open_elements.first) end def processCharacters(data) - @parser.parseError(_('Unexpected non-space characters in the after body phase.')) + parse_error(_('Unexpected non-space characters in the after body phase.')) @parser.phase = @parser.phases[:inBody] @parser.phase.processCharacters(data) end def processStartTag(name, attributes) - @parser.parseError(_("Unexpected start tag token (#{name}) in the after body phase.")) + parse_error(_("Unexpected start tag token (#{name}) in the after body phase.")) @parser.phase = @parser.phases[:inBody] @parser.phase.processStartTag(name, attributes) end def endTagHtml(name) - if @parser.innerHTML - @parser.parseError + if @parser.inner_html + parse_error else # XXX: This may need to be done, not sure - # Don't set lastPhase to the current phase but to the inBody phase + # Don't set last_phase to the current phase but to the inBody phase # instead. 
No need for extra parse errors if there's something after . # Try "XX" for instance. - @parser.lastPhase = @parser.phase - @parser.phase = @parser.phases[:trailingEnd] + @parser.last_phase = @parser.phase + @parser.phase = @parser.phases[:trailingEnd] end end def endTagOther(name) - @parser.parseError(_("Unexpected end tag token (#{name}) in the after body phase.")) + parse_error(_("Unexpected end tag token (#{name}) in the after body phase.")) @parser.phase = @parser.phases[:inBody] @parser.phase.processEndTag(name) end diff --git a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_frameset_phase.rb b/vendor/plugins/HTML5lib/lib/html5/html5parser/after_frameset_phase.rb similarity index 50% rename from vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_frameset_phase.rb rename to vendor/plugins/HTML5lib/lib/html5/html5parser/after_frameset_phase.rb index 376c5f38..0445f3c1 100644 --- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_frameset_phase.rb +++ b/vendor/plugins/HTML5lib/lib/html5/html5parser/after_frameset_phase.rb @@ -1,6 +1,6 @@ -require 'html5lib/html5parser/phase' +require 'html5/html5parser/phase' -module HTML5lib +module HTML5 class AfterFramesetPhase < Phase # http://www.whatwg.org/specs/web-apps/current-work/#after3 @@ -10,7 +10,7 @@ module HTML5lib handle_end 'html' def processCharacters(data) - @parser.parseError(_('Unexpected non-space characters in the after frameset phase. Ignored.')) + parse_error(_('Unexpected non-space characters in the after frameset phase. Ignored.')) end def startTagNoframes(name, attributes) @@ -18,16 +18,16 @@ module HTML5lib end def startTagOther(name, attributes) - @parser.parseError(_("Unexpected start tag (#{name}) in the after frameset phase. Ignored.")) + parse_error(_("Unexpected start tag (#{name}) in the after frameset phase. 
Ignored.")) end def endTagHtml(name) - @parser.lastPhase = @parser.phase - @parser.phase = @parser.phases[:trailingEnd] + @parser.last_phase = @parser.phase + @parser.phase = @parser.phases[:trailingEnd] end def endTagOther(name) - @parser.parseError(_("Unexpected end tag (#{name}) in the after frameset phase. Ignored.")) + parse_error(_("Unexpected end tag (#{name}) in the after frameset phase. Ignored.")) end end diff --git a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_head_phase.rb b/vendor/plugins/HTML5lib/lib/html5/html5parser/after_head_phase.rb similarity index 66% rename from vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_head_phase.rb rename to vendor/plugins/HTML5lib/lib/html5/html5parser/after_head_phase.rb index 37c8bf6b..2f48946b 100644 --- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_head_phase.rb +++ b/vendor/plugins/HTML5lib/lib/html5/html5parser/after_head_phase.rb @@ -1,48 +1,48 @@ -require 'html5lib/html5parser/phase' +require 'html5/html5parser/phase' -module HTML5lib +module HTML5 class AfterHeadPhase < Phase - + handle_start 'html', 'body', 'frameset', %w( base link meta script style title ) => 'FromHead' - def processEOF - anythingElse - @parser.phase.processEOF + def process_eof + anything_else + @parser.phase.process_eof end def processCharacters(data) - anythingElse + anything_else @parser.phase.processCharacters(data) end def startTagBody(name, attributes) - @tree.insertElement(name, attributes) + @tree.insert_element(name, attributes) @parser.phase = @parser.phases[:inBody] end def startTagFrameset(name, attributes) - @tree.insertElement(name, attributes) + @tree.insert_element(name, attributes) @parser.phase = @parser.phases[:inFrameset] end def startTagFromHead(name, attributes) - @parser.parseError(_("Unexpected start tag (#{name}) that can be in head. Moved.")) + parse_error(_("Unexpected start tag (#{name}) that can be in head. 
Moved.")) @parser.phase = @parser.phases[:inHead] @parser.phase.processStartTag(name, attributes) end def startTagOther(name, attributes) - anythingElse + anything_else @parser.phase.processStartTag(name, attributes) end def processEndTag(name) - anythingElse + anything_else @parser.phase.processEndTag(name) end - def anythingElse - @tree.insertElement('body', {}) + def anything_else + @tree.insert_element('body', {}) @parser.phase = @parser.phases[:inBody] end diff --git a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/before_head_phase.rb b/vendor/plugins/HTML5lib/lib/html5/html5parser/before_head_phase.rb similarity index 63% rename from vendor/plugins/HTML5lib/lib/html5lib/html5parser/before_head_phase.rb rename to vendor/plugins/HTML5lib/lib/html5/html5parser/before_head_phase.rb index 98a9d023..79785d07 100644 --- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/before_head_phase.rb +++ b/vendor/plugins/HTML5lib/lib/html5/html5parser/before_head_phase.rb @@ -1,15 +1,15 @@ -require 'html5lib/html5parser/phase' +require 'html5/html5parser/phase' -module HTML5lib +module HTML5 class BeforeHeadPhase < Phase handle_start 'html', 'head' - handle_end %w( html head body br ) => 'ImplyHead' + handle_end %w( html head body br p ) => 'ImplyHead' - def processEOF + def process_eof startTagHead('head', {}) - @parser.phase.processEOF + @parser.phase.process_eof end def processCharacters(data) @@ -18,8 +18,8 @@ module HTML5lib end def startTagHead(name, attributes) - @tree.insertElement(name, attributes) - @tree.headPointer = @tree.openElements[-1] + @tree.insert_element(name, attributes) + @tree.head_pointer = @tree.open_elements[-1] @parser.phase = @parser.phases[:inHead] end @@ -34,7 +34,7 @@ module HTML5lib end def endTagOther(name) - @parser.parseError(_("Unexpected end tag (#{name}) after the (implied) root element.")) + parse_error(_("Unexpected end tag (#{name}) after the (implied) root element.")) end end diff --git 
a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_body_phase.rb b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_body_phase.rb similarity index 65% rename from vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_body_phase.rb rename to vendor/plugins/HTML5lib/lib/html5/html5parser/in_body_phase.rb index 57720292..ca59bbf7 100644 --- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_body_phase.rb +++ b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_body_phase.rb @@ -1,6 +1,6 @@ -require 'html5lib/html5parser/phase' +require 'html5/html5parser/phase' -module HTML5lib +module HTML5 class InBodyPhase < Phase # http://www.whatwg.org/specs/web-apps/current-work/#in-body @@ -51,25 +51,40 @@ module HTML5lib # for special handling of whitespace in
       @processSpaceCharactersDropNewline = false
+      if $-w
+        $-w = false
+        alias processSpaceCharactersNonPre processSpaceCharacters
+        $-w = true
+      else
+        alias processSpaceCharactersNonPre processSpaceCharacters
+      end
     end
 
     def processSpaceCharactersDropNewline(data)
    -      #Sometimes (start of <pre> blocks) we want to drop leading newlines
    
-      @processSpaceCharactersDropNewline = false
-      if (data.length > 0 and data[0] == ?\n and 
-        %w[pre textarea].include?(@tree.openElements[-1].name) and
-        not @tree.openElements[-1].hasContent)
    +      # #Sometimes (start of <pre> blocks) we want to drop leading newlines
    
+
+      if $-w
+        $-w = false
+        alias processSpaceCharacters processSpaceCharactersNonPre
+        $-w = true
+      else
+        alias processSpaceCharacters processSpaceCharactersNonPre
+      end
+
+      if (data.length > 0 and data[0] == ?\n && 
+        %w[pre textarea].include?(@tree.open_elements.last.name) && !@tree.open_elements.last.hasContent)
         data = data[1..-1]
       end
-      @tree.insertText(data) if data.length > 0
+
+      if data.length > 0
+        @tree.reconstructActiveFormattingElements
+        @tree.insertText(data)
+      end
     end
 
     def processSpaceCharacters(data)
-      if @processSpaceCharactersDropNewline
-        processSpaceCharactersDropNewline(data)
-      else
-        super(data)
-      end
+      @tree.reconstructActiveFormattingElements()
+      @tree.insertText(data)
     end
 
     def processCharacters(data)
@@ -85,20 +100,19 @@ module HTML5lib
     end
 
     def startTagTitle(name, attributes)
-      @parser.parseError(_("Unexpected start tag (#{name}) that belongs in the head. Moved."))
+      parse_error(_("Unexpected start tag (#{name}) that belongs in the head. Moved."))
       @parser.phases[:inHead].processStartTag(name, attributes)
     end
 
     def startTagBody(name, attributes)
-      @parser.parseError(_('Unexpected start tag (body).'))
+      parse_error(_('Unexpected start tag (body).'))
 
-      if (@tree.openElements.length == 1 or
-        @tree.openElements[1].name != 'body')
-        assert @parser.innerHTML
+      if (@tree.open_elements.length == 1 || @tree.open_elements[1].name != 'body')
+        assert @parser.inner_html
       else
         attributes.each do |attr, value|
-          unless @tree.openElements[1].attributes.has_key?(attr)
-            @tree.openElements[1].attributes[attr] = value
+          unless @tree.open_elements[1].attributes.has_key?(attr)
+            @tree.open_elements[1].attributes[attr] = value
           end
         end
       end
@@ -106,17 +120,17 @@ module HTML5lib
 
     def startTagCloseP(name, attributes)
       endTagP('p') if in_scope?('p')
-      @tree.insertElement(name, attributes)
+      @tree.insert_element(name, attributes)
       @processSpaceCharactersDropNewline = true if name == 'pre'
     end
 
     def startTagForm(name, attributes)
       if @tree.formPointer
-        @parser.parseError('Unexpected start tag (form). Ignored.')
+        parse_error(_('Unexpected start tag (form). Ignored.'))
       else
         endTagP('p') if in_scope?('p')
-        @tree.insertElement(name, attributes)
-        @tree.formPointer = @tree.openElements[-1]
+        @tree.insert_element(name, attributes)
+        @tree.formPointer = @tree.open_elements[-1]
       end
     end
 
@@ -125,31 +139,28 @@ module HTML5lib
       stopNames = {'li' => ['li'], 'dd' => ['dd', 'dt'], 'dt' => ['dd', 'dt']}
       stopName = stopNames[name]
 
-      @tree.openElements.reverse.each_with_index do |node, i|
+      @tree.open_elements.reverse.each_with_index do |node, i|
         if stopName.include?(node.name)
-          poppedNodes = (0..i).collect { @tree.openElements.pop }
+          poppedNodes = (0..i).collect { @tree.open_elements.pop }
           if i >= 1
-            @parser.parseError("Missing end tag%s (%s)" % [
-              (i>1 ? 's' : ''),
-              poppedNodes.reverse.map {|item| item.name}.join(', ')])
+            parse_error(_("Missing end tag%s (%s)" % [(i>1 ? 's' : ''), poppedNodes.reverse.map{|item| item.name}.join(', ')]))
           end
           break
         end
 
         # Phrasing elements are all non special, non scoping, non
         # formatting elements
-        break if ((SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name) and
-          not ['address', 'div'].include?(node.name))
+        break if ((SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name) && !%w[address div].include?(node.name))
       end
 
       # Always insert an 
  • element. - @tree.insertElement(name, attributes) + @tree.insert_element(name, attributes) end def startTagPlaintext(name, attributes) endTagP('p') if in_scope?('p') - @tree.insertElement(name, attributes) - @parser.tokenizer.contentModelFlag = :PLAINTEXT + @tree.insert_element(name, attributes) + @parser.tokenizer.content_model_flag = :PLAINTEXT end def startTagHeading(name, attributes) @@ -158,7 +169,7 @@ module HTML5lib # Uncomment the following for IE7 behavior: # HEADING_ELEMENTS.each do |element| # if in_scope?(element) - # @parser.parseError(_("Unexpected start tag (#{name}).")) + # parse_error(_("Unexpected start tag (#{name}).")) # # remove_open_elements_until do |element| # HEADING_ELEMENTS.include?(element.name) @@ -167,14 +178,14 @@ module HTML5lib # break # end # end - @tree.insertElement(name, attributes) + @tree.insert_element(name, attributes) end def startTagA(name, attributes) if afeAElement = @tree.elementInActiveFormattingElements('a') - @parser.parseError(_('Unexpected start tag (a) implies end tag (a).')) + parse_error(_('Unexpected start tag (a) implies end tag (a).')) endTagFormatting('a') - @tree.openElements.delete(afeAElement) if @tree.openElements.include?(afeAElement) + @tree.open_elements.delete(afeAElement) if @tree.open_elements.include?(afeAElement) @tree.activeFormattingElements.delete(afeAElement) if @tree.activeFormattingElements.include?(afeAElement) end @tree.reconstructActiveFormattingElements @@ -188,77 +199,82 @@ module HTML5lib def startTagNobr(name, attributes) @tree.reconstructActiveFormattingElements - processEndTag('nobr') if in_scope?('nobr') + if in_scope?('nobr') + parse_error(_('Unexpected start tag (nobr) implies end tag (nobr).')) + processEndTag('nobr') + # XXX Need tests that trigger the following + @tree.reconstructActiveFormattingElements + end addFormattingElement(name, attributes) end def startTagButton(name, attributes) if in_scope?('button') - @parser.parseError(_('Unexpected start tag (button) implied 
end tag (button).')) + parse_error(_('Unexpected start tag (button) implied end tag (button).')) processEndTag('button') @parser.phase.processStartTag(name, attributes) else @tree.reconstructActiveFormattingElements - @tree.insertElement(name, attributes) + @tree.insert_element(name, attributes) @tree.activeFormattingElements.push(Marker) end end def startTagMarqueeObject(name, attributes) @tree.reconstructActiveFormattingElements - @tree.insertElement(name, attributes) + @tree.insert_element(name, attributes) @tree.activeFormattingElements.push(Marker) end def startTagXmp(name, attributes) @tree.reconstructActiveFormattingElements - @tree.insertElement(name, attributes) - @parser.tokenizer.contentModelFlag = :CDATA + @tree.insert_element(name, attributes) + @parser.tokenizer.content_model_flag = :CDATA end def startTagTable(name, attributes) processEndTag('p') if in_scope?('p') - @tree.insertElement(name, attributes) + @tree.insert_element(name, attributes) @parser.phase = @parser.phases[:inTable] end def startTagVoidFormatting(name, attributes) @tree.reconstructActiveFormattingElements - @tree.insertElement(name, attributes) - @tree.openElements.pop + @tree.insert_element(name, attributes) + @tree.open_elements.pop end def startTagHr(name, attributes) endTagP('p') if in_scope?('p') - @tree.insertElement(name, attributes) - @tree.openElements.pop + @tree.insert_element(name, attributes) + @tree.open_elements.pop end def startTagImage(name, attributes) # No really... - @parser.parseError(_('Unexpected start tag (image). Treated as img.')) + parse_error(_('Unexpected start tag (image). 
Treated as img.')) processStartTag('img', attributes) end def startTagInput(name, attributes) @tree.reconstructActiveFormattingElements - @tree.insertElement(name, attributes) + @tree.insert_element(name, attributes) if @tree.formPointer # XXX Not exactly sure what to do here - # @tree.openElements[-1].form = @tree.formPointer + # @tree.open_elements[-1].form = @tree.formPointer end - @tree.openElements.pop + @tree.open_elements.pop end def startTagIsindex(name, attributes) - @parser.parseError("Unexpected start tag isindex. Don't use it!") + parse_error(_("Unexpected start tag isindex. Don't use it!")) return if @tree.formPointer processStartTag('form', {}) processStartTag('hr', {}) processStartTag('p', {}) processStartTag('label', {}) # XXX Localization ... - processCharacters('This is a searchable index. Insert your search keywords here:') + processCharacters('This is a searchable index. Insert your search keywords here: ') attributes['name'] = 'isindex' attrs = attributes.to_a processStartTag('input', attributes) @@ -270,20 +286,21 @@ module HTML5lib def startTagTextarea(name, attributes) # XXX Form element pointer checking here as well... 
- @tree.insertElement(name, attributes) - @parser.tokenizer.contentModelFlag = :RCDATA + @tree.insert_element(name, attributes) + @parser.tokenizer.content_model_flag = :RCDATA @processSpaceCharactersDropNewline = true + alias processSpaceCharacters processSpaceCharactersDropNewline end # iframe, noembed noframes, noscript(if scripting enabled) def startTagCdata(name, attributes) - @tree.insertElement(name, attributes) - @parser.tokenizer.contentModelFlag = :CDATA + @tree.insert_element(name, attributes) + @parser.tokenizer.content_model_flag = :CDATA end def startTagSelect(name, attributes) @tree.reconstructActiveFormattingElements - @tree.insertElement(name, attributes) + @tree.insert_element(name, attributes) @parser.phase = @parser.phases[:inSelect] end @@ -293,7 +310,7 @@ module HTML5lib # "caption", "col", "colgroup", "frame", "frameset", "head", # "option", "optgroup", "tbody", "td", "tfoot", "th", "thead", # "tr", "noscript" - @parser.parseError(_("Unexpected start tag (#{name}). Ignored.")) + parse_error(_("Unexpected start tag (#{name}). Ignored.")) end def startTagNew(name, attributes) @@ -306,33 +323,38 @@ module HTML5lib def startTagOther(name, attributes) @tree.reconstructActiveFormattingElements - @tree.insertElement(name, attributes) + @tree.insert_element(name, attributes) end def endTagP(name) @tree.generateImpliedEndTags('p') if in_scope?('p') - @parser.parseError('Unexpected end tag (p).') unless @tree.openElements[-1].name == 'p' - @tree.openElements.pop while in_scope?('p') + parse_error(_('Unexpected end tag (p).')) unless @tree.open_elements.last.name == 'p' + if in_scope?('p') + @tree.open_elements.pop while in_scope?('p') + else + startTagCloseP('p', {}) + endTagP('p') + end end def endTagBody(name) # XXX Need to take open

    tags into account here. We shouldn't imply #

    but we should not throw a parse error either. Specification is # likely to be updated. - unless @tree.openElements[1].name == 'body' - # innerHTML case - @parser.parseError + unless @tree.open_elements[1].name == 'body' + # inner_html case + parse_error return end - unless @tree.openElements[-1].name == 'body' - @parser.parseError(_("Unexpected end tag (body). Missing end tag (#{@tree.openElements[-1].name}).")) + unless @tree.open_elements.last.name == 'body' + parse_error(_("Unexpected end tag (body). Missing end tag (#{@tree.open_elements[-1].name}).")) end @parser.phase = @parser.phases[:afterBody] end def endTagHtml(name) endTagBody(name) - @parser.phase.processEndTag(name) unless @parser.innerHTML + @parser.phase.processEndTag(name) unless @parser.inner_html end def endTagBlock(name) @@ -341,8 +363,8 @@ module HTML5lib @tree.generateImpliedEndTags if in_scope?(name) - unless @tree.openElements[-1].name == name - @parser.parseError(("End tag (#{name}) seen too early. Expected other end tag.")) + unless @tree.open_elements.last.name == name + parse_error(_("End tag (#{name}) seen too early. Expected other end tag.")) end if in_scope?(name) @@ -351,18 +373,23 @@ module HTML5lib end def endTagForm(name) - endTagBlock(name) + if in_scope?(name) + @tree.generateImpliedEndTags + end + if @tree.open_elements.last.name != name + parse_error(_("End tag (form) seen too early. Ignored.")) + else + @tree.open_elements.pop + end @tree.formPointer = nil end def endTagListItem(name) # AT Could merge this with the Block case - if in_scope?(name) - @tree.generateImpliedEndTags(name) + @tree.generateImpliedEndTags(name) if in_scope?(name) - unless @tree.openElements[-1].name == name - @parser.parseError(("End tag (#{name}) seen too early. Expected other end tag.")) - end + unless @tree.open_elements.last.name == name + parse_error(_("End tag (#{name}) seen too early. 
" + 'Expected other end tag.')) end remove_open_elements_until(name) if in_scope?(name) @@ -376,13 +403,13 @@ module HTML5lib end end - unless @tree.openElements[-1].name == name - @parser.parseError(("Unexpected end tag (#{name}). Expected other end tag.")) + unless @tree.open_elements.last.name == name + parse_error(_("Unexpected end tag (#{name}). Expected other end tag.")) end HEADING_ELEMENTS.each do |element| if in_scope?(element) - remove_open_elements_until { |element| HEADING_ELEMENTS.include?(element.name) } + remove_open_elements_until {|element| HEADING_ELEMENTS.include?(element.name)} break end end @@ -391,30 +418,30 @@ module HTML5lib # The much-feared adoption agency algorithm def endTagFormatting(name) # http://www.whatwg.org/specs/web-apps/current-work/#adoptionAgency - # XXX Better parseError messages appreciated. + # XXX Better parse_error messages appreciated. while true # Step 1 paragraph 1 afeElement = @tree.elementInActiveFormattingElements(name) - if not afeElement or (@tree.openElements.include?(afeElement) and not in_scope?(afeElement.name)) - @parser.parseError(_("End tag (#{name}) violates step 1, paragraph 1 of the adoption agency algorithm.")) + if !afeElement or (@tree.open_elements.include?(afeElement) && !in_scope?(afeElement.name)) + parse_error(_("End tag (#{name}) violates step 1, paragraph 1 of the adoption agency algorithm.")) return # Step 1 paragraph 2 - elsif not @tree.openElements.include?(afeElement) - @parser.parseError(_("End tag (#{name}) violates step 1, paragraph 2 of the adoption agency algorithm.")) + elsif not @tree.open_elements.include?(afeElement) + parse_error(_("End tag (#{name}) violates step 1, paragraph 2 of the adoption agency algorithm.")) @tree.activeFormattingElements.delete(afeElement) return end # Step 1 paragraph 3 - if afeElement != @tree.openElements[-1] - @parser.parseError(_("End tag (#{name}) violates step 1, paragraph 3 of the adoption agency algorithm.")) + if afeElement != 
@tree.open_elements.last + parse_error(_("End tag (#{name}) violates step 1, paragraph 3 of the adoption agency algorithm.")) end # Step 2 # Start of the adoption agency algorithm proper - afeIndex = @tree.openElements.index(afeElement) + afeIndex = @tree.open_elements.index(afeElement) furthestBlock = nil - @tree.openElements[afeIndex..-1].each do |element| + @tree.open_elements[afeIndex..-1].each do |element| if (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(element.name) furthestBlock = element break @@ -423,11 +450,11 @@ module HTML5lib # Step 3 if furthestBlock.nil? - element = remove_open_elements_until { |element| element == afeElement } + element = remove_open_elements_until {|element| element == afeElement } @tree.activeFormattingElements.delete(element) return end - commonAncestor = @tree.openElements[afeIndex - 1] + commonAncestor = @tree.open_elements[afeIndex - 1] # Step 5 furthestBlock.parent.removeChild(furthestBlock) if furthestBlock.parent @@ -444,11 +471,11 @@ module HTML5lib while true # AT replace this with a function and recursion? 
# Node is element before node in open elements - node = @tree.openElements[@tree.openElements.index(node) - 1] + node = @tree.open_elements[@tree.open_elements.index(node) - 1] until @tree.activeFormattingElements.include?(node) tmpNode = node - node = @tree.openElements[@tree.openElements.index(node) - 1] - @tree.openElements.delete(tmpNode) + node = @tree.open_elements[@tree.open_elements.index(node) - 1] + @tree.open_elements.delete(tmpNode) end # Step 7.3 break if node == afeElement @@ -465,7 +492,7 @@ module HTML5lib clone = node.cloneNode # Replace node with clone @tree.activeFormattingElements[@tree.activeFormattingElements.index(node)] = clone - @tree.openElements[@tree.openElements.index(node)] = clone + @tree.open_elements[@tree.open_elements.index(node)] = clone node = clone end # Step 7.6 @@ -495,47 +522,47 @@ module HTML5lib @tree.activeFormattingElements.insert([bookmark,@tree.activeFormattingElements.length].min, clone) # Step 13 - @tree.openElements.delete(afeElement) - @tree.openElements.insert(@tree.openElements.index(furthestBlock) + 1, clone) + @tree.open_elements.delete(afeElement) + @tree.open_elements.insert(@tree.open_elements.index(furthestBlock) + 1, clone) end end def endTagButtonMarqueeObject(name) @tree.generateImpliedEndTags if in_scope?(name) - unless @tree.openElements[-1].name == name - @parser.parseError(_("Unexpected end tag (#{name}). Expected other end tag first.")) + unless @tree.open_elements.last.name == name + parse_error(_("Unexpected end tag (#{name}). Expected other end tag first.")) end if in_scope?(name) remove_open_elements_until(name) - + @tree.clearActiveFormattingElements end end def endTagMisplaced(name) # This handles elements with end tags in other insertion modes. - @parser.parseError(_("Unexpected end tag (#{name}). Ignored.")) + parse_error(_("Unexpected end tag (#{name}). Ignored.")) end def endTagBr(name) - @parser.parseError(_("Unexpected end tag (br). 
Treated as br element.")) + parse_error(_("Unexpected end tag (br). Treated as br element.")) @tree.reconstructActiveFormattingElements - @tree.insertElement(name, {}) - @tree.openElements.pop() + @tree.insert_element(name, {}) + @tree.open_elements.pop() end def endTagNone(name) # This handles elements with no end tag. - @parser.parseError(_("This tag (#{name}) has no end tag")) + parse_error(_("This tag (#{name}) has no end tag")) end def endTagCdataTextAreaXmp(name) - if @tree.openElements[-1].name == name - @tree.openElements.pop + if @tree.open_elements.last.name == name + @tree.open_elements.pop else - @parser.parseError(_("Unexpected end tag (#{name}). Ignored.")) + parse_error(_("Unexpected end tag (#{name}). Ignored.")) end end @@ -549,20 +576,20 @@ module HTML5lib def endTagOther(name) # XXX This logic should be moved into the treebuilder - @tree.openElements.reverse.each do |node| + @tree.open_elements.reverse.each do |node| if node.name == name @tree.generateImpliedEndTags - unless @tree.openElements[-1].name == name - @parser.parseError(_("Unexpected end tag (#{name}).")) + unless @tree.open_elements.last.name == name + parse_error(_("Unexpected end tag (#{name}).")) end - remove_open_elements_until { |element| element == node } + remove_open_elements_until {|element| element == node } break else if (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name) - @parser.parseError(_("Unexpected end tag (#{name}). Ignored.")) + parse_error(_("Unexpected end tag (#{name}). 
Ignored.")) break end end @@ -572,8 +599,8 @@ module HTML5lib protected def addFormattingElement(name, attributes) - @tree.insertElement(name, attributes) - @tree.activeFormattingElements.push(@tree.openElements[-1]) + @tree.insert_element(name, attributes) + @tree.activeFormattingElements.push(@tree.open_elements.last) end end diff --git a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_caption_phase.rb b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_caption_phase.rb similarity index 77% rename from vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_caption_phase.rb rename to vendor/plugins/HTML5lib/lib/html5/html5parser/in_caption_phase.rb index ccdfcb91..65718759 100644 --- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_caption_phase.rb +++ b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_caption_phase.rb @@ -1,6 +1,6 @@ -require 'html5lib/html5parser/phase' +require 'html5/html5parser/phase' -module HTML5lib +module HTML5 class InCaptionPhase < Phase # http://www.whatwg.org/specs/web-apps/current-work/#in-caption @@ -10,7 +10,7 @@ module HTML5lib handle_end 'caption', 'table', %w( body col colgroup html tbody td tfoot th thead tr ) => 'Ignore' def ignoreEndTagCaption - not in_scope?('caption', true) + !in_scope?('caption', true) end def processCharacters(data) @@ -18,7 +18,7 @@ module HTML5lib end def startTagTableElement(name, attributes) - @parser.parseError + parse_error #XXX Have to duplicate logic here to find out if the tag is ignored ignoreEndTag = ignoreEndTagCaption @parser.phase.processEndTag('caption') @@ -31,15 +31,15 @@ module HTML5lib def endTagCaption(name) if ignoreEndTagCaption - # innerHTML case - assert @parser.innerHTML - @parser.parseError + # inner_html case + assert @parser.inner_html + parse_error else # AT this code is quite similar to endTagTable in "InTable" @tree.generateImpliedEndTags - unless @tree.openElements[-1].name == 'caption' - @parser.parseError(_("Unexpected end tag (caption). 
Missing end tags.")) + unless @tree.open_elements[-1].name == 'caption' + parse_error(_("Unexpected end tag (caption). Missing end tags.")) end remove_open_elements_until('caption') @@ -50,14 +50,14 @@ module HTML5lib end def endTagTable(name) - @parser.parseError + parse_error ignoreEndTag = ignoreEndTagCaption @parser.phase.processEndTag('caption') @parser.phase.processEndTag(name) unless ignoreEndTag end def endTagIgnore(name) - @parser.parseError(_("Unexpected end tag (#{name}). Ignored.")) + parse_error(_("Unexpected end tag (#{name}). Ignored.")) end def endTagOther(name) diff --git a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_cell_phase.rb b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_cell_phase.rb similarity index 75% rename from vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_cell_phase.rb rename to vendor/plugins/HTML5lib/lib/html5/html5parser/in_cell_phase.rb index 5b88a30b..50b29141 100644 --- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_cell_phase.rb +++ b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_cell_phase.rb @@ -1,6 +1,6 @@ -require 'html5lib/html5parser/phase' +require 'html5/html5parser/phase' -module HTML5lib +module HTML5 class InCellPhase < Phase # http://www.whatwg.org/specs/web-apps/current-work/#in-cell @@ -20,8 +20,8 @@ module HTML5lib closeCell @parser.phase.processStartTag(name, attributes) else - # innerHTML case - @parser.parseError + # inner_html case + parse_error end end @@ -32,22 +32,22 @@ module HTML5lib def endTagTableCell(name) if in_scope?(name, true) @tree.generateImpliedEndTags(name) - if @tree.openElements[-1].name != name - @parser.parseError("Got table cell end tag (#{name}) while required end tags are missing.") + if @tree.open_elements.last.name != name + parse_error("Got table cell end tag (#{name}) while required end tags are missing.") remove_open_elements_until(name) else - @tree.openElements.pop + @tree.open_elements.pop end @tree.clearActiveFormattingElements @parser.phase = 
@parser.phases[:inRow] else - @parser.parseError(_("Unexpected end tag (#{name}). Ignored.")) + parse_error(_("Unexpected end tag (#{name}). Ignored.")) end end def endTagIgnore(name) - @parser.parseError(_("Unexpected end tag (#{name}). Ignored.")) + parse_error(_("Unexpected end tag (#{name}). Ignored.")) end def endTagImply(name) @@ -55,8 +55,8 @@ module HTML5lib closeCell @parser.phase.processEndTag(name) else - # sometimes innerHTML case - @parser.parseError + # sometimes inner_html case + parse_error end end diff --git a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_column_group_phase.rb b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_column_group_phase.rb similarity index 73% rename from vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_column_group_phase.rb rename to vendor/plugins/HTML5lib/lib/html5/html5parser/in_column_group_phase.rb index 7729eb83..81841c26 100644 --- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_column_group_phase.rb +++ b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_column_group_phase.rb @@ -1,6 +1,6 @@ -require 'html5lib/html5parser/phase' +require 'html5/html5parser/phase' -module HTML5lib +module HTML5 class InColumnGroupPhase < Phase # http://www.whatwg.org/specs/web-apps/current-work/#in-column @@ -10,7 +10,7 @@ module HTML5lib handle_end 'colgroup', 'col' def ignoreEndTagColgroup - @tree.openElements[-1].name == 'html' + @tree.open_elements[-1].name == 'html' end def processCharacters(data) @@ -20,8 +20,8 @@ module HTML5lib end def startTagCol(name, attributes) - @tree.insertElement(name, attributes) - @tree.openElements.pop + @tree.insert_element(name, attributes) + @tree.open_elements.pop end def startTagOther(name, attributes) @@ -32,17 +32,17 @@ module HTML5lib def endTagColgroup(name) if ignoreEndTagColgroup - # innerHTML case - assert @parser.innerHTML - @parser.parseError + # inner_html case + assert @parser.inner_html + parse_error else - @tree.openElements.pop + @tree.open_elements.pop 
@parser.phase = @parser.phases[:inTable] end end def endTagCol(name) - @parser.parseError(_('Unexpected end tag (col). col has no end tag.')) + parse_error(_('Unexpected end tag (col). col has no end tag.')) end def endTagOther(name) diff --git a/vendor/plugins/HTML5lib/lib/html5/html5parser/in_frameset_phase.rb b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_frameset_phase.rb new file mode 100644 index 00000000..07d68d71 --- /dev/null +++ b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_frameset_phase.rb @@ -0,0 +1,57 @@ +require 'html5/html5parser/phase' + +module HTML5 + class InFramesetPhase < Phase + + # http://www.whatwg.org/specs/web-apps/current-work/#in-frameset + + handle_start 'html', 'frameset', 'frame', 'noframes' + + handle_end 'frameset', 'noframes' + + def processCharacters(data) + parse_error(_('Unexpected characters in the frameset phase. Characters ignored.')) + end + + def startTagFrameset(name, attributes) + @tree.insert_element(name, attributes) + end + + def startTagFrame(name, attributes) + @tree.insert_element(name, attributes) + @tree.open_elements.pop + end + + def startTagNoframes(name, attributes) + @parser.phases[:inBody].processStartTag(name, attributes) + end + + def startTagOther(name, attributes) + parse_error(_("Unexpected start tag token (#{name}) in the frameset phase. Ignored")) + end + + def endTagFrameset(name) + if @tree.open_elements.last.name == 'html' + # inner_html case + parse_error(_("Unexpected end tag token (frameset) in the frameset phase (inner_html).")) + else + @tree.open_elements.pop + end + if (not @parser.inner_html and + @tree.open_elements.last.name != 'frameset') + # If we're not in inner_html mode and the the current node is not a + # "frameset" element (anymore) then switch. 
+ @parser.phase = @parser.phases[:afterFrameset] + end + end + + def endTagNoframes(name) + @parser.phases[:inBody].processEndTag(name) + end + + def endTagOther(name) + parse_error(_("Unexpected end tag token (#{name}) in the frameset phase. Ignored.")) + end + + end +end \ No newline at end of file diff --git a/vendor/plugins/HTML5lib/lib/html5/html5parser/in_head_phase.rb b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_head_phase.rb new file mode 100644 index 00000000..349c138a --- /dev/null +++ b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_head_phase.rb @@ -0,0 +1,138 @@ +require 'html5/html5parser/phase' + +module HTML5 + class InHeadPhase < Phase + + handle_start 'html', 'head', 'title', 'style', 'script', 'noscript' + handle_start %w( base link meta ) + + handle_end 'head' + handle_end %w( html body br p ) => 'ImplyAfterHead' + handle_end %w( title style script noscript ) + + def process_eof + if ['title', 'style', 'script'].include?(name = @tree.open_elements.last.name) + parse_error(_("Unexpected end of file. Expected end tag (#{name}).")) + @tree.open_elements.pop + end + anything_else + @parser.phase.process_eof + end + + def processCharacters(data) + if %w[title style script noscript].include?(@tree.open_elements.last.name) + @tree.insertText(data) + else + anything_else + @parser.phase.processCharacters(data) + end + end + + def startTagHead(name, attributes) + parse_error(_('Unexpected start tag head in existing head. 
Ignored')) + end + + def startTagTitle(name, attributes) + element = @tree.createElement(name, attributes) + appendToHead(element) + @tree.open_elements.push(element) + @parser.tokenizer.content_model_flag = :RCDATA + end + + def startTagStyle(name, attributes) + element = @tree.createElement(name, attributes) + if @tree.head_pointer != nil and @parser.phase == @parser.phases[:inHead] + appendToHead(element) + else + @tree.open_elements.last.appendChild(element) + end + @tree.open_elements.push(element) + @parser.tokenizer.content_model_flag = :CDATA + end + + def startTagNoscript(name, attributes) + # XXX Need to decide whether to implement the scripting disabled case. + element = @tree.createElement(name, attributes) + if @tree.head_pointer !=nil and @parser.phase == @parser.phases[:inHead] + appendToHead(element) + else + @tree.open_elements.last.appendChild(element) + end + @tree.open_elements.push(element) + @parser.tokenizer.content_model_flag = :CDATA + end + + def startTagScript(name, attributes) + #XXX Inner HTML case may be wrong + element = @tree.createElement(name, attributes) + element._flags.push("parser-inserted") + if @tree.head_pointer != nil and @parser.phase == @parser.phases[:inHead] + appendToHead(element) + else + @tree.open_elements.last.appendChild(element) + end + @tree.open_elements.push(element) + @parser.tokenizer.content_model_flag = :CDATA + end + + def startTagBaseLinkMeta(name, attributes) + element = @tree.createElement(name, attributes) + if @tree.head_pointer != nil and @parser.phase == @parser.phases[:inHead] + appendToHead(element) + else + @tree.open_elements.last.appendChild(element) + end + end + + def startTagOther(name, attributes) + anything_else + @parser.phase.processStartTag(name, attributes) + end + + def endTagHead(name) + if @tree.open_elements.last.name == 'head' + @tree.open_elements.pop + else + parse_error(_("Unexpected end tag (head). 
Ignored.")) + end + @parser.phase = @parser.phases[:afterHead] + end + + def endTagImplyAfterHead(name) + anything_else + @parser.phase.processEndTag(name) + end + + def endTagTitleStyleScriptNoscript(name) + if @tree.open_elements.last.name == name + @tree.open_elements.pop + else + parse_error(_("Unexpected end tag (#{name}). Ignored.")) + end + end + + def endTagOther(name) + parse_error(_("Unexpected end tag (#{name}). Ignored.")) + end + + def anything_else + if @tree.open_elements.last.name == 'head' + endTagHead('head') + else + @parser.phase = @parser.phases[:afterHead] + end + end + + protected + + def appendToHead(element) + if @tree.head_pointer.nil? + assert @parser.inner_html + @tree.open_elements.last.appendChild(element) + else + @tree.head_pointer.appendChild(element) + end + end + + end +end diff --git a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_row_phase.rb b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_row_phase.rb similarity index 72% rename from vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_row_phase.rb rename to vendor/plugins/HTML5lib/lib/html5/html5parser/in_row_phase.rb index b3ffa3f0..b8cbf304 100644 --- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_row_phase.rb +++ b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_row_phase.rb @@ -1,6 +1,6 @@ -require 'html5lib/html5parser/phase' +require 'html5/html5parser/phase' -module HTML5lib +module HTML5 class InRowPhase < Phase # http://www.whatwg.org/specs/web-apps/current-work/#in-row @@ -15,7 +15,7 @@ module HTML5lib def startTagTableCell(name, attributes) clearStackToTableRowContext - @tree.insertElement(name, attributes) + @tree.insert_element(name, attributes) @parser.phase = @parser.phases[:inCell] @tree.activeFormattingElements.push(Marker) end @@ -23,7 +23,7 @@ module HTML5lib def startTagTableOther(name, attributes) ignoreEndTag = ignoreEndTagTr endTagTr('tr') - # XXX how are we sure it's always ignored in the innerHTML case? 
+ # XXX how are we sure it's always ignored in the inner_html case? @parser.phase.processStartTag(name, attributes) unless ignoreEndTag end @@ -33,12 +33,12 @@ module HTML5lib def endTagTr(name) if ignoreEndTagTr - # innerHTML case - assert @parser.innerHTML - @parser.parseError + # inner_html case + assert @parser.inner_html + parse_error else clearStackToTableRowContext - @tree.openElements.pop + @tree.open_elements.pop @parser.phase = @parser.phases[:inTableBody] end end @@ -47,7 +47,7 @@ module HTML5lib ignoreEndTag = ignoreEndTagTr endTagTr('tr') # Reprocess the current tag if the tr end tag was not ignored - # XXX how are we sure it's always ignored in the innerHTML case? + # XXX how are we sure it's always ignored in the inner_html case? @parser.phase.processEndTag(name) unless ignoreEndTag end @@ -56,13 +56,13 @@ module HTML5lib endTagTr('tr') @parser.phase.processEndTag(name) else - # innerHTML case - @parser.parseError + # inner_html case + parse_error end end def endTagIgnore(name) - @parser.parseError(_("Unexpected end tag (#{name}) in the row phase. Ignored.")) + parse_error(_("Unexpected end tag (#{name}) in the row phase. 
Ignored.")) end def endTagOther(name) @@ -73,9 +73,9 @@ module HTML5lib # XXX unify this with other table helper methods def clearStackToTableRowContext - until ['tr', 'html'].include?(name = @tree.openElements[-1].name) - @parser.parseError(_("Unexpected implied end tag (#{name}) in the row phase.")) - @tree.openElements.pop + until %w[tr html].include?(name = @tree.open_elements.last.name) + parse_error(_("Unexpected implied end tag (#{name}) in the row phase.")) + @tree.open_elements.pop end end diff --git a/vendor/plugins/HTML5lib/lib/html5/html5parser/in_select_phase.rb b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_select_phase.rb new file mode 100644 index 00000000..9294bcf3 --- /dev/null +++ b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_select_phase.rb @@ -0,0 +1,84 @@ +require 'html5/html5parser/phase' + +module HTML5 + class InSelectPhase < Phase + + # http://www.whatwg.org/specs/web-apps/current-work/#in-select + + handle_start 'html', 'option', 'optgroup', 'select' + + handle_end 'option', 'optgroup', 'select', %w( caption table tbody tfoot thead tr td th ) => 'TableElements' + + def processCharacters(data) + @tree.insertText(data) + end + + def startTagOption(name, attributes) + # We need to imply if if
  • @@ -1920,6 +1926,9 @@ Unexpected EOF 610: unexpected option element end tag 622: unexpected plaintext element end tag 633: mismatched special end tag textarea +XXX +XXX +XXX #document | | @@ -1928,3 +1937,14 @@ Unexpected EOF | | | +|

    + +#data + +#errors +10: Start tag seen without seeing a doctype first. +11: End of file seen and there were open elements. +#document +| +| +| diff --git a/vendor/plugins/HTML5lib/testdata/tree-construction/tests2.dat b/vendor/plugins/HTML5lib/testdata/tree-construction/tests2.dat index fdf8356a..e63ea5ec 100755 --- a/vendor/plugins/HTML5lib/testdata/tree-construction/tests2.dat +++ b/vendor/plugins/HTML5lib/testdata/tree-construction/tests2.dat @@ -12,7 +12,6 @@ #errors #document @@ -194,6 +211,20 @@ foo | +#errors +#document +| +| +| +| +|