diff --git a/app/views/wiki/tex.rhtml b/app/views/wiki/tex.rhtml index 5d10dba2..ddc87320 100644 --- a/app/views/wiki/tex.rhtml +++ b/app/views/wiki/tex.rhtml @@ -100,6 +100,7 @@ \re@DeclareMathSymbol{\nwArr}{\mathrel}{symbolsC}{118} \re@DeclareMathSymbol{\swArrow}{\mathrel}{symbolsC}{119} \re@DeclareMathSymbol{\swArr}{\mathrel}{symbolsC}{119} +\re@DeclareMathSymbol{\nequiv}{\mathrel}{symbolsC}{46} \makeatother % Widecheck diff --git a/lib/chunks/chunk.rb b/lib/chunks/chunk.rb index 46382c76..90b4939a 100644 --- a/lib/chunks/chunk.rb +++ b/lib/chunks/chunk.rb @@ -9,6 +9,10 @@ require 'uri/common' module Chunk class Abstract +# Rails's default utf-8 support causes problems here. So, for Chunk::Abstract class, turn off +# multibyte character support. + $KCODE = 'iso-8859-1' + # automatically construct the array of derivatives of Chunk::Abstract @derivatives = [] diff --git a/test/functional/wiki_controller_test.rb b/test/functional/wiki_controller_test.rb index 27bee12b..11b4e661 100755 --- a/test/functional/wiki_controller_test.rb +++ b/test/functional/wiki_controller_test.rb @@ -767,6 +767,7 @@ class WikiControllerTest < Test::Unit::TestCase \re@DeclareMathSymbol{\nwArr}{\mathrel}{symbolsC}{118} \re@DeclareMathSymbol{\swArrow}{\mathrel}{symbolsC}{119} \re@DeclareMathSymbol{\swArr}{\mathrel}{symbolsC}{119} +\re@DeclareMathSymbol{\nequiv}{\mathrel}{symbolsC}{46} \makeatother % Widecheck diff --git a/vendor/plugins/HTML5lib/Manifest.txt b/vendor/plugins/HTML5lib/Manifest.txt index 8a8a1bca..082b2b0b 100644 --- a/vendor/plugins/HTML5lib/Manifest.txt +++ b/vendor/plugins/HTML5lib/Manifest.txt @@ -2,12 +2,18 @@ History.txt Manifest.txt README Rakefile.rb +bin/html5 +lib/core_ext/string.rb lib/html5.rb lib/html5/constants.rb lib/html5/filters/base.rb lib/html5/filters/inject_meta_charset.rb +lib/html5/filters/iso639codes.rb lib/html5/filters/optionaltags.rb +lib/html5/filters/rfc2046.rb +lib/html5/filters/rfc3987.rb lib/html5/filters/sanitizer.rb +lib/html5/filters/validator.rb lib/html5/filters/whitespace.rb lib/html5/html5parser.rb lib/html5/html5parser/after_body_phase.rb @@ -34,6 +40,7 @@ lib/html5/sanitizer.rb lib/html5/serializer.rb lib/html5/serializer/htmlserializer.rb lib/html5/serializer/xhtmlserializer.rb +lib/html5/sniffer.rb lib/html5/tokenizer.rb lib/html5/treebuilders.rb lib/html5/treebuilders/base.rb @@ -46,14 +53,65 @@ lib/html5/treewalkers/hpricot.rb lib/html5/treewalkers/rexml.rb lib/html5/treewalkers/simpletree.rb lib/html5/version.rb -parse.rb +testdata/encoding/chardet/test_big5.txt +testdata/encoding/test-yahoo-jp.dat +testdata/encoding/tests1.dat +testdata/encoding/tests2.dat +testdata/sanitizer/tests1.dat +testdata/serializer/core.test +testdata/serializer/injectmeta.test +testdata/serializer/optionaltags.test +testdata/serializer/options.test +testdata/serializer/whitespace.test +testdata/sites/google-results.htm +testdata/sites/python-ref-import.htm +testdata/sites/web-apps-old.htm +testdata/sites/web-apps.htm +testdata/sniffer/htmlOrFeed.json +testdata/tokenizer/contentModelFlags.test +testdata/tokenizer/entities.test +testdata/tokenizer/escapeFlag.test +testdata/tokenizer/test1.test +testdata/tokenizer/test2.test +testdata/tokenizer/test3.test +testdata/tokenizer/test4.test +testdata/tree-construction/tests1.dat +testdata/tree-construction/tests2.dat +testdata/tree-construction/tests3.dat +testdata/tree-construction/tests4.dat +testdata/tree-construction/tests5.dat +testdata/tree-construction/tests6.dat +testdata/validator/attributes.test +testdata/validator/base-href-attribute.test +testdata/validator/base-target-attribute.test +testdata/validator/blockquote-cite-attribute.test +testdata/validator/classattribute.test +testdata/validator/contenteditableattribute.test +testdata/validator/contextmenuattribute.test +testdata/validator/dirattribute.test +testdata/validator/draggableattribute.test +testdata/validator/html-xmlns-attribute.test +testdata/validator/idattribute.test +testdata/validator/inputattributes.test +testdata/validator/irrelevantattribute.test +testdata/validator/langattribute.test +testdata/validator/li-value-attribute.test +testdata/validator/link-href-attribute.test +testdata/validator/link-hreflang-attribute.test +testdata/validator/link-rel-attribute.test +testdata/validator/ol-start-attribute.test +testdata/validator/starttags.test +testdata/validator/style-scoped-attribute.test +testdata/validator/tabindexattribute.test tests/preamble.rb tests/test_encoding.rb tests/test_lxp.rb tests/test_parser.rb tests/test_sanitizer.rb tests/test_serializer.rb +tests/test_sniffer.rb tests/test_stream.rb tests/test_tokenizer.rb tests/test_treewalkers.rb +tests/test_validator.rb tests/tokenizer_test_parser.rb diff --git a/vendor/plugins/HTML5lib/Rakefile.rb b/vendor/plugins/HTML5lib/Rakefile.rb index 65b20295..49324fcb 100644 --- a/vendor/plugins/HTML5lib/Rakefile.rb +++ b/vendor/plugins/HTML5lib/Rakefile.rb @@ -18,16 +18,16 @@ end require 'rcov/rcovtask' -namespace :test do +namespace :test do namespace :coverage do desc "Delete aggregate coverage data." task(:clean) { rm_f "coverage.data" } end desc 'Aggregate code coverage for unit, functional and integration tests' Rcov::RcovTask.new(:coverage => "test:coverage:clean") do |t| - t.libs << "tests" - t.test_files = FileList["tests/test_*.rb"] - t.output_dir = "tests/coverage/" + t.libs << "test" + t.test_files = FileList["test/test_*.rb"] + t.output_dir = "test/coverage/" t.verbose = true end end \ No newline at end of file diff --git a/vendor/plugins/HTML5lib/bin/html5 b/vendor/plugins/HTML5lib/bin/html5 index e16e9248..c74e780b 100755 --- a/vendor/plugins/HTML5lib/bin/html5 +++ b/vendor/plugins/HTML5lib/bin/html5 @@ -1,217 +1,5 @@ #!/usr/bin/env ruby -require 'core_ext/string' -$:.unshift File.dirname(__FILE__), 'lib' +require 'html5/cli' -def parse(opts, args) - encoding = nil - - f = args[-1] - if f - begin - if f[0..6] == 'http://' - require 'open-uri' - f = URI.parse(f).open - encoding = f.charset - elsif f == '-' - f = $stdin - else - f = open(f) - end - rescue - end - else - $stderr.write("No filename provided. Use -h for help\n") - exit(1) - end - - require 'html5/treebuilders' - treebuilder = HTML5::TreeBuilders[opts.treebuilder] - - if opts.output == :xml - require 'html5/liberalxmlparser' - p = HTML5::XMLParser.new(:tree=>treebuilder) - else - require 'html5/html5parser' - p = HTML5::HTMLParser.new(:tree=>treebuilder) - end - - if opts.parsemethod == :parse - args = [f, encoding] - else - args = [f, (opts.container || 'div'), encoding] - end - - if opts.profile - require 'profiler' - Profiler__::start_profile - p.send(opts.parsemethod, *args) - Profiler__::stop_profile - Profiler__::print_profile($stderr) - elsif opts.time - require 'time' # TODO: switch to benchmark - t0 = Time.new - document = p.send(opts.parsemethod, *args) - t1 = Time.new - print_output(p, document, opts) - t2 = Time.new - puts "\n\nRun took: %fs (plus %fs to print the output)"%[t1-t0, t2-t1] - else - document = p.send(opts.parsemethod, *args) - print_output(p, document, opts) - end -end - -def print_output(parser, document, opts) - puts "Encoding: #{parser.tokenizer.stream.char_encoding}" if opts.encoding - - case opts.output - when :xml - print document - when :html - require 'html5/treewalkers' - tokens = HTML5::TreeWalkers[opts.treebuilder].new(document) - require 'html5/serializer' - puts HTML5::HTMLSerializer.serialize(tokens, opts.serializer) - when :hilite - print document.hilite - when :tree - document = [document] unless document.respond_to?(:each) - document.each {|fragment| puts parser.tree.testSerializer(fragment)} - end - - if opts.error - errList=[] - for pos, errorcode, datavars in parser.errors - errList << "Line #{pos[0]} Col #{pos[1]} " + (HTML5::E[errorcode] || "Unknown error \"#{errorcode}\"") % datavars - end - $stdout.write("\nParse errors:\n" + errList.join("\n")+"\n") - end -end - -require 'ostruct' -options = OpenStruct.new -options.profile = false -options.time = false -options.output = :html -options.treebuilder = 'simpletree' -options.error = false -options.encoding = false -options.parsemethod = :parse -options.serializer = { - :encoding => 'utf-8', - :omit_optional_tags => false, - :inject_meta_charset => false -} - -require 'optparse' -opts = OptionParser.new do |opts| - opts.separator "" - opts.separator "Parse Options:" - - opts.on("-b", "--treebuilder NAME") do |treebuilder| - options.treebuilder = treebuilder - end - - opts.on("-f", "--fragment CONTAINER", "Parse as a fragment") do |container| - options.parsemethod = :parse_fragment - options.container = container if container - end - - opts.separator "" - opts.separator "Filter Options:" - - opts.on("--[no-]inject-meta-charset", "inject ") do |inject| - options.serializer[:inject_meta_charset] = inject - end - - opts.on("--[no-]strip-whitespace", "strip unnecessary whitespace") do |strip| - options.serializer[:strip_whitespace] = strip - end - - opts.on("--[no-]sanitize", "escape unsafe tags") do |sanitize| - options.serializer[:sanitize] = sanitize - end - - opts.separator "" - opts.separator "Output Options:" - - opts.on("--tree", "output as debug tree") do |tree| - options.output = :tree - end - - opts.on("-x", "--xml", "output as xml") do |xml| - options.output = :xml - options.treebuilder = "rexml" - end - - opts.on("--[no-]html", "Output as html") do |html| - options.output = (html ? :html : nil) - end - - opts.on("--hilite", "Output as formatted highlighted code.") do |hilite| - options.output = :hilite - end - - opts.on("-e", "--error", "Print a list of parse errors") do |error| - options.error = error - end - - opts.separator "" - opts.separator "Serialization Options:" - - opts.on("--[no-]omit-optional-tags", "Omit optional tags") do |omit| - options.serializer[:omit_optional_tags] = omit - end - - opts.on("--[no-]quote-attr-values", "Quote attribute values") do |quote| - options.serializer[:quote_attr_values] = quote - end - - opts.on("--[no-]use-best-quote-char", "Use best quote character") do |best| - options.serializer[:use_best_quote_char] = best - end - - opts.on("--quote-char C", "Use specified quote character") do |c| - options.serializer[:quote_char] = c - end - - opts.on("--[no-]minimize-boolean-attributes", "Minimize boolean attributes") do |min| - options.serializer[:minimize_boolean_attributes] = min - end - - opts.on("--[no-]use-trailing-solidus", "Use trailing solidus") do |slash| - options.serializer[:use_trailing_solidus] = slash - end - - opts.on("--[no-]escape-lt-in-attrs", "Escape less than signs in attribute values") do |lt| - options.serializer[:escape_lt_in_attrs] = lt - end - - opts.on("--[no-]escape-rcdata", "Escape rcdata element values") do |rcdata| - options.serializer[:escape_rcdata] = rcdata - end - - opts.separator "" - opts.separator "Other Options:" - - opts.on("-p", "--[no-]profile", "Profile the run") do |profile| - options.profile = profile - end - - opts.on("-t", "--[no-]time", "Time the run") do |time| - options.time = time - end - - opts.on("-c", "--[no-]encoding", "Print character encoding used") do |encoding| - options.encoding = encoding - end - - opts.on_tail("-h", "--help", "Show this message") do - puts opts - exit - end -end - -opts.parse!(ARGV) -parse options, ARGV +HTML5::CLI.run \ No newline at end of file diff --git a/vendor/plugins/HTML5lib/lib/html5.rb b/vendor/plugins/HTML5lib/lib/html5.rb index 7ca2ee61..68bd6b16 100644 --- a/vendor/plugins/HTML5lib/lib/html5.rb +++ b/vendor/plugins/HTML5lib/lib/html5.rb @@ -8,6 +8,6 @@ module HTML5 end def self.parse_fragment(stream, options={}) - HTMLParser.parse(stream, options) + HTMLParser.parse_fragment(stream, options) end end diff --git a/vendor/plugins/HTML5lib/lib/html5/cli.rb b/vendor/plugins/HTML5lib/lib/html5/cli.rb new file mode 100644 index 00000000..ef49d4c6 --- /dev/null +++ b/vendor/plugins/HTML5lib/lib/html5/cli.rb @@ -0,0 +1,231 @@ +$:.unshift File.dirname(__FILE__), 'lib' +require 'html5' +require 'core_ext/string' +require 'ostruct' +require 'optparse' + +module HTML5::CLI + + def self.parse_opts argv + options = OpenStruct.new + options.profile = false + options.time = false + options.output = :html + options.treebuilder = 'simpletree' + options.error = false + options.encoding = false + options.parsemethod = :parse + options.serializer = { + :encoding => 'utf-8', + :omit_optional_tags => false, + :inject_meta_charset => false + } + + opts = OptionParser.new do |opts| + opts.separator "" + opts.separator "Parse Options:" + + opts.on("-b", "--treebuilder NAME") do |treebuilder| + options.treebuilder = treebuilder + end + + opts.on("-f", "--fragment CONTAINER", "Parse as a fragment") do |container| + options.parsemethod = :parse_fragment + options.container = container if container + end + + opts.separator "" + opts.separator "Filter Options:" + + opts.on("--[no-]inject-meta-charset", "inject ") do |inject| + options.serializer[:inject_meta_charset] = inject + end + + opts.on("--[no-]strip-whitespace", "strip unnecessary whitespace") do |strip| + options.serializer[:strip_whitespace] = strip + end + + opts.on("--[no-]sanitize", "escape unsafe tags") do |sanitize| + options.serializer[:sanitize] = sanitize + end + + opts.separator "" + opts.separator "Output Options:" + + opts.on("--tree", "output as debug tree") do |tree| + options.output = :tree + end + + opts.on("-x", "--xml", "output as xml") do |xml| + options.output = :xml + options.treebuilder = "rexml" + end + + opts.on("--[no-]html", "Output as html") do |html| + options.output = (html ? :html : nil) + end + + opts.on("--hilite", "Output as formatted highlighted code.") do |hilite| + options.output = :hilite + end + + opts.on("-e", "--error", "Print a list of parse errors") do |error| + options.error = error + end + + opts.separator "" + opts.separator "Serialization Options:" + + opts.on("--[no-]omit-optional-tags", "Omit optional tags") do |omit| + options.serializer[:omit_optional_tags] = omit + end + + opts.on("--[no-]quote-attr-values", "Quote attribute values") do |quote| + options.serializer[:quote_attr_values] = quote + end + + opts.on("--[no-]use-best-quote-char", "Use best quote character") do |best| + options.serializer[:use_best_quote_char] = best + end + + opts.on("--quote-char C", "Use specified quote character") do |c| + options.serializer[:quote_char] = c + end + + opts.on("--[no-]minimize-boolean-attributes", "Minimize boolean attributes") do |min| + options.serializer[:minimize_boolean_attributes] = min + end + + opts.on("--[no-]use-trailing-solidus", "Use trailing solidus") do |slash| + options.serializer[:use_trailing_solidus] = slash + end + + opts.on("--[no-]escape-lt-in-attrs", "Escape less than signs in attribute values") do |lt| + options.serializer[:escape_lt_in_attrs] = lt + end + + opts.on("--[no-]escape-rcdata", "Escape rcdata element values") do |rcdata| + options.serializer[:escape_rcdata] = rcdata + end + + opts.separator "" + opts.separator "Other Options:" + + opts.on("-p", "--[no-]profile", "Profile the run") do |profile| + options.profile = profile + end + + opts.on("-t", "--[no-]time", "Time the run") do |time| + options.time = time + end + + opts.on("-c", "--[no-]encoding", "Print character encoding used") do |encoding| + options.encoding = encoding + end + + opts.on_tail("-h", "--help", "Show this message") do + puts opts + exit + end + + + end + opts.parse!(argv) + options + end + + def self.open_input f + if f + begin + if f[0..6] == 'http://' + require 'open-uri' + f = URI.parse(f).open + encoding = f.charset + elsif f == '-' + f = $stdin + else + f = open(f) + end + rescue + end + else + $stderr.write("No filename provided. Use -h for help\n") + exit(1) + end + f + end + + def self.parse(opts, args) + encoding = nil + + f = open_input args.last + + require 'html5/treebuilders' + treebuilder = HTML5::TreeBuilders[opts.treebuilder] + + if opts.output == :xml + require 'html5/liberalxmlparser' + p = HTML5::XMLParser.new(:tree=>treebuilder) + else + require 'html5/html5parser' + p = HTML5::HTMLParser.new(:tree=>treebuilder) + end + + if opts.parsemethod == :parse + args = [f, encoding] + else + args = [f, (opts.container || 'div'), encoding] + end + + if opts.profile + require 'profiler' + Profiler__::start_profile + p.send(opts.parsemethod, *args) + Profiler__::stop_profile + Profiler__::print_profile($stderr) + elsif opts.time + require 'time' # TODO: switch to benchmark + t0 = Time.new + document = p.send(opts.parsemethod, *args) + t1 = Time.new + print_output(p, document, opts) + t2 = Time.new + puts "\n\nRun took: %fs (plus %fs to print the output)"%[t1-t0, t2-t1] + else + document = p.send(opts.parsemethod, *args) + print_output(p, document, opts) + end + end + + def self.print_output(parser, document, opts) + puts "Encoding: #{parser.tokenizer.stream.char_encoding}" if opts.encoding + + case opts.output + when :xml + print document + when :html + require 'html5/treewalkers' + tokens = HTML5::TreeWalkers[opts.treebuilder].new(document) + require 'html5/serializer' + puts HTML5::HTMLSerializer.serialize(tokens, opts.serializer) + when :hilite + print document.hilite + when :tree + document = [document] unless document.respond_to?(:each) + document.each {|fragment| puts parser.tree.testSerializer(fragment)} + end + + if opts.error + errList=[] + for pos, errorcode, datavars in parser.errors + errList << "Line #{pos[0]} Col #{pos[1]} " + (HTML5::E[errorcode] || "Unknown error \"#{errorcode}\"") % datavars + end + $stdout.write("\nParse errors:\n" + errList.join("\n")+"\n") + end + end + + def self.run + options = parse_opts ARGV + parse options, ARGV + end +end \ No newline at end of file diff --git a/vendor/plugins/HTML5lib/lib/html5/constants.rb b/vendor/plugins/HTML5lib/lib/html5/constants.rb index ed34b086..53baa8e8 100755 --- a/vendor/plugins/HTML5lib/lib/html5/constants.rb +++ b/vendor/plugins/HTML5lib/lib/html5/constants.rb @@ -908,7 +908,7 @@ module HTML5 "eof-in-bogus-doctype" => _("Unexpected end of file in bogus doctype."), "eof-in-innerhtml" => - _("XXX innerHTML EOF"), + _("Unexpected EOF in inner html mode."), "unexpected-doctype" => _("Unexpected DOCTYPE. Ignored."), "non-html-root" => @@ -1040,7 +1040,8 @@ module HTML5 _("Unexpected end tag (%(name))" + ". Expected end of file."), "unexpected-end-table-in-caption" => - _("Unexpected end table tag in caption. Generates implied end caption.") + _("Unexpected end table tag in caption. Generates implied end caption."), + "end-html-in-innerhtml" => _("Unexpected html end tag in inner html mode.") } end diff --git a/vendor/plugins/HTML5lib/lib/html5/filters.rb b/vendor/plugins/HTML5lib/lib/html5/filters.rb deleted file mode 100644 index 74c7f0e0..00000000 --- a/vendor/plugins/HTML5lib/lib/html5/filters.rb +++ /dev/null @@ -1 +0,0 @@ -require 'html5/filters/optionaltags' diff --git a/vendor/plugins/HTML5lib/lib/html5/html5parser/after_body_phase.rb b/vendor/plugins/HTML5lib/lib/html5/html5parser/after_body_phase.rb index a55e4701..2cba741b 100644 --- a/vendor/plugins/HTML5lib/lib/html5/html5parser/after_body_phase.rb +++ b/vendor/plugins/HTML5lib/lib/html5/html5parser/after_body_phase.rb @@ -25,7 +25,7 @@ module HTML5 def endTagHtml(name) if @parser.inner_html - parse_error + parse_error "end-html-in-innerhtml" else # XXX: This may need to be done, not sure # Don't set last_phase to the current phase but to the inBody phase diff --git a/vendor/plugins/HTML5lib/lib/html5/html5parser/in_body_phase.rb b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_body_phase.rb index 8b8ed02e..f592c57b 100644 --- a/vendor/plugins/HTML5lib/lib/html5/html5parser/in_body_phase.rb +++ b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_body_phase.rb @@ -51,34 +51,22 @@ module HTML5 super(parser, tree) # for special handling of whitespace in
- if $-w - $-w = false - class << self; alias processSpaceCharactersNonPre processSpaceCharacters; end - $-w = true - else - class << self; alias processSpaceCharactersNonPre processSpaceCharacters; end + silence do + class << self + alias processSpaceCharactersNonPre processSpaceCharacters + end end end def processSpaceCharactersDropNewline(data) # #Sometimes (start ofblocks) we want to drop leading newlines - if $-w - $-w = false - class << self - silence do - alias processSpaceCharacters processSpaceCharactersNonPre - end - end - $-w = true - else - class << self - silence do - alias processSpaceCharacters processSpaceCharactersNonPre - end + class << self + silence do + alias processSpaceCharacters processSpaceCharactersNonPre end end - + if (data.length > 0 and data[0] == ?\n && %w[pre textarea].include?(@tree.open_elements.last.name) && !@tree.open_elements.last.hasContent) data = data[1..-1] @@ -376,16 +364,6 @@ module HTML5 end def endTagBlock(name) - #Put us back in the right whitespace handling mode - if name == 'pre' - class << self; - silence do - alias processSpaceCharacters processSpaceCharactersNonPre; - end - end - end - - @tree.generateImpliedEndTags if in_scope?(name) unless @tree.open_elements.last.name == name diff --git a/vendor/plugins/HTML5lib/lib/html5/html5parser/phase.rb b/vendor/plugins/HTML5lib/lib/html5/html5parser/phase.rb index 237ae7d1..b6ea65b3 100644 --- a/vendor/plugins/HTML5lib/lib/html5/html5parser/phase.rb +++ b/vendor/plugins/HTML5lib/lib/html5/html5parser/phase.rb @@ -144,7 +144,7 @@ module HTML5 def remove_open_elements_until(name=nil) finished = false - until finished + until finished || @tree.open_elements.length == 0 element = @tree.open_elements.pop finished = name.nil? ? yield(element) : element.name == name end diff --git a/vendor/plugins/HTML5lib/lib/html5/sanitizer.rb b/vendor/plugins/HTML5lib/lib/html5/sanitizer.rb index 1b771b23..8d05e96c 100644 --- a/vendor/plugins/HTML5lib/lib/html5/sanitizer.rb +++ b/vendor/plugins/HTML5lib/lib/html5/sanitizer.rb @@ -78,8 +78,12 @@ module HTML5 ATTR_VAL_IS_URI = %w[href src cite action longdesc xlink:href xml:base] - SVG_ATTR_VAL_ALLOWS_REF = %w[clip-path fill filter marker marker-start - marker-mid marker-end mask stroke textpath] + SVG_ATTR_VAL_ALLOWS_REF = %w[clip-path color-profile cursor fill + filter marker marker-start marker-mid marker-end mask stroke] + + SVG_ALLOW_LOCAL_HREF = %w[altGlyph animate animateColor animateMotion + animateTransform cursor feImage filter linearGradient pattern + radialGradient textpath tref set use] ACCEPTABLE_CSS_PROPERTIES = %w[azimuth background-color border-bottom-color border-collapse border-color border-left-color @@ -127,6 +131,9 @@ module HTML5 SVG_ATTR_VAL_ALLOWS_REF.each do |attr| attrs[attr] = attrs[attr].to_s.gsub(/url\s*\(\s*[^#\s][^)]+?\)/m, ' ') if attrs[attr] end + if SVG_ALLOW_LOCAL_HREF.include?(token[:name]) && attrs['xlink:href'] && attrs['xlink:href'] =~ /^\s*[^#\s].*/m + attrs.delete 'xlink:href' + end if attrs['style'] attrs['style'] = sanitize_css(attrs['style']) end diff --git a/vendor/plugins/HTML5lib/parse.rb b/vendor/plugins/HTML5lib/parse.rb deleted file mode 100755 index ba0d9071..00000000 --- a/vendor/plugins/HTML5lib/parse.rb +++ /dev/null @@ -1,217 +0,0 @@ -#!/usr/bin/env ruby -# -# Parse a document to a simpletree tree, with optional profiling - -$:.unshift File.dirname(__FILE__),'lib' - -def parse(opts, args) - encoding = nil - - f = args[-1] - if f - begin - if f[0..6] == 'http://' - require 'open-uri' - f = URI.parse(f).open - encoding = f.charset - elsif f == '-' - f = $stdin - else - f = open(f) - end - rescue - end - else - $stderr.write("No filename provided. Use -h for help\n") - exit(1) - end - - require 'html5/treebuilders' - treebuilder = HTML5::TreeBuilders[opts.treebuilder] - - if opts.output == :xml - require 'html5/liberalxmlparser' - p = HTML5::XHTMLParser.new(:tree=>treebuilder) - else - require 'html5/html5parser' - p = HTML5::HTMLParser.new(:tree=>treebuilder) - end - - if opts.parsemethod == :parse - args = [f, encoding] - else - args = [f, 'div', encoding] - end - - if opts.profile - require 'profiler' - Profiler__::start_profile - p.send(opts.parsemethod, *args) - Profiler__::stop_profile - Profiler__::print_profile($stderr) - elsif opts.time - require 'time' - t0 = Time.new - document = p.send(opts.parsemethod, *args) - t1 = Time.new - printOutput(p, document, opts) - t2 = Time.new - puts "\n\nRun took: %fs (plus %fs to print the output)"%[t1-t0, t2-t1] - else - document = p.send(opts.parsemethod, *args) - printOutput(p, document, opts) - end -end - -def printOutput(parser, document, opts) - puts "Encoding: #{parser.tokenizer.stream.char_encoding}" if opts.encoding - - case opts.output - when :xml - print document - when :html - require 'html5/treewalkers' - tokens = HTML5::TreeWalkers[opts.treebuilder].new(document) - require 'html5/serializer' - puts HTML5::HTMLSerializer.serialize(tokens, opts.serializer) - when :hilite - print document.hilite - when :tree - document = [document] unless document.respond_to?(:each) - document.each {|fragment| puts parser.tree.testSerializer(fragment)} - end - - if opts.error - errList=[] - for pos, message in parser.errors - errList << ("Line %i Col %i"%pos + " " + message) - end - $stdout.write("\nParse errors:\n" + errList.join("\n")+"\n") - end -end - -require 'ostruct' -options = OpenStruct.new -options.profile = false -options.time = false -options.output = :html -options.treebuilder = 'simpletree' -options.error = false -options.encoding = false -options.parsemethod = :parse -options.serializer = { - :encoding => 'utf-8', - :omit_optional_tags => false, - :inject_meta_charset => false -} - -require 'optparse' -opts = OptionParser.new do |opts| - opts.separator "" - opts.separator "Parse Options:" - - opts.on("-b", "--treebuilder NAME") do |treebuilder| - options.treebuilder = treebuilder - end - - opts.on("-f", "--fragment", "Parse as a fragment") do |parse| - options.parsemethod = :parseFragment - end - - opts.separator "" - opts.separator "Filter Options:" - - opts.on("--[no-]inject-meta-charset", "inject ") do |inject| - options.serializer[:inject_meta_charset] = inject - end - - opts.on("--[no-]strip-whitespace", "strip unnecessary whitespace") do |strip| - options.serializer[:strip_whitespace] = strip - end - - opts.on("--[no-]sanitize", "escape unsafe tags") do |sanitize| - options.serializer[:sanitize] = sanitize - end - - opts.separator "" - opts.separator "Output Options:" - - opts.on("--tree", "output as debug tree") do |tree| - options.output = :tree - end - - opts.on("-x", "--xml", "output as xml") do |xml| - options.output = :xml - options.treebuilder = "rexml" - end - - opts.on("--[no-]html", "Output as html") do |html| - options.output = (html ? :html : nil) - end - - opts.on("--hilite", "Output as formatted highlighted code.") do |hilite| - options.output = :hilite - end - - opts.on("-e", "--error", "Print a list of parse errors") do |error| - options.error = error - end - - opts.separator "" - opts.separator "Serialization Options:" - - opts.on("--[no-]omit-optional-tags", "Omit optional tags") do |omit| - options.serializer[:omit_optional_tags] = omit - end - - opts.on("--[no-]quote-attr-values", "Quote attribute values") do |quote| - options.serializer[:quote_attr_values] = quote - end - - opts.on("--[no-]use-best-quote-char", "Use best quote character") do |best| - options.serializer[:use_best_quote_char] = best - end - - opts.on("--quote-char C", "Use specified quote character") do |c| - options.serializer[:quote_char] = c - end - - opts.on("--[no-]minimize-boolean-attributes", "Minimize boolean attributes") do |min| - options.serializer[:minimize_boolean_attributes] = min - end - - opts.on("--[no-]use-trailing-solidus", "Use trailing solidus") do |slash| - options.serializer[:use_trailing_solidus] = slash - end - - opts.on("--[no-]escape-lt-in-attrs", "Escape less than signs in attribute values") do |lt| - options.serializer[:escape_lt_in_attrs] = lt - end - - opts.on("--[no-]escape-rcdata", "Escape rcdata element values") do |rcdata| - options.serializer[:escape_rcdata] = rcdata - end - - opts.separator "" - opts.separator "Other Options:" - - opts.on("-p", "--[no-]profile", "Profile the run") do |profile| - options.profile = profile - end - - opts.on("-t", "--[no-]time", "Time the run") do |time| - options.time = time - end - - opts.on("-c", "--[no-]encoding", "Print character encoding used") do |encoding| - options.encoding = encoding - end - - opts.on_tail("-h", "--help", "Show this message") do - puts opts - exit - end -end - -opts.parse!(ARGV) -parse options, ARGV diff --git a/vendor/plugins/HTML5lib/tests/preamble.rb b/vendor/plugins/HTML5lib/test/preamble.rb similarity index 86% rename from vendor/plugins/HTML5lib/tests/preamble.rb rename to vendor/plugins/HTML5lib/test/preamble.rb index f38a581a..ce4b1297 100644 --- a/vendor/plugins/HTML5lib/tests/preamble.rb +++ b/vendor/plugins/HTML5lib/test/preamble.rb @@ -2,15 +2,14 @@ require 'test/unit' HTML5_BASE = File.dirname(File.dirname(File.dirname(File.expand_path(__FILE__)))) -if File.exists?(File.join(HTML5_BASE, 'testdata')) - TESTDATA_DIR = File.join(HTML5_BASE, 'testdata') +if File.exists?(File.join(HTML5_BASE, 'ruby', 'testdata')) + TESTDATA_DIR = File.join(HTML5_BASE, 'ruby', 'testdata') else - TESTDATA_DIR = File.join(File.dirname(File.dirname(File.expand_path(__FILE__))), 'testdata') + TESTDATA_DIR = File.join(HTML5_BASE, 'testdata') end -# $:.unshift File.join(File.dirname(File.dirname(__FILE__)), 'lib') - -# $:.unshift File.dirname(__FILE__) +$:.unshift File.join(File.dirname(File.dirname(__FILE__)), 'lib') +$:.unshift File.dirname(__FILE__) require 'core_ext/string' diff --git a/vendor/plugins/HTML5lib/test/test_cli.rb b/vendor/plugins/HTML5lib/test/test_cli.rb new file mode 100644 index 00000000..1725ffc4 --- /dev/null +++ b/vendor/plugins/HTML5lib/test/test_cli.rb @@ -0,0 +1,16 @@ +require File.join(File.dirname(__FILE__), 'preamble') +require "html5/cli" + +class TestCli < Test::Unit::TestCase + def test_open_input + assert_equal $stdin, HTML5::CLI.open_input('-') + assert_kind_of StringIO, HTML5::CLI.open_input('http://whatwg.org/') + assert_kind_of File, HTML5::CLI.open_input('testdata/sites/google-results.htm') + end + + def test_parse_opts + HTML5::CLI.parse_opts [] # TODO test defaults + assert_equal 'hpricot', HTML5::CLI.parse_opts(['-b', 'hpricot']).treebuilder + assert_equal 'hpricot', HTML5::CLI.parse_opts(['--treebuilder', 'hpricot']).treebuilder + end +end \ No newline at end of file diff --git a/vendor/plugins/HTML5lib/tests/test_encoding.rb b/vendor/plugins/HTML5lib/test/test_encoding.rb similarity index 100% rename from vendor/plugins/HTML5lib/tests/test_encoding.rb rename to vendor/plugins/HTML5lib/test/test_encoding.rb diff --git a/vendor/plugins/HTML5lib/tests/test_input_stream.rb b/vendor/plugins/HTML5lib/test/test_input_stream.rb similarity index 67% rename from vendor/plugins/HTML5lib/tests/test_input_stream.rb rename to vendor/plugins/HTML5lib/test/test_input_stream.rb index 00cbbac6..6a7d855f 100644 --- a/vendor/plugins/HTML5lib/tests/test_input_stream.rb +++ b/vendor/plugins/HTML5lib/test/test_input_stream.rb @@ -14,4 +14,13 @@ class TestHtml5Inputstream < Test::Unit::TestCase 1022.times{stream.char} assert_equal "i", stream.char end + + def test_chars_until + stream = HTML5::HTMLInputStream.new("aaaaaaab") + assert_equal "aaaaaaa", stream.chars_until("b") + + stream = HTML5::HTMLInputStream.new("aaaaaaab") + assert_equal "aaaaaaab", stream.chars_until("c") + + end end \ No newline at end of file diff --git a/vendor/plugins/HTML5lib/tests/test_lxp.rb b/vendor/plugins/HTML5lib/test/test_lxp.rb similarity index 100% rename from vendor/plugins/HTML5lib/tests/test_lxp.rb rename to vendor/plugins/HTML5lib/test/test_lxp.rb diff --git a/vendor/plugins/HTML5lib/tests/test_parser.rb b/vendor/plugins/HTML5lib/test/test_parser.rb similarity index 83% rename from vendor/plugins/HTML5lib/tests/test_parser.rb rename to vendor/plugins/HTML5lib/test/test_parser.rb index b3c042dc..15764c52 100644 --- a/vendor/plugins/HTML5lib/tests/test_parser.rb +++ b/vendor/plugins/HTML5lib/test/test_parser.rb @@ -12,11 +12,6 @@ begin rescue LoadError end -$CHECK_PARSER_ERRORS = ARGV.delete('-p') # TODO - -puts 'Testing tree builders: ' + $tree_types_to_test * ', ' - - class Html5ParserTestCase < Test::Unit::TestCase include HTML5 include TestSupport @@ -25,8 +20,7 @@ class Html5ParserTestCase < Test::Unit::TestCase test_name = File.basename(test_file).sub('.dat', '') - TestData.new(test_file, %w(data errors document-fragment document)). - each_with_index do |(input, errors, inner_html, expected), index| + TestData.new(test_file, %w(data errors document-fragment document)).each_with_index do |(input, errors, inner_html, expected), index| errors = errors.split("\n") expected = expected.gsub("\n| ","\n")[2..-1] @@ -35,13 +29,13 @@ class Html5ParserTestCase < Test::Unit::TestCase define_method 'test_%s_%d_%s' % [ test_name, index + 1, tree_name ] do parser = HTMLParser.new(:tree => TreeBuilders[tree_name]) - + if inner_html parser.parse_fragment(input, inner_html) else parser.parse(input) end - + actual_output = convertTreeDump(parser.tree.testSerializer(parser.tree.document)) assert_equal sortattrs(expected), sortattrs(actual_output), [ @@ -53,13 +47,13 @@ class Html5ParserTestCase < Test::Unit::TestCase actual_errors = parser.errors.map do |(line, col), message, datavars| 'Line: %i Col: %i %s' % [line, col, E[message] % datavars] end - assert_equal errors.length, parser.errors.length, [ + + assert_equal errors, actual_errors, [ '', 'Input', input, '', "Expected errors (#{errors.length}):", errors.join("\n"), '', "Actual errors (#{actual_errors.length}):", - actual_errors.join("\n") + actual_errors.join("\n") + "\n" ].join("\n") - end end end diff --git a/vendor/plugins/HTML5lib/tests/test_sanitizer.rb b/vendor/plugins/HTML5lib/test/test_sanitizer.rb similarity index 80% rename from vendor/plugins/HTML5lib/tests/test_sanitizer.rb rename to vendor/plugins/HTML5lib/test/test_sanitizer.rb index 375a1a15..d53df925 100644 --- a/vendor/plugins/HTML5lib/tests/test_sanitizer.rb +++ b/vendor/plugins/HTML5lib/test/test_sanitizer.rb @@ -110,6 +110,37 @@ class SanitizeTest < Test::Unit::TestCase end end + HTMLSanitizer::SVG_ALLOW_LOCAL_HREF.each do |tag_name| + next unless HTMLSanitizer::ALLOWED_ELEMENTS.include?(tag_name) + define_method "test_#{tag_name}_should_allow_local_href" do + input = %(<#{tag_name} xlink:href="#foo"/>) + output = "<#{tag_name.downcase} xlink:href='#foo'/>" + xhtmloutput = "<#{tag_name} xlink:href='#foo'>#{tag_name}>" + check_sanitization(input, output, xhtmloutput, xhtmloutput) + end + + define_method "test_#{tag_name}_should_allow_local_href_with_newline" do + input = %(<#{tag_name} xlink:href="\n#foo"/>) + output = "<#{tag_name.downcase} xlink:href='\n#foo'/>" + xhtmloutput = "<#{tag_name} xlink:href='\n#foo'>#{tag_name}>" + check_sanitization(input, output, xhtmloutput, xhtmloutput) + end + + define_method "test_#{tag_name}_should_forbid_nonlocal_href" do + input = %(<#{tag_name} xlink:href="http://bad.com/foo"/>) + output = "<#{tag_name.downcase}/>" + xhtmloutput = "<#{tag_name}>#{tag_name}>" + check_sanitization(input, output, xhtmloutput, xhtmloutput) + end + + define_method "test_#{tag_name}_should_forbid_nonlocal_href_with_newline" do + input = %(<#{tag_name} xlink:href="\nhttp://bad.com/foo"/>) + output = "<#{tag_name.downcase}/>" + xhtmloutput = "<#{tag_name}>#{tag_name}>" + check_sanitization(input, output, xhtmloutput, xhtmloutput) + end + end + def test_should_handle_astral_plane_characters input = "𝒵 𝔸
" output = "\360\235\222\265 \360\235\224\270
" diff --git a/vendor/plugins/HTML5lib/tests/test_serializer.rb b/vendor/plugins/HTML5lib/test/test_serializer.rb similarity index 100% rename from vendor/plugins/HTML5lib/tests/test_serializer.rb rename to vendor/plugins/HTML5lib/test/test_serializer.rb diff --git a/vendor/plugins/HTML5lib/tests/test_sniffer.rb b/vendor/plugins/HTML5lib/test/test_sniffer.rb similarity index 100% rename from vendor/plugins/HTML5lib/tests/test_sniffer.rb rename to vendor/plugins/HTML5lib/test/test_sniffer.rb diff --git a/vendor/plugins/HTML5lib/tests/test_stream.rb b/vendor/plugins/HTML5lib/test/test_stream.rb similarity index 88% rename from vendor/plugins/HTML5lib/tests/test_stream.rb rename to vendor/plugins/HTML5lib/test/test_stream.rb index 2ce4e560..40955bd7 100755 --- a/vendor/plugins/HTML5lib/tests/test_stream.rb +++ b/vendor/plugins/HTML5lib/test/test_stream.rb @@ -42,9 +42,10 @@ class HTMLInputStreamTest < Test::Unit::TestCase require 'iconv' def test_utf_16 - stream = HTMLInputStream.new("\xff\xfe" + " \x00"*1025) - assert(stream.char_encoding, 'utf-16-le') - assert_equal(1025, stream.chars_until(' ',true).length) + input = Iconv.new('utf-16', 'utf-8').iconv(' '*1025) + stream = HTMLInputStream.new(input) + assert('utf-16-le', stream.char_encoding) + assert_equal(1025, stream.chars_until(' ', true).length) end rescue LoadError puts "iconv not found, skipping iconv tests" diff --git a/vendor/plugins/HTML5lib/tests/test_tokenizer.rb b/vendor/plugins/HTML5lib/test/test_tokenizer.rb similarity index 100% rename from vendor/plugins/HTML5lib/tests/test_tokenizer.rb rename to vendor/plugins/HTML5lib/test/test_tokenizer.rb diff --git a/vendor/plugins/HTML5lib/tests/test_treewalkers.rb b/vendor/plugins/HTML5lib/test/test_treewalkers.rb similarity index 100% rename from vendor/plugins/HTML5lib/tests/test_treewalkers.rb rename to vendor/plugins/HTML5lib/test/test_treewalkers.rb diff --git a/vendor/plugins/HTML5lib/tests/test_validator.rb b/vendor/plugins/HTML5lib/test/test_validator.rb similarity index 100% rename from vendor/plugins/HTML5lib/tests/test_validator.rb rename to vendor/plugins/HTML5lib/test/test_validator.rb diff --git a/vendor/plugins/HTML5lib/tests/tokenizer_test_parser.rb b/vendor/plugins/HTML5lib/test/tokenizer_test_parser.rb similarity index 100% rename from vendor/plugins/HTML5lib/tests/tokenizer_test_parser.rb rename to vendor/plugins/HTML5lib/test/tokenizer_test_parser.rb diff --git a/vendor/plugins/HTML5lib/testdata/sanitizer/tests1.dat b/vendor/plugins/HTML5lib/testdata/sanitizer/tests1.dat index 53de66c5..73de161a 100644 --- a/vendor/plugins/HTML5lib/testdata/sanitizer/tests1.dat +++ b/vendor/plugins/HTML5lib/testdata/sanitizer/tests1.dat @@ -433,9 +433,9 @@ { "name": "uri_ref_with_space_in svg_attribute", "input": "", - "rexml": " ", - "xhtml": " ", - "output": " " + "rexml": " ", + "xhtml": " ", + "output": " " }, { diff --git a/vendor/plugins/HTML5lib/testdata/tree-construction/tests4.dat b/vendor/plugins/HTML5lib/testdata/tree-construction/tests4.dat index 26785058..f65133c7 100644 --- a/vendor/plugins/HTML5lib/testdata/tree-construction/tests4.dat +++ b/vendor/plugins/HTML5lib/testdata/tree-construction/tests4.dat @@ -41,7 +41,7 @@ plaintext #data setting html's innerHTML #errors -Line: 1 Col: 24 XXX innerHTML EOF +Line: 1 Col: 24 Unexpected EOF in inner html mode. #document-fragment html #document diff --git a/vendor/plugins/HTML5lib/testdata/tree-construction/tests6.dat b/vendor/plugins/HTML5lib/testdata/tree-construction/tests6.dat index 24115123..bf8a0a39 100644 --- a/vendor/plugins/HTML5lib/testdata/tree-construction/tests6.dat +++ b/vendor/plugins/HTML5lib/testdata/tree-construction/tests6.dat @@ -608,4 +608,25 @@ Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE. | | | -| \ No newline at end of file +| + +#data + +#document-fragment +html +#errors +Line: 1 Col: 20 Unexpected html end tag in inner html mode. +Line: 1 Col: 20 Unexpected EOF in inner html mode. +#document +| +| + +#data + +#errors +Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE. +#document +| +| +|