Merged with trunk.
This commit is contained in:
commit
7dbf8be706
|
@ -100,6 +100,7 @@
|
||||||
\re@DeclareMathSymbol{\nwArr}{\mathrel}{symbolsC}{118}
|
\re@DeclareMathSymbol{\nwArr}{\mathrel}{symbolsC}{118}
|
||||||
\re@DeclareMathSymbol{\swArrow}{\mathrel}{symbolsC}{119}
|
\re@DeclareMathSymbol{\swArrow}{\mathrel}{symbolsC}{119}
|
||||||
\re@DeclareMathSymbol{\swArr}{\mathrel}{symbolsC}{119}
|
\re@DeclareMathSymbol{\swArr}{\mathrel}{symbolsC}{119}
|
||||||
|
\re@DeclareMathSymbol{\nequiv}{\mathrel}{symbolsC}{46}
|
||||||
\makeatother
|
\makeatother
|
||||||
|
|
||||||
% Widecheck
|
% Widecheck
|
||||||
|
|
|
@ -9,6 +9,10 @@ require 'uri/common'
|
||||||
module Chunk
|
module Chunk
|
||||||
class Abstract
|
class Abstract
|
||||||
|
|
||||||
|
# Rails's default utf-8 support causes problems here. So, for Chunk::Abstract class, turn off
|
||||||
|
# multibyte character support.
|
||||||
|
$KCODE = 'iso-8859-1'
|
||||||
|
|
||||||
# automatically construct the array of derivatives of Chunk::Abstract
|
# automatically construct the array of derivatives of Chunk::Abstract
|
||||||
@derivatives = []
|
@derivatives = []
|
||||||
|
|
||||||
|
|
|
@ -767,6 +767,7 @@ class WikiControllerTest < Test::Unit::TestCase
|
||||||
\re@DeclareMathSymbol{\nwArr}{\mathrel}{symbolsC}{118}
|
\re@DeclareMathSymbol{\nwArr}{\mathrel}{symbolsC}{118}
|
||||||
\re@DeclareMathSymbol{\swArrow}{\mathrel}{symbolsC}{119}
|
\re@DeclareMathSymbol{\swArrow}{\mathrel}{symbolsC}{119}
|
||||||
\re@DeclareMathSymbol{\swArr}{\mathrel}{symbolsC}{119}
|
\re@DeclareMathSymbol{\swArr}{\mathrel}{symbolsC}{119}
|
||||||
|
\re@DeclareMathSymbol{\nequiv}{\mathrel}{symbolsC}{46}
|
||||||
\makeatother
|
\makeatother
|
||||||
|
|
||||||
% Widecheck
|
% Widecheck
|
||||||
|
|
60
vendor/plugins/HTML5lib/Manifest.txt
vendored
60
vendor/plugins/HTML5lib/Manifest.txt
vendored
|
@ -2,12 +2,18 @@ History.txt
|
||||||
Manifest.txt
|
Manifest.txt
|
||||||
README
|
README
|
||||||
Rakefile.rb
|
Rakefile.rb
|
||||||
|
bin/html5
|
||||||
|
lib/core_ext/string.rb
|
||||||
lib/html5.rb
|
lib/html5.rb
|
||||||
lib/html5/constants.rb
|
lib/html5/constants.rb
|
||||||
lib/html5/filters/base.rb
|
lib/html5/filters/base.rb
|
||||||
lib/html5/filters/inject_meta_charset.rb
|
lib/html5/filters/inject_meta_charset.rb
|
||||||
|
lib/html5/filters/iso639codes.rb
|
||||||
lib/html5/filters/optionaltags.rb
|
lib/html5/filters/optionaltags.rb
|
||||||
|
lib/html5/filters/rfc2046.rb
|
||||||
|
lib/html5/filters/rfc3987.rb
|
||||||
lib/html5/filters/sanitizer.rb
|
lib/html5/filters/sanitizer.rb
|
||||||
|
lib/html5/filters/validator.rb
|
||||||
lib/html5/filters/whitespace.rb
|
lib/html5/filters/whitespace.rb
|
||||||
lib/html5/html5parser.rb
|
lib/html5/html5parser.rb
|
||||||
lib/html5/html5parser/after_body_phase.rb
|
lib/html5/html5parser/after_body_phase.rb
|
||||||
|
@ -34,6 +40,7 @@ lib/html5/sanitizer.rb
|
||||||
lib/html5/serializer.rb
|
lib/html5/serializer.rb
|
||||||
lib/html5/serializer/htmlserializer.rb
|
lib/html5/serializer/htmlserializer.rb
|
||||||
lib/html5/serializer/xhtmlserializer.rb
|
lib/html5/serializer/xhtmlserializer.rb
|
||||||
|
lib/html5/sniffer.rb
|
||||||
lib/html5/tokenizer.rb
|
lib/html5/tokenizer.rb
|
||||||
lib/html5/treebuilders.rb
|
lib/html5/treebuilders.rb
|
||||||
lib/html5/treebuilders/base.rb
|
lib/html5/treebuilders/base.rb
|
||||||
|
@ -46,14 +53,65 @@ lib/html5/treewalkers/hpricot.rb
|
||||||
lib/html5/treewalkers/rexml.rb
|
lib/html5/treewalkers/rexml.rb
|
||||||
lib/html5/treewalkers/simpletree.rb
|
lib/html5/treewalkers/simpletree.rb
|
||||||
lib/html5/version.rb
|
lib/html5/version.rb
|
||||||
parse.rb
|
testdata/encoding/chardet/test_big5.txt
|
||||||
|
testdata/encoding/test-yahoo-jp.dat
|
||||||
|
testdata/encoding/tests1.dat
|
||||||
|
testdata/encoding/tests2.dat
|
||||||
|
testdata/sanitizer/tests1.dat
|
||||||
|
testdata/serializer/core.test
|
||||||
|
testdata/serializer/injectmeta.test
|
||||||
|
testdata/serializer/optionaltags.test
|
||||||
|
testdata/serializer/options.test
|
||||||
|
testdata/serializer/whitespace.test
|
||||||
|
testdata/sites/google-results.htm
|
||||||
|
testdata/sites/python-ref-import.htm
|
||||||
|
testdata/sites/web-apps-old.htm
|
||||||
|
testdata/sites/web-apps.htm
|
||||||
|
testdata/sniffer/htmlOrFeed.json
|
||||||
|
testdata/tokenizer/contentModelFlags.test
|
||||||
|
testdata/tokenizer/entities.test
|
||||||
|
testdata/tokenizer/escapeFlag.test
|
||||||
|
testdata/tokenizer/test1.test
|
||||||
|
testdata/tokenizer/test2.test
|
||||||
|
testdata/tokenizer/test3.test
|
||||||
|
testdata/tokenizer/test4.test
|
||||||
|
testdata/tree-construction/tests1.dat
|
||||||
|
testdata/tree-construction/tests2.dat
|
||||||
|
testdata/tree-construction/tests3.dat
|
||||||
|
testdata/tree-construction/tests4.dat
|
||||||
|
testdata/tree-construction/tests5.dat
|
||||||
|
testdata/tree-construction/tests6.dat
|
||||||
|
testdata/validator/attributes.test
|
||||||
|
testdata/validator/base-href-attribute.test
|
||||||
|
testdata/validator/base-target-attribute.test
|
||||||
|
testdata/validator/blockquote-cite-attribute.test
|
||||||
|
testdata/validator/classattribute.test
|
||||||
|
testdata/validator/contenteditableattribute.test
|
||||||
|
testdata/validator/contextmenuattribute.test
|
||||||
|
testdata/validator/dirattribute.test
|
||||||
|
testdata/validator/draggableattribute.test
|
||||||
|
testdata/validator/html-xmlns-attribute.test
|
||||||
|
testdata/validator/idattribute.test
|
||||||
|
testdata/validator/inputattributes.test
|
||||||
|
testdata/validator/irrelevantattribute.test
|
||||||
|
testdata/validator/langattribute.test
|
||||||
|
testdata/validator/li-value-attribute.test
|
||||||
|
testdata/validator/link-href-attribute.test
|
||||||
|
testdata/validator/link-hreflang-attribute.test
|
||||||
|
testdata/validator/link-rel-attribute.test
|
||||||
|
testdata/validator/ol-start-attribute.test
|
||||||
|
testdata/validator/starttags.test
|
||||||
|
testdata/validator/style-scoped-attribute.test
|
||||||
|
testdata/validator/tabindexattribute.test
|
||||||
tests/preamble.rb
|
tests/preamble.rb
|
||||||
tests/test_encoding.rb
|
tests/test_encoding.rb
|
||||||
tests/test_lxp.rb
|
tests/test_lxp.rb
|
||||||
tests/test_parser.rb
|
tests/test_parser.rb
|
||||||
tests/test_sanitizer.rb
|
tests/test_sanitizer.rb
|
||||||
tests/test_serializer.rb
|
tests/test_serializer.rb
|
||||||
|
tests/test_sniffer.rb
|
||||||
tests/test_stream.rb
|
tests/test_stream.rb
|
||||||
tests/test_tokenizer.rb
|
tests/test_tokenizer.rb
|
||||||
tests/test_treewalkers.rb
|
tests/test_treewalkers.rb
|
||||||
|
tests/test_validator.rb
|
||||||
tests/tokenizer_test_parser.rb
|
tests/tokenizer_test_parser.rb
|
||||||
|
|
8
vendor/plugins/HTML5lib/Rakefile.rb
vendored
8
vendor/plugins/HTML5lib/Rakefile.rb
vendored
|
@ -18,16 +18,16 @@ end
|
||||||
|
|
||||||
require 'rcov/rcovtask'
|
require 'rcov/rcovtask'
|
||||||
|
|
||||||
namespace :test do
|
namespace :test do
|
||||||
namespace :coverage do
|
namespace :coverage do
|
||||||
desc "Delete aggregate coverage data."
|
desc "Delete aggregate coverage data."
|
||||||
task(:clean) { rm_f "coverage.data" }
|
task(:clean) { rm_f "coverage.data" }
|
||||||
end
|
end
|
||||||
desc 'Aggregate code coverage for unit, functional and integration tests'
|
desc 'Aggregate code coverage for unit, functional and integration tests'
|
||||||
Rcov::RcovTask.new(:coverage => "test:coverage:clean") do |t|
|
Rcov::RcovTask.new(:coverage => "test:coverage:clean") do |t|
|
||||||
t.libs << "tests"
|
t.libs << "test"
|
||||||
t.test_files = FileList["tests/test_*.rb"]
|
t.test_files = FileList["test/test_*.rb"]
|
||||||
t.output_dir = "tests/coverage/"
|
t.output_dir = "test/coverage/"
|
||||||
t.verbose = true
|
t.verbose = true
|
||||||
end
|
end
|
||||||
end
|
end
|
216
vendor/plugins/HTML5lib/bin/html5
vendored
216
vendor/plugins/HTML5lib/bin/html5
vendored
|
@ -1,217 +1,5 @@
|
||||||
#!/usr/bin/env ruby
|
#!/usr/bin/env ruby
|
||||||
|
|
||||||
require 'core_ext/string'
|
require 'html5/cli'
|
||||||
$:.unshift File.dirname(__FILE__), 'lib'
|
|
||||||
|
|
||||||
def parse(opts, args)
|
HTML5::CLI.run
|
||||||
encoding = nil
|
|
||||||
|
|
||||||
f = args[-1]
|
|
||||||
if f
|
|
||||||
begin
|
|
||||||
if f[0..6] == 'http://'
|
|
||||||
require 'open-uri'
|
|
||||||
f = URI.parse(f).open
|
|
||||||
encoding = f.charset
|
|
||||||
elsif f == '-'
|
|
||||||
f = $stdin
|
|
||||||
else
|
|
||||||
f = open(f)
|
|
||||||
end
|
|
||||||
rescue
|
|
||||||
end
|
|
||||||
else
|
|
||||||
$stderr.write("No filename provided. Use -h for help\n")
|
|
||||||
exit(1)
|
|
||||||
end
|
|
||||||
|
|
||||||
require 'html5/treebuilders'
|
|
||||||
treebuilder = HTML5::TreeBuilders[opts.treebuilder]
|
|
||||||
|
|
||||||
if opts.output == :xml
|
|
||||||
require 'html5/liberalxmlparser'
|
|
||||||
p = HTML5::XMLParser.new(:tree=>treebuilder)
|
|
||||||
else
|
|
||||||
require 'html5/html5parser'
|
|
||||||
p = HTML5::HTMLParser.new(:tree=>treebuilder)
|
|
||||||
end
|
|
||||||
|
|
||||||
if opts.parsemethod == :parse
|
|
||||||
args = [f, encoding]
|
|
||||||
else
|
|
||||||
args = [f, (opts.container || 'div'), encoding]
|
|
||||||
end
|
|
||||||
|
|
||||||
if opts.profile
|
|
||||||
require 'profiler'
|
|
||||||
Profiler__::start_profile
|
|
||||||
p.send(opts.parsemethod, *args)
|
|
||||||
Profiler__::stop_profile
|
|
||||||
Profiler__::print_profile($stderr)
|
|
||||||
elsif opts.time
|
|
||||||
require 'time' # TODO: switch to benchmark
|
|
||||||
t0 = Time.new
|
|
||||||
document = p.send(opts.parsemethod, *args)
|
|
||||||
t1 = Time.new
|
|
||||||
print_output(p, document, opts)
|
|
||||||
t2 = Time.new
|
|
||||||
puts "\n\nRun took: %fs (plus %fs to print the output)"%[t1-t0, t2-t1]
|
|
||||||
else
|
|
||||||
document = p.send(opts.parsemethod, *args)
|
|
||||||
print_output(p, document, opts)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def print_output(parser, document, opts)
|
|
||||||
puts "Encoding: #{parser.tokenizer.stream.char_encoding}" if opts.encoding
|
|
||||||
|
|
||||||
case opts.output
|
|
||||||
when :xml
|
|
||||||
print document
|
|
||||||
when :html
|
|
||||||
require 'html5/treewalkers'
|
|
||||||
tokens = HTML5::TreeWalkers[opts.treebuilder].new(document)
|
|
||||||
require 'html5/serializer'
|
|
||||||
puts HTML5::HTMLSerializer.serialize(tokens, opts.serializer)
|
|
||||||
when :hilite
|
|
||||||
print document.hilite
|
|
||||||
when :tree
|
|
||||||
document = [document] unless document.respond_to?(:each)
|
|
||||||
document.each {|fragment| puts parser.tree.testSerializer(fragment)}
|
|
||||||
end
|
|
||||||
|
|
||||||
if opts.error
|
|
||||||
errList=[]
|
|
||||||
for pos, errorcode, datavars in parser.errors
|
|
||||||
errList << "Line #{pos[0]} Col #{pos[1]} " + (HTML5::E[errorcode] || "Unknown error \"#{errorcode}\"") % datavars
|
|
||||||
end
|
|
||||||
$stdout.write("\nParse errors:\n" + errList.join("\n")+"\n")
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
require 'ostruct'
|
|
||||||
options = OpenStruct.new
|
|
||||||
options.profile = false
|
|
||||||
options.time = false
|
|
||||||
options.output = :html
|
|
||||||
options.treebuilder = 'simpletree'
|
|
||||||
options.error = false
|
|
||||||
options.encoding = false
|
|
||||||
options.parsemethod = :parse
|
|
||||||
options.serializer = {
|
|
||||||
:encoding => 'utf-8',
|
|
||||||
:omit_optional_tags => false,
|
|
||||||
:inject_meta_charset => false
|
|
||||||
}
|
|
||||||
|
|
||||||
require 'optparse'
|
|
||||||
opts = OptionParser.new do |opts|
|
|
||||||
opts.separator ""
|
|
||||||
opts.separator "Parse Options:"
|
|
||||||
|
|
||||||
opts.on("-b", "--treebuilder NAME") do |treebuilder|
|
|
||||||
options.treebuilder = treebuilder
|
|
||||||
end
|
|
||||||
|
|
||||||
opts.on("-f", "--fragment CONTAINER", "Parse as a fragment") do |container|
|
|
||||||
options.parsemethod = :parse_fragment
|
|
||||||
options.container = container if container
|
|
||||||
end
|
|
||||||
|
|
||||||
opts.separator ""
|
|
||||||
opts.separator "Filter Options:"
|
|
||||||
|
|
||||||
opts.on("--[no-]inject-meta-charset", "inject <meta charset>") do |inject|
|
|
||||||
options.serializer[:inject_meta_charset] = inject
|
|
||||||
end
|
|
||||||
|
|
||||||
opts.on("--[no-]strip-whitespace", "strip unnecessary whitespace") do |strip|
|
|
||||||
options.serializer[:strip_whitespace] = strip
|
|
||||||
end
|
|
||||||
|
|
||||||
opts.on("--[no-]sanitize", "escape unsafe tags") do |sanitize|
|
|
||||||
options.serializer[:sanitize] = sanitize
|
|
||||||
end
|
|
||||||
|
|
||||||
opts.separator ""
|
|
||||||
opts.separator "Output Options:"
|
|
||||||
|
|
||||||
opts.on("--tree", "output as debug tree") do |tree|
|
|
||||||
options.output = :tree
|
|
||||||
end
|
|
||||||
|
|
||||||
opts.on("-x", "--xml", "output as xml") do |xml|
|
|
||||||
options.output = :xml
|
|
||||||
options.treebuilder = "rexml"
|
|
||||||
end
|
|
||||||
|
|
||||||
opts.on("--[no-]html", "Output as html") do |html|
|
|
||||||
options.output = (html ? :html : nil)
|
|
||||||
end
|
|
||||||
|
|
||||||
opts.on("--hilite", "Output as formatted highlighted code.") do |hilite|
|
|
||||||
options.output = :hilite
|
|
||||||
end
|
|
||||||
|
|
||||||
opts.on("-e", "--error", "Print a list of parse errors") do |error|
|
|
||||||
options.error = error
|
|
||||||
end
|
|
||||||
|
|
||||||
opts.separator ""
|
|
||||||
opts.separator "Serialization Options:"
|
|
||||||
|
|
||||||
opts.on("--[no-]omit-optional-tags", "Omit optional tags") do |omit|
|
|
||||||
options.serializer[:omit_optional_tags] = omit
|
|
||||||
end
|
|
||||||
|
|
||||||
opts.on("--[no-]quote-attr-values", "Quote attribute values") do |quote|
|
|
||||||
options.serializer[:quote_attr_values] = quote
|
|
||||||
end
|
|
||||||
|
|
||||||
opts.on("--[no-]use-best-quote-char", "Use best quote character") do |best|
|
|
||||||
options.serializer[:use_best_quote_char] = best
|
|
||||||
end
|
|
||||||
|
|
||||||
opts.on("--quote-char C", "Use specified quote character") do |c|
|
|
||||||
options.serializer[:quote_char] = c
|
|
||||||
end
|
|
||||||
|
|
||||||
opts.on("--[no-]minimize-boolean-attributes", "Minimize boolean attributes") do |min|
|
|
||||||
options.serializer[:minimize_boolean_attributes] = min
|
|
||||||
end
|
|
||||||
|
|
||||||
opts.on("--[no-]use-trailing-solidus", "Use trailing solidus") do |slash|
|
|
||||||
options.serializer[:use_trailing_solidus] = slash
|
|
||||||
end
|
|
||||||
|
|
||||||
opts.on("--[no-]escape-lt-in-attrs", "Escape less than signs in attribute values") do |lt|
|
|
||||||
options.serializer[:escape_lt_in_attrs] = lt
|
|
||||||
end
|
|
||||||
|
|
||||||
opts.on("--[no-]escape-rcdata", "Escape rcdata element values") do |rcdata|
|
|
||||||
options.serializer[:escape_rcdata] = rcdata
|
|
||||||
end
|
|
||||||
|
|
||||||
opts.separator ""
|
|
||||||
opts.separator "Other Options:"
|
|
||||||
|
|
||||||
opts.on("-p", "--[no-]profile", "Profile the run") do |profile|
|
|
||||||
options.profile = profile
|
|
||||||
end
|
|
||||||
|
|
||||||
opts.on("-t", "--[no-]time", "Time the run") do |time|
|
|
||||||
options.time = time
|
|
||||||
end
|
|
||||||
|
|
||||||
opts.on("-c", "--[no-]encoding", "Print character encoding used") do |encoding|
|
|
||||||
options.encoding = encoding
|
|
||||||
end
|
|
||||||
|
|
||||||
opts.on_tail("-h", "--help", "Show this message") do
|
|
||||||
puts opts
|
|
||||||
exit
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
opts.parse!(ARGV)
|
|
||||||
parse options, ARGV
|
|
2
vendor/plugins/HTML5lib/lib/html5.rb
vendored
2
vendor/plugins/HTML5lib/lib/html5.rb
vendored
|
@ -8,6 +8,6 @@ module HTML5
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.parse_fragment(stream, options={})
|
def self.parse_fragment(stream, options={})
|
||||||
HTMLParser.parse(stream, options)
|
HTMLParser.parse_fragment(stream, options)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
231
vendor/plugins/HTML5lib/lib/html5/cli.rb
vendored
Normal file
231
vendor/plugins/HTML5lib/lib/html5/cli.rb
vendored
Normal file
|
@ -0,0 +1,231 @@
|
||||||
|
$:.unshift File.dirname(__FILE__), 'lib'
|
||||||
|
require 'html5'
|
||||||
|
require 'core_ext/string'
|
||||||
|
require 'ostruct'
|
||||||
|
require 'optparse'
|
||||||
|
|
||||||
|
module HTML5::CLI
|
||||||
|
|
||||||
|
def self.parse_opts argv
|
||||||
|
options = OpenStruct.new
|
||||||
|
options.profile = false
|
||||||
|
options.time = false
|
||||||
|
options.output = :html
|
||||||
|
options.treebuilder = 'simpletree'
|
||||||
|
options.error = false
|
||||||
|
options.encoding = false
|
||||||
|
options.parsemethod = :parse
|
||||||
|
options.serializer = {
|
||||||
|
:encoding => 'utf-8',
|
||||||
|
:omit_optional_tags => false,
|
||||||
|
:inject_meta_charset => false
|
||||||
|
}
|
||||||
|
|
||||||
|
opts = OptionParser.new do |opts|
|
||||||
|
opts.separator ""
|
||||||
|
opts.separator "Parse Options:"
|
||||||
|
|
||||||
|
opts.on("-b", "--treebuilder NAME") do |treebuilder|
|
||||||
|
options.treebuilder = treebuilder
|
||||||
|
end
|
||||||
|
|
||||||
|
opts.on("-f", "--fragment CONTAINER", "Parse as a fragment") do |container|
|
||||||
|
options.parsemethod = :parse_fragment
|
||||||
|
options.container = container if container
|
||||||
|
end
|
||||||
|
|
||||||
|
opts.separator ""
|
||||||
|
opts.separator "Filter Options:"
|
||||||
|
|
||||||
|
opts.on("--[no-]inject-meta-charset", "inject <meta charset>") do |inject|
|
||||||
|
options.serializer[:inject_meta_charset] = inject
|
||||||
|
end
|
||||||
|
|
||||||
|
opts.on("--[no-]strip-whitespace", "strip unnecessary whitespace") do |strip|
|
||||||
|
options.serializer[:strip_whitespace] = strip
|
||||||
|
end
|
||||||
|
|
||||||
|
opts.on("--[no-]sanitize", "escape unsafe tags") do |sanitize|
|
||||||
|
options.serializer[:sanitize] = sanitize
|
||||||
|
end
|
||||||
|
|
||||||
|
opts.separator ""
|
||||||
|
opts.separator "Output Options:"
|
||||||
|
|
||||||
|
opts.on("--tree", "output as debug tree") do |tree|
|
||||||
|
options.output = :tree
|
||||||
|
end
|
||||||
|
|
||||||
|
opts.on("-x", "--xml", "output as xml") do |xml|
|
||||||
|
options.output = :xml
|
||||||
|
options.treebuilder = "rexml"
|
||||||
|
end
|
||||||
|
|
||||||
|
opts.on("--[no-]html", "Output as html") do |html|
|
||||||
|
options.output = (html ? :html : nil)
|
||||||
|
end
|
||||||
|
|
||||||
|
opts.on("--hilite", "Output as formatted highlighted code.") do |hilite|
|
||||||
|
options.output = :hilite
|
||||||
|
end
|
||||||
|
|
||||||
|
opts.on("-e", "--error", "Print a list of parse errors") do |error|
|
||||||
|
options.error = error
|
||||||
|
end
|
||||||
|
|
||||||
|
opts.separator ""
|
||||||
|
opts.separator "Serialization Options:"
|
||||||
|
|
||||||
|
opts.on("--[no-]omit-optional-tags", "Omit optional tags") do |omit|
|
||||||
|
options.serializer[:omit_optional_tags] = omit
|
||||||
|
end
|
||||||
|
|
||||||
|
opts.on("--[no-]quote-attr-values", "Quote attribute values") do |quote|
|
||||||
|
options.serializer[:quote_attr_values] = quote
|
||||||
|
end
|
||||||
|
|
||||||
|
opts.on("--[no-]use-best-quote-char", "Use best quote character") do |best|
|
||||||
|
options.serializer[:use_best_quote_char] = best
|
||||||
|
end
|
||||||
|
|
||||||
|
opts.on("--quote-char C", "Use specified quote character") do |c|
|
||||||
|
options.serializer[:quote_char] = c
|
||||||
|
end
|
||||||
|
|
||||||
|
opts.on("--[no-]minimize-boolean-attributes", "Minimize boolean attributes") do |min|
|
||||||
|
options.serializer[:minimize_boolean_attributes] = min
|
||||||
|
end
|
||||||
|
|
||||||
|
opts.on("--[no-]use-trailing-solidus", "Use trailing solidus") do |slash|
|
||||||
|
options.serializer[:use_trailing_solidus] = slash
|
||||||
|
end
|
||||||
|
|
||||||
|
opts.on("--[no-]escape-lt-in-attrs", "Escape less than signs in attribute values") do |lt|
|
||||||
|
options.serializer[:escape_lt_in_attrs] = lt
|
||||||
|
end
|
||||||
|
|
||||||
|
opts.on("--[no-]escape-rcdata", "Escape rcdata element values") do |rcdata|
|
||||||
|
options.serializer[:escape_rcdata] = rcdata
|
||||||
|
end
|
||||||
|
|
||||||
|
opts.separator ""
|
||||||
|
opts.separator "Other Options:"
|
||||||
|
|
||||||
|
opts.on("-p", "--[no-]profile", "Profile the run") do |profile|
|
||||||
|
options.profile = profile
|
||||||
|
end
|
||||||
|
|
||||||
|
opts.on("-t", "--[no-]time", "Time the run") do |time|
|
||||||
|
options.time = time
|
||||||
|
end
|
||||||
|
|
||||||
|
opts.on("-c", "--[no-]encoding", "Print character encoding used") do |encoding|
|
||||||
|
options.encoding = encoding
|
||||||
|
end
|
||||||
|
|
||||||
|
opts.on_tail("-h", "--help", "Show this message") do
|
||||||
|
puts opts
|
||||||
|
exit
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
end
|
||||||
|
opts.parse!(argv)
|
||||||
|
options
|
||||||
|
end
|
||||||
|
|
||||||
|
def self.open_input f
|
||||||
|
if f
|
||||||
|
begin
|
||||||
|
if f[0..6] == 'http://'
|
||||||
|
require 'open-uri'
|
||||||
|
f = URI.parse(f).open
|
||||||
|
encoding = f.charset
|
||||||
|
elsif f == '-'
|
||||||
|
f = $stdin
|
||||||
|
else
|
||||||
|
f = open(f)
|
||||||
|
end
|
||||||
|
rescue
|
||||||
|
end
|
||||||
|
else
|
||||||
|
$stderr.write("No filename provided. Use -h for help\n")
|
||||||
|
exit(1)
|
||||||
|
end
|
||||||
|
f
|
||||||
|
end
|
||||||
|
|
||||||
|
def self.parse(opts, args)
|
||||||
|
encoding = nil
|
||||||
|
|
||||||
|
f = open_input args.last
|
||||||
|
|
||||||
|
require 'html5/treebuilders'
|
||||||
|
treebuilder = HTML5::TreeBuilders[opts.treebuilder]
|
||||||
|
|
||||||
|
if opts.output == :xml
|
||||||
|
require 'html5/liberalxmlparser'
|
||||||
|
p = HTML5::XMLParser.new(:tree=>treebuilder)
|
||||||
|
else
|
||||||
|
require 'html5/html5parser'
|
||||||
|
p = HTML5::HTMLParser.new(:tree=>treebuilder)
|
||||||
|
end
|
||||||
|
|
||||||
|
if opts.parsemethod == :parse
|
||||||
|
args = [f, encoding]
|
||||||
|
else
|
||||||
|
args = [f, (opts.container || 'div'), encoding]
|
||||||
|
end
|
||||||
|
|
||||||
|
if opts.profile
|
||||||
|
require 'profiler'
|
||||||
|
Profiler__::start_profile
|
||||||
|
p.send(opts.parsemethod, *args)
|
||||||
|
Profiler__::stop_profile
|
||||||
|
Profiler__::print_profile($stderr)
|
||||||
|
elsif opts.time
|
||||||
|
require 'time' # TODO: switch to benchmark
|
||||||
|
t0 = Time.new
|
||||||
|
document = p.send(opts.parsemethod, *args)
|
||||||
|
t1 = Time.new
|
||||||
|
print_output(p, document, opts)
|
||||||
|
t2 = Time.new
|
||||||
|
puts "\n\nRun took: %fs (plus %fs to print the output)"%[t1-t0, t2-t1]
|
||||||
|
else
|
||||||
|
document = p.send(opts.parsemethod, *args)
|
||||||
|
print_output(p, document, opts)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
def self.print_output(parser, document, opts)
|
||||||
|
puts "Encoding: #{parser.tokenizer.stream.char_encoding}" if opts.encoding
|
||||||
|
|
||||||
|
case opts.output
|
||||||
|
when :xml
|
||||||
|
print document
|
||||||
|
when :html
|
||||||
|
require 'html5/treewalkers'
|
||||||
|
tokens = HTML5::TreeWalkers[opts.treebuilder].new(document)
|
||||||
|
require 'html5/serializer'
|
||||||
|
puts HTML5::HTMLSerializer.serialize(tokens, opts.serializer)
|
||||||
|
when :hilite
|
||||||
|
print document.hilite
|
||||||
|
when :tree
|
||||||
|
document = [document] unless document.respond_to?(:each)
|
||||||
|
document.each {|fragment| puts parser.tree.testSerializer(fragment)}
|
||||||
|
end
|
||||||
|
|
||||||
|
if opts.error
|
||||||
|
errList=[]
|
||||||
|
for pos, errorcode, datavars in parser.errors
|
||||||
|
errList << "Line #{pos[0]} Col #{pos[1]} " + (HTML5::E[errorcode] || "Unknown error \"#{errorcode}\"") % datavars
|
||||||
|
end
|
||||||
|
$stdout.write("\nParse errors:\n" + errList.join("\n")+"\n")
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
def self.run
|
||||||
|
options = parse_opts ARGV
|
||||||
|
parse options, ARGV
|
||||||
|
end
|
||||||
|
end
|
|
@ -908,7 +908,7 @@ module HTML5
|
||||||
"eof-in-bogus-doctype" =>
|
"eof-in-bogus-doctype" =>
|
||||||
_("Unexpected end of file in bogus doctype."),
|
_("Unexpected end of file in bogus doctype."),
|
||||||
"eof-in-innerhtml" =>
|
"eof-in-innerhtml" =>
|
||||||
_("XXX innerHTML EOF"),
|
_("Unexpected EOF in inner html mode."),
|
||||||
"unexpected-doctype" =>
|
"unexpected-doctype" =>
|
||||||
_("Unexpected DOCTYPE. Ignored."),
|
_("Unexpected DOCTYPE. Ignored."),
|
||||||
"non-html-root" =>
|
"non-html-root" =>
|
||||||
|
@ -1040,7 +1040,8 @@ module HTML5
|
||||||
_("Unexpected end tag (%(name))" +
|
_("Unexpected end tag (%(name))" +
|
||||||
". Expected end of file."),
|
". Expected end of file."),
|
||||||
"unexpected-end-table-in-caption" =>
|
"unexpected-end-table-in-caption" =>
|
||||||
_("Unexpected end table tag in caption. Generates implied end caption.")
|
_("Unexpected end table tag in caption. Generates implied end caption."),
|
||||||
|
"end-html-in-innerhtml" => _("Unexpected html end tag in inner html mode.")
|
||||||
}
|
}
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
1
vendor/plugins/HTML5lib/lib/html5/filters.rb
vendored
1
vendor/plugins/HTML5lib/lib/html5/filters.rb
vendored
|
@ -1 +0,0 @@
|
||||||
require 'html5/filters/optionaltags'
|
|
|
@ -25,7 +25,7 @@ module HTML5
|
||||||
|
|
||||||
def endTagHtml(name)
|
def endTagHtml(name)
|
||||||
if @parser.inner_html
|
if @parser.inner_html
|
||||||
parse_error
|
parse_error "end-html-in-innerhtml"
|
||||||
else
|
else
|
||||||
# XXX: This may need to be done, not sure
|
# XXX: This may need to be done, not sure
|
||||||
# Don't set last_phase to the current phase but to the inBody phase
|
# Don't set last_phase to the current phase but to the inBody phase
|
||||||
|
|
|
@ -51,34 +51,22 @@ module HTML5
|
||||||
super(parser, tree)
|
super(parser, tree)
|
||||||
|
|
||||||
# for special handling of whitespace in <pre>
|
# for special handling of whitespace in <pre>
|
||||||
if $-w
|
silence do
|
||||||
$-w = false
|
class << self
|
||||||
class << self; alias processSpaceCharactersNonPre processSpaceCharacters; end
|
alias processSpaceCharactersNonPre processSpaceCharacters
|
||||||
$-w = true
|
end
|
||||||
else
|
|
||||||
class << self; alias processSpaceCharactersNonPre processSpaceCharacters; end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def processSpaceCharactersDropNewline(data)
|
def processSpaceCharactersDropNewline(data)
|
||||||
# #Sometimes (start of <pre> blocks) we want to drop leading newlines
|
# #Sometimes (start of <pre> blocks) we want to drop leading newlines
|
||||||
|
|
||||||
if $-w
|
class << self
|
||||||
$-w = false
|
silence do
|
||||||
class << self
|
alias processSpaceCharacters processSpaceCharactersNonPre
|
||||||
silence do
|
|
||||||
alias processSpaceCharacters processSpaceCharactersNonPre
|
|
||||||
end
|
|
||||||
end
|
|
||||||
$-w = true
|
|
||||||
else
|
|
||||||
class << self
|
|
||||||
silence do
|
|
||||||
alias processSpaceCharacters processSpaceCharactersNonPre
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
if (data.length > 0 and data[0] == ?\n &&
|
if (data.length > 0 and data[0] == ?\n &&
|
||||||
%w[pre textarea].include?(@tree.open_elements.last.name) && !@tree.open_elements.last.hasContent)
|
%w[pre textarea].include?(@tree.open_elements.last.name) && !@tree.open_elements.last.hasContent)
|
||||||
data = data[1..-1]
|
data = data[1..-1]
|
||||||
|
@ -376,16 +364,6 @@ module HTML5
|
||||||
end
|
end
|
||||||
|
|
||||||
def endTagBlock(name)
|
def endTagBlock(name)
|
||||||
#Put us back in the right whitespace handling mode
|
|
||||||
if name == 'pre'
|
|
||||||
class << self;
|
|
||||||
silence do
|
|
||||||
alias processSpaceCharacters processSpaceCharactersNonPre;
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
|
|
||||||
@tree.generateImpliedEndTags if in_scope?(name)
|
@tree.generateImpliedEndTags if in_scope?(name)
|
||||||
|
|
||||||
unless @tree.open_elements.last.name == name
|
unless @tree.open_elements.last.name == name
|
||||||
|
|
|
@ -144,7 +144,7 @@ module HTML5
|
||||||
|
|
||||||
def remove_open_elements_until(name=nil)
|
def remove_open_elements_until(name=nil)
|
||||||
finished = false
|
finished = false
|
||||||
until finished
|
until finished || @tree.open_elements.length == 0
|
||||||
element = @tree.open_elements.pop
|
element = @tree.open_elements.pop
|
||||||
finished = name.nil? ? yield(element) : element.name == name
|
finished = name.nil? ? yield(element) : element.name == name
|
||||||
end
|
end
|
||||||
|
|
11
vendor/plugins/HTML5lib/lib/html5/sanitizer.rb
vendored
11
vendor/plugins/HTML5lib/lib/html5/sanitizer.rb
vendored
|
@ -78,8 +78,12 @@ module HTML5
|
||||||
|
|
||||||
ATTR_VAL_IS_URI = %w[href src cite action longdesc xlink:href xml:base]
|
ATTR_VAL_IS_URI = %w[href src cite action longdesc xlink:href xml:base]
|
||||||
|
|
||||||
SVG_ATTR_VAL_ALLOWS_REF = %w[clip-path fill filter marker marker-start
|
SVG_ATTR_VAL_ALLOWS_REF = %w[clip-path color-profile cursor fill
|
||||||
marker-mid marker-end mask stroke textpath]
|
filter marker marker-start marker-mid marker-end mask stroke]
|
||||||
|
|
||||||
|
SVG_ALLOW_LOCAL_HREF = %w[altGlyph animate animateColor animateMotion
|
||||||
|
animateTransform cursor feImage filter linearGradient pattern
|
||||||
|
radialGradient textpath tref set use]
|
||||||
|
|
||||||
ACCEPTABLE_CSS_PROPERTIES = %w[azimuth background-color
|
ACCEPTABLE_CSS_PROPERTIES = %w[azimuth background-color
|
||||||
border-bottom-color border-collapse border-color border-left-color
|
border-bottom-color border-collapse border-color border-left-color
|
||||||
|
@ -127,6 +131,9 @@ module HTML5
|
||||||
SVG_ATTR_VAL_ALLOWS_REF.each do |attr|
|
SVG_ATTR_VAL_ALLOWS_REF.each do |attr|
|
||||||
attrs[attr] = attrs[attr].to_s.gsub(/url\s*\(\s*[^#\s][^)]+?\)/m, ' ') if attrs[attr]
|
attrs[attr] = attrs[attr].to_s.gsub(/url\s*\(\s*[^#\s][^)]+?\)/m, ' ') if attrs[attr]
|
||||||
end
|
end
|
||||||
|
if SVG_ALLOW_LOCAL_HREF.include?(token[:name]) && attrs['xlink:href'] && attrs['xlink:href'] =~ /^\s*[^#\s].*/m
|
||||||
|
attrs.delete 'xlink:href'
|
||||||
|
end
|
||||||
if attrs['style']
|
if attrs['style']
|
||||||
attrs['style'] = sanitize_css(attrs['style'])
|
attrs['style'] = sanitize_css(attrs['style'])
|
||||||
end
|
end
|
||||||
|
|
217
vendor/plugins/HTML5lib/parse.rb
vendored
217
vendor/plugins/HTML5lib/parse.rb
vendored
|
@ -1,217 +0,0 @@
|
||||||
#!/usr/bin/env ruby
|
|
||||||
#
|
|
||||||
# Parse a document to a simpletree tree, with optional profiling
|
|
||||||
|
|
||||||
$:.unshift File.dirname(__FILE__),'lib'
|
|
||||||
|
|
||||||
def parse(opts, args)
|
|
||||||
encoding = nil
|
|
||||||
|
|
||||||
f = args[-1]
|
|
||||||
if f
|
|
||||||
begin
|
|
||||||
if f[0..6] == 'http://'
|
|
||||||
require 'open-uri'
|
|
||||||
f = URI.parse(f).open
|
|
||||||
encoding = f.charset
|
|
||||||
elsif f == '-'
|
|
||||||
f = $stdin
|
|
||||||
else
|
|
||||||
f = open(f)
|
|
||||||
end
|
|
||||||
rescue
|
|
||||||
end
|
|
||||||
else
|
|
||||||
$stderr.write("No filename provided. Use -h for help\n")
|
|
||||||
exit(1)
|
|
||||||
end
|
|
||||||
|
|
||||||
require 'html5/treebuilders'
|
|
||||||
treebuilder = HTML5::TreeBuilders[opts.treebuilder]
|
|
||||||
|
|
||||||
if opts.output == :xml
|
|
||||||
require 'html5/liberalxmlparser'
|
|
||||||
p = HTML5::XHTMLParser.new(:tree=>treebuilder)
|
|
||||||
else
|
|
||||||
require 'html5/html5parser'
|
|
||||||
p = HTML5::HTMLParser.new(:tree=>treebuilder)
|
|
||||||
end
|
|
||||||
|
|
||||||
if opts.parsemethod == :parse
|
|
||||||
args = [f, encoding]
|
|
||||||
else
|
|
||||||
args = [f, 'div', encoding]
|
|
||||||
end
|
|
||||||
|
|
||||||
if opts.profile
|
|
||||||
require 'profiler'
|
|
||||||
Profiler__::start_profile
|
|
||||||
p.send(opts.parsemethod, *args)
|
|
||||||
Profiler__::stop_profile
|
|
||||||
Profiler__::print_profile($stderr)
|
|
||||||
elsif opts.time
|
|
||||||
require 'time'
|
|
||||||
t0 = Time.new
|
|
||||||
document = p.send(opts.parsemethod, *args)
|
|
||||||
t1 = Time.new
|
|
||||||
printOutput(p, document, opts)
|
|
||||||
t2 = Time.new
|
|
||||||
puts "\n\nRun took: %fs (plus %fs to print the output)"%[t1-t0, t2-t1]
|
|
||||||
else
|
|
||||||
document = p.send(opts.parsemethod, *args)
|
|
||||||
printOutput(p, document, opts)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
# Write the parsed document, and optionally its parse errors, to standard output.
#
# parser   - the parser that produced +document+; read for +errors+, +tree+
#            and +tokenizer.stream.char_encoding+.
# document - the parse result. For :tree output a value that does not respond
#            to +each+ is wrapped in a one-element array.
# opts     - option object; reads +encoding+, +output+, +treebuilder+,
#            +serializer+ and +error+.
#
# An +output+ value other than :xml, :html, :hilite or :tree prints no document.
def printOutput(parser, document, opts)
  puts "Encoding: #{parser.tokenizer.stream.char_encoding}" if opts.encoding

  case opts.output
  when :xml
    print document
  when :html
    # Loaded lazily so the other output modes do not need these libraries.
    require 'html5/treewalkers'
    tokens = HTML5::TreeWalkers[opts.treebuilder].new(document)
    require 'html5/serializer'
    puts HTML5::HTMLSerializer.serialize(tokens, opts.serializer)
  when :hilite
    print document.hilite
  when :tree
    fragments = document.respond_to?(:each) ? document : [document]
    fragments.each { |fragment| puts parser.tree.testSerializer(fragment) }
  end

  return unless opts.error

  # Each error entry starts with a [line, col] pair followed by the message;
  # any further elements are ignored. Interpolation keeps a non-String
  # message from raising TypeError.
  report = parser.errors.map do |pos, message|
    "#{'Line %i Col %i' % pos} #{message}"
  end
  $stdout.write("\nParse errors:\n" + report.join("\n") + "\n")
end
|
|
||||||
|
|
||||||
require 'ostruct'
require 'optparse'

# Default settings; each command-line switch below overrides one of them.
options = OpenStruct.new(
  :profile     => false,
  :time        => false,
  :output      => :html,
  :treebuilder => 'simpletree',
  :error       => false,
  :encoding    => false,
  :parsemethod => :parse,
  :serializer  => {
    :encoding            => 'utf-8',
    :omit_optional_tags  => false,
    :inject_meta_charset => false
  }
)

# Command-line definition. Switches are declared in the order in which they
# appear in the help text.
cli = OptionParser.new do |o|
  o.separator ""
  o.separator "Parse Options:"

  o.on("-b", "--treebuilder NAME") { |name| options.treebuilder = name }
  o.on("-f", "--fragment", "Parse as a fragment") { options.parsemethod = :parseFragment }

  o.separator ""
  o.separator "Filter Options:"

  o.on("--[no-]inject-meta-charset", "inject <meta charset>") do |flag|
    options.serializer[:inject_meta_charset] = flag
  end
  o.on("--[no-]strip-whitespace", "strip unnecessary whitespace") do |flag|
    options.serializer[:strip_whitespace] = flag
  end
  o.on("--[no-]sanitize", "escape unsafe tags") do |flag|
    options.serializer[:sanitize] = flag
  end

  o.separator ""
  o.separator "Output Options:"

  o.on("--tree", "output as debug tree") { options.output = :tree }
  o.on("-x", "--xml", "output as xml") do
    # XML output also switches the tree builder to rexml.
    options.output = :xml
    options.treebuilder = "rexml"
  end
  o.on("--[no-]html", "Output as html") do |flag|
    # --no-html leaves no output format selected.
    options.output = (flag ? :html : nil)
  end
  o.on("--hilite", "Output as formatted highlighted code.") { options.output = :hilite }
  o.on("-e", "--error", "Print a list of parse errors") { |flag| options.error = flag }

  o.separator ""
  o.separator "Serialization Options:"

  o.on("--[no-]omit-optional-tags", "Omit optional tags") do |flag|
    options.serializer[:omit_optional_tags] = flag
  end
  o.on("--[no-]quote-attr-values", "Quote attribute values") do |flag|
    options.serializer[:quote_attr_values] = flag
  end
  o.on("--[no-]use-best-quote-char", "Use best quote character") do |flag|
    options.serializer[:use_best_quote_char] = flag
  end
  o.on("--quote-char C", "Use specified quote character") do |char|
    options.serializer[:quote_char] = char
  end
  o.on("--[no-]minimize-boolean-attributes", "Minimize boolean attributes") do |flag|
    options.serializer[:minimize_boolean_attributes] = flag
  end
  o.on("--[no-]use-trailing-solidus", "Use trailing solidus") do |flag|
    options.serializer[:use_trailing_solidus] = flag
  end
  o.on("--[no-]escape-lt-in-attrs", "Escape less than signs in attribute values") do |flag|
    options.serializer[:escape_lt_in_attrs] = flag
  end
  o.on("--[no-]escape-rcdata", "Escape rcdata element values") do |flag|
    options.serializer[:escape_rcdata] = flag
  end

  o.separator ""
  o.separator "Other Options:"

  o.on("-p", "--[no-]profile", "Profile the run") { |flag| options.profile = flag }
  o.on("-t", "--[no-]time", "Time the run") { |flag| options.time = flag }
  o.on("-c", "--[no-]encoding", "Print character encoding used") do |flag|
    options.encoding = flag
  end

  o.on_tail("-h", "--help", "Show this message") do
    puts o
    exit
  end
end

# parse! removes the recognised switches from ARGV, so only the positional
# arguments are passed on to parse.
cli.parse!(ARGV)
parse options, ARGV
|
|
|
@ -2,15 +2,14 @@ require 'test/unit'
|
||||||
|
|
||||||
HTML5_BASE = File.dirname(File.dirname(File.dirname(File.expand_path(__FILE__))))
|
HTML5_BASE = File.dirname(File.dirname(File.dirname(File.expand_path(__FILE__))))
|
||||||
|
|
||||||
if File.exists?(File.join(HTML5_BASE, 'testdata'))
|
if File.exists?(File.join(HTML5_BASE, 'ruby', 'testdata'))
|
||||||
TESTDATA_DIR = File.join(HTML5_BASE, 'testdata')
|
TESTDATA_DIR = File.join(HTML5_BASE, 'ruby', 'testdata')
|
||||||
else
|
else
|
||||||
TESTDATA_DIR = File.join(File.dirname(File.dirname(File.expand_path(__FILE__))), 'testdata')
|
TESTDATA_DIR = File.join(HTML5_BASE, 'testdata')
|
||||||
end
|
end
|
||||||
|
|
||||||
# $:.unshift File.join(File.dirname(File.dirname(__FILE__)), 'lib')
|
$:.unshift File.join(File.dirname(File.dirname(__FILE__)), 'lib')
|
||||||
|
$:.unshift File.dirname(__FILE__)
|
||||||
# $:.unshift File.dirname(__FILE__)
|
|
||||||
|
|
||||||
require 'core_ext/string'
|
require 'core_ext/string'
|
||||||
|
|
16
vendor/plugins/HTML5lib/test/test_cli.rb
vendored
Normal file
16
vendor/plugins/HTML5lib/test/test_cli.rb
vendored
Normal file
|
@ -0,0 +1,16 @@
|
||||||
|
require File.join(File.dirname(__FILE__), 'preamble')
|
||||||
|
require "html5/cli"
|
||||||
|
|
||||||
|
# Tests for the command-line helpers in HTML5::CLI.
class TestCli < Test::Unit::TestCase
  # open_input is called with '-', an http URL and a relative file path, and
  # the class of the returned object is checked for each.
  # NOTE(review): the URL case presumably needs network access, and the file
  # case a working directory that contains testdata/ - confirm.
  def test_open_input
    assert_equal($stdin, HTML5::CLI.open_input('-'))
    assert_kind_of(StringIO, HTML5::CLI.open_input('http://whatwg.org/'))
    assert_kind_of(File, HTML5::CLI.open_input('testdata/sites/google-results.htm'))
  end

  # The short and long tree builder switches must both set +treebuilder+.
  def test_parse_opts
    HTML5::CLI.parse_opts [] # TODO test defaults
    %w[-b --treebuilder].each do |switch|
      assert_equal('hpricot', HTML5::CLI.parse_opts([switch, 'hpricot']).treebuilder)
    end
  end
end
|
|
@ -14,4 +14,13 @@ class TestHtml5Inputstream < Test::Unit::TestCase
|
||||||
1022.times{stream.char}
|
1022.times{stream.char}
|
||||||
assert_equal "i", stream.char
|
assert_equal "i", stream.char
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def test_chars_until
|
||||||
|
stream = HTML5::HTMLInputStream.new("aaaaaaab")
|
||||||
|
assert_equal "aaaaaaa", stream.chars_until("b")
|
||||||
|
|
||||||
|
stream = HTML5::HTMLInputStream.new("aaaaaaab")
|
||||||
|
assert_equal "aaaaaaab", stream.chars_until("c")
|
||||||
|
|
||||||
|
end
|
||||||
end
|
end
|
|
@ -12,11 +12,6 @@ begin
|
||||||
rescue LoadError
|
rescue LoadError
|
||||||
end
|
end
|
||||||
|
|
||||||
$CHECK_PARSER_ERRORS = ARGV.delete('-p') # TODO
|
|
||||||
|
|
||||||
puts 'Testing tree builders: ' + $tree_types_to_test * ', '
|
|
||||||
|
|
||||||
|
|
||||||
class Html5ParserTestCase < Test::Unit::TestCase
|
class Html5ParserTestCase < Test::Unit::TestCase
|
||||||
include HTML5
|
include HTML5
|
||||||
include TestSupport
|
include TestSupport
|
||||||
|
@ -25,8 +20,7 @@ class Html5ParserTestCase < Test::Unit::TestCase
|
||||||
|
|
||||||
test_name = File.basename(test_file).sub('.dat', '')
|
test_name = File.basename(test_file).sub('.dat', '')
|
||||||
|
|
||||||
TestData.new(test_file, %w(data errors document-fragment document)).
|
TestData.new(test_file, %w(data errors document-fragment document)).each_with_index do |(input, errors, inner_html, expected), index|
|
||||||
each_with_index do |(input, errors, inner_html, expected), index|
|
|
||||||
|
|
||||||
errors = errors.split("\n")
|
errors = errors.split("\n")
|
||||||
expected = expected.gsub("\n| ","\n")[2..-1]
|
expected = expected.gsub("\n| ","\n")[2..-1]
|
||||||
|
@ -35,13 +29,13 @@ class Html5ParserTestCase < Test::Unit::TestCase
|
||||||
define_method 'test_%s_%d_%s' % [ test_name, index + 1, tree_name ] do
|
define_method 'test_%s_%d_%s' % [ test_name, index + 1, tree_name ] do
|
||||||
|
|
||||||
parser = HTMLParser.new(:tree => TreeBuilders[tree_name])
|
parser = HTMLParser.new(:tree => TreeBuilders[tree_name])
|
||||||
|
|
||||||
if inner_html
|
if inner_html
|
||||||
parser.parse_fragment(input, inner_html)
|
parser.parse_fragment(input, inner_html)
|
||||||
else
|
else
|
||||||
parser.parse(input)
|
parser.parse(input)
|
||||||
end
|
end
|
||||||
|
|
||||||
actual_output = convertTreeDump(parser.tree.testSerializer(parser.tree.document))
|
actual_output = convertTreeDump(parser.tree.testSerializer(parser.tree.document))
|
||||||
|
|
||||||
assert_equal sortattrs(expected), sortattrs(actual_output), [
|
assert_equal sortattrs(expected), sortattrs(actual_output), [
|
||||||
|
@ -53,13 +47,13 @@ class Html5ParserTestCase < Test::Unit::TestCase
|
||||||
actual_errors = parser.errors.map do |(line, col), message, datavars|
|
actual_errors = parser.errors.map do |(line, col), message, datavars|
|
||||||
'Line: %i Col: %i %s' % [line, col, E[message] % datavars]
|
'Line: %i Col: %i %s' % [line, col, E[message] % datavars]
|
||||||
end
|
end
|
||||||
assert_equal errors.length, parser.errors.length, [
|
|
||||||
|
assert_equal errors, actual_errors, [
|
||||||
'', 'Input', input,
|
'', 'Input', input,
|
||||||
'', "Expected errors (#{errors.length}):", errors.join("\n"),
|
'', "Expected errors (#{errors.length}):", errors.join("\n"),
|
||||||
'', "Actual errors (#{actual_errors.length}):",
|
'', "Actual errors (#{actual_errors.length}):",
|
||||||
actual_errors.join("\n")
|
actual_errors.join("\n") + "\n"
|
||||||
].join("\n")
|
].join("\n")
|
||||||
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
|
@ -110,6 +110,37 @@ class SanitizeTest < Test::Unit::TestCase
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
HTMLSanitizer::SVG_ALLOW_LOCAL_HREF.each do |tag_name|
|
||||||
|
next unless HTMLSanitizer::ALLOWED_ELEMENTS.include?(tag_name)
|
||||||
|
define_method "test_#{tag_name}_should_allow_local_href" do
|
||||||
|
input = %(<#{tag_name} xlink:href="#foo"/>)
|
||||||
|
output = "<#{tag_name.downcase} xlink:href='#foo'/>"
|
||||||
|
xhtmloutput = "<#{tag_name} xlink:href='#foo'></#{tag_name}>"
|
||||||
|
check_sanitization(input, output, xhtmloutput, xhtmloutput)
|
||||||
|
end
|
||||||
|
|
||||||
|
define_method "test_#{tag_name}_should_allow_local_href_with_newline" do
|
||||||
|
input = %(<#{tag_name} xlink:href="\n#foo"/>)
|
||||||
|
output = "<#{tag_name.downcase} xlink:href='\n#foo'/>"
|
||||||
|
xhtmloutput = "<#{tag_name} xlink:href='\n#foo'></#{tag_name}>"
|
||||||
|
check_sanitization(input, output, xhtmloutput, xhtmloutput)
|
||||||
|
end
|
||||||
|
|
||||||
|
define_method "test_#{tag_name}_should_forbid_nonlocal_href" do
|
||||||
|
input = %(<#{tag_name} xlink:href="http://bad.com/foo"/>)
|
||||||
|
output = "<#{tag_name.downcase}/>"
|
||||||
|
xhtmloutput = "<#{tag_name}></#{tag_name}>"
|
||||||
|
check_sanitization(input, output, xhtmloutput, xhtmloutput)
|
||||||
|
end
|
||||||
|
|
||||||
|
define_method "test_#{tag_name}_should_forbid_nonlocal_href_with_newline" do
|
||||||
|
input = %(<#{tag_name} xlink:href="\nhttp://bad.com/foo"/>)
|
||||||
|
output = "<#{tag_name.downcase}/>"
|
||||||
|
xhtmloutput = "<#{tag_name}></#{tag_name}>"
|
||||||
|
check_sanitization(input, output, xhtmloutput, xhtmloutput)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
def test_should_handle_astral_plane_characters
|
def test_should_handle_astral_plane_characters
|
||||||
input = "<p>𝒵 𝔸</p>"
|
input = "<p>𝒵 𝔸</p>"
|
||||||
output = "<p>\360\235\222\265 \360\235\224\270</p>"
|
output = "<p>\360\235\222\265 \360\235\224\270</p>"
|
|
@ -42,9 +42,10 @@ class HTMLInputStreamTest < Test::Unit::TestCase
|
||||||
require 'iconv'
|
require 'iconv'
|
||||||
|
|
||||||
def test_utf_16
|
def test_utf_16
|
||||||
stream = HTMLInputStream.new("\xff\xfe" + " \x00"*1025)
|
input = Iconv.new('utf-16', 'utf-8').iconv(' '*1025)
|
||||||
assert(stream.char_encoding, 'utf-16-le')
|
stream = HTMLInputStream.new(input)
|
||||||
assert_equal(1025, stream.chars_until(' ',true).length)
|
assert('utf-16-le', stream.char_encoding)
|
||||||
|
assert_equal(1025, stream.chars_until(' ', true).length)
|
||||||
end
|
end
|
||||||
rescue LoadError
|
rescue LoadError
|
||||||
puts "iconv not found, skipping iconv tests"
|
puts "iconv not found, skipping iconv tests"
|
|
@ -433,9 +433,9 @@
|
||||||
{
|
{
|
||||||
"name": "uri_ref_with_space_in svg_attribute",
|
"name": "uri_ref_with_space_in svg_attribute",
|
||||||
"input": "<rect fill='url(\n#foo)' />",
|
"input": "<rect fill='url(\n#foo)' />",
|
||||||
"rexml": "<rect fill=\'url(\n#foo)\'></rect>",
|
"rexml": "<rect fill='url(\n#foo)'></rect>",
|
||||||
"xhtml": "<rect fill=\'url(\n#foo)\'></rect>",
|
"xhtml": "<rect fill='url(\n#foo)'></rect>",
|
||||||
"output": "<rect fill=\'url(\n#foo)\'/>"
|
"output": "<rect fill='url(\n#foo)'/>"
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
|
|
|
@ -41,7 +41,7 @@ plaintext
|
||||||
#data
|
#data
|
||||||
setting html's innerHTML
|
setting html's innerHTML
|
||||||
#errors
|
#errors
|
||||||
Line: 1 Col: 24 XXX innerHTML EOF
|
Line: 1 Col: 24 Unexpected EOF in inner html mode.
|
||||||
#document-fragment
|
#document-fragment
|
||||||
html
|
html
|
||||||
#document
|
#document
|
||||||
|
|
|
@ -608,4 +608,25 @@ Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
|
||||||
| <html>
|
| <html>
|
||||||
| <head>
|
| <head>
|
||||||
| <body>
|
| <body>
|
||||||
| <!-- foo -->
|
| <!-- foo -->
|
||||||
|
|
||||||
|
#data
|
||||||
|
<body></body></html>
|
||||||
|
#document-fragment
|
||||||
|
html
|
||||||
|
#errors
|
||||||
|
Line: 1 Col: 20 Unexpected html end tag in inner html mode.
|
||||||
|
Line: 1 Col: 20 Unexpected EOF in inner html mode.
|
||||||
|
#document
|
||||||
|
| <head>
|
||||||
|
| <body>
|
||||||
|
|
||||||
|
#data
|
||||||
|
<html><frameset></frameset></html>
|
||||||
|
#errors
|
||||||
|
Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
|
||||||
|
#document
|
||||||
|
| <html>
|
||||||
|
| <head>
|
||||||
|
| <frameset>
|
||||||
|
| " "
|
||||||
|
|
Loading…
Reference in a new issue