Sync with latest HTML5lib
This commit is contained in:
parent
bf572e295f
commit
8e92e4a3ab
41 changed files with 1334 additions and 564 deletions
115
vendor/plugins/HTML5lib/parse.rb
vendored
115
vendor/plugins/HTML5lib/parse.rb
vendored
|
@ -5,12 +5,20 @@
|
|||
$:.unshift File.dirname(__FILE__),'lib'
|
||||
|
||||
def parse(opts, args)
|
||||
encoding = nil
|
||||
|
||||
f = args[-1]
|
||||
if f
|
||||
begin
|
||||
require 'open-uri' if f[0..6] == 'http://'
|
||||
f = open(f)
|
||||
if f[0..6] == 'http://'
|
||||
require 'open-uri'
|
||||
f = URI.parse(f).open
|
||||
encoding = f.charset
|
||||
elsif f == '-'
|
||||
f = $stdin
|
||||
else
|
||||
f = open(f)
|
||||
end
|
||||
rescue
|
||||
end
|
||||
else
|
||||
|
@ -29,22 +37,28 @@ def parse(opts, args)
|
|||
p = HTML5lib::HTMLParser.new(:tree=>treebuilder)
|
||||
end
|
||||
|
||||
if opts.parsemethod == :parse
|
||||
args = [f, encoding]
|
||||
else
|
||||
args = [f, 'div', encoding]
|
||||
end
|
||||
|
||||
if opts.profile
|
||||
require 'profiler'
|
||||
Profiler__::start_profile
|
||||
p.send(opts.parsemethod,f)
|
||||
p.send(opts.parsemethod, *args)
|
||||
Profiler__::stop_profile
|
||||
Profiler__::print_profile($stderr)
|
||||
elsif opts.time
|
||||
require 'time'
|
||||
t0 = Time.new
|
||||
document = p.send(opts.parsemethod,f)
|
||||
document = p.send(opts.parsemethod, *args)
|
||||
t1 = Time.new
|
||||
printOutput(p, document, opts)
|
||||
t2 = Time.new
|
||||
puts "\n\nRun took: %fs (plus %fs to print the output)"%[t1-t0, t2-t1]
|
||||
else
|
||||
document = p.send(opts.parsemethod,f)
|
||||
document = p.send(opts.parsemethod, *args)
|
||||
printOutput(p, document, opts)
|
||||
end
|
||||
end
|
||||
|
@ -59,7 +73,7 @@ def printOutput(parser, document, opts)
|
|||
require 'html5lib/treewalkers'
|
||||
tokens = HTML5lib::TreeWalkers[opts.treebuilder].new(document)
|
||||
require 'html5lib/serializer'
|
||||
print HTML5lib::HTMLSerializer.serialize(tokens, opts.serializer)
|
||||
puts HTML5lib::HTMLSerializer.serialize(tokens, opts.serializer)
|
||||
when :hilite
|
||||
print document.hilite
|
||||
when :tree
|
||||
|
@ -93,26 +107,35 @@ options.serializer = {
|
|||
|
||||
require 'optparse'
|
||||
opts = OptionParser.new do |opts|
|
||||
opts.on("-p", "--[no-]profile", "Profile the run") do |profile|
|
||||
options.profile = profile
|
||||
end
|
||||
|
||||
opts.on("-t", "--[no-]time", "Time the run") do |time|
|
||||
options.time = time
|
||||
end
|
||||
|
||||
opts.separator ""
|
||||
opts.separator "Parse Options:"
|
||||
|
||||
opts.on("-b", "--treebuilder NAME") do |treebuilder|
|
||||
options.treebuilder = treebuilder
|
||||
end
|
||||
|
||||
opts.on("-e", "--error", "Print a list of parse errors") do |error|
|
||||
options.error = error
|
||||
end
|
||||
|
||||
opts.on("-f", "--fragment", "Parse as a fragment") do |parse|
|
||||
options.parsemethod = :parseFragment
|
||||
end
|
||||
|
||||
opts.separator ""
|
||||
opts.separator "Filter Options:"
|
||||
|
||||
opts.on("--[no-]inject-meta-charset", "inject <meta charset>") do |inject|
|
||||
options.serializer[:inject_meta_charset] = inject
|
||||
end
|
||||
|
||||
opts.on("--[no-]strip-whitespace", "strip unnecessary whitespace") do |strip|
|
||||
options.serializer[:strip_whitespace] = strip
|
||||
end
|
||||
|
||||
opts.on("--[no-]sanitize", "escape unsafe tags") do |sanitize|
|
||||
options.serializer[:sanitize] = sanitize
|
||||
end
|
||||
|
||||
opts.separator ""
|
||||
opts.separator "Output Options:"
|
||||
|
||||
opts.on("--tree", "output as debug tree") do |tree|
|
||||
options.output = :tree
|
||||
end
|
||||
|
@ -130,26 +153,56 @@ opts = OptionParser.new do |opts|
|
|||
options.output = :hilite
|
||||
end
|
||||
|
||||
opts.on("-c", "--[no-]encoding", "Print character encoding used") do |encoding|
|
||||
options.encoding = encoding
|
||||
opts.on("-e", "--error", "Print a list of parse errors") do |error|
|
||||
options.error = error
|
||||
end
|
||||
|
||||
opts.on("--[no-]inject-meta-charset", "inject <meta charset>") do |inject|
|
||||
options.serializer[:inject_meta_charset] = inject
|
||||
end
|
||||
|
||||
opts.on("--[no-]strip-whitespace", "strip unnecessary whitespace") do |strip|
|
||||
options.serializer[:strip_whitespace] = strip
|
||||
end
|
||||
|
||||
opts.on("--[no-]sanitize", "escape unsafe tags") do |sanitize|
|
||||
options.serializer[:sanitize] = sanitize
|
||||
end
|
||||
opts.separator ""
|
||||
opts.separator "Serialization Options:"
|
||||
|
||||
opts.on("--[no-]omit-optional-tags", "Omit optional tags") do |omit|
|
||||
options.serializer[:omit_optional_tags] = omit
|
||||
end
|
||||
|
||||
opts.on("--[no-]quote-attr-values", "Quote attribute values") do |quote|
|
||||
options.serializer[:quote_attr_values] = quote
|
||||
end
|
||||
|
||||
opts.on("--[no-]use-best-quote-char", "Use best quote character") do |best|
|
||||
options.serializer[:use_best_quote_char] = best
|
||||
end
|
||||
|
||||
opts.on("--quote-char C", "Use specified quote character") do |c|
|
||||
options.serializer[:quote_char] = c
|
||||
end
|
||||
|
||||
opts.on("--[no-]minimize-boolean-attributes", "Minimize boolean attributes") do |min|
|
||||
options.serializer[:minimize_boolean_attributes] = min
|
||||
end
|
||||
|
||||
opts.on("--[no-]use-trailing-solidus", "Use trailing solidus") do |slash|
|
||||
options.serializer[:use_trailing_solidus] = slash
|
||||
end
|
||||
|
||||
opts.on("--[no-]escape-lt-in-attrs", "Escape less than signs in attribute values") do |lt|
|
||||
options.serializer[:escape_lt_in_attrs] = lt
|
||||
end
|
||||
|
||||
opts.separator ""
|
||||
opts.separator "Other Options:"
|
||||
|
||||
opts.on("-p", "--[no-]profile", "Profile the run") do |profile|
|
||||
options.profile = profile
|
||||
end
|
||||
|
||||
opts.on("-t", "--[no-]time", "Time the run") do |time|
|
||||
options.time = time
|
||||
end
|
||||
|
||||
opts.on("-c", "--[no-]encoding", "Print character encoding used") do |encoding|
|
||||
options.encoding = encoding
|
||||
end
|
||||
|
||||
opts.on_tail("-h", "--help", "Show this message") do
|
||||
puts opts
|
||||
exit
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue