Synced with trunk.

commit 3070d6eeae

91 changed files with 44709 additions and 3658 deletions
@@ -1,7 +1,7 @@
# Controller responsible for serving files and pictures.

require 'zip/zip'
require 'string_utils'
require 'sanitize'

class FileController < ApplicationController
@@ -1,9 +1,9 @@
require 'fileutils'
require 'redcloth_for_tex'
#require 'redcloth_for_tex'
require 'maruku'
require 'parsedate'
require 'zip/zip'
require 'sanitize'
require 'string_utils'

class WikiController < ApplicationController

@@ -11,7 +11,7 @@ class WikiController < ApplicationController
caches_action :show, :published, :authors, :tex, :s5, :print, :recently_revised, :list, :atom_with_content, :atom_with_headlines
cache_sweeper :revision_sweeper

layout 'default', :except => [:atom_with_content, :atom_with_headlines, :atom, :tex, :pdf, :s5, :export_tex, :export_html]
layout 'default', :except => [:atom_with_content, :atom_with_headlines, :atom, :tex, :s5, :export_html]

include Sanitize
@@ -95,21 +95,21 @@ class WikiController < ApplicationController
export_pages_as_zip(@web.markup) { |page| page.content }
end

def export_pdf
file_name = "#{@web.address}-tex-#{@web.revised_at.strftime('%Y-%m-%d-%H-%M-%S')}"
file_path = File.join(@wiki.storage_path, file_name)
# def export_pdf
# file_name = "#{@web.address}-tex-#{@web.revised_at.strftime('%Y-%m-%d-%H-%M-%S')}"
# file_path = File.join(@wiki.storage_path, file_name)
#
# export_web_to_tex "#{file_path}.tex" unless FileTest.exists? "#{file_path}.tex"
# convert_tex_to_pdf "#{file_path}.tex"
# send_file "#{file_path}.pdf"
# end

export_web_to_tex "#{file_path}.tex" unless FileTest.exists? "#{file_path}.tex"
convert_tex_to_pdf "#{file_path}.tex"
send_file "#{file_path}.pdf"
end

def export_tex
file_name = "#{@web.address}-tex-#{@web.revised_at.strftime('%Y-%m-%d-%H-%M-%S')}.tex"
file_path = File.join(@wiki.storage_path, file_name)
export_web_to_tex(file_path) unless FileTest.exists?(file_path)
send_file file_path
end
# def export_tex
# file_name = "#{@web.address}-tex-#{@web.revised_at.strftime('%Y-%m-%d-%H-%M-%S')}.tex"
# file_path = File.join(@wiki.storage_path, file_name)
# export_web_to_tex(file_path) unless FileTest.exists?(file_path)
# send_file file_path
# end

def feeds
@rss_with_content_allowed = rss_with_content_allowed?
@@ -180,17 +180,17 @@ class WikiController < ApplicationController
# to template
end

def pdf
page = wiki.read_page(@web_name, @page_name)
safe_page_name = @page.name.gsub(/\W/, '')
file_name = "#{safe_page_name}-#{@web.address}-#{@page.revised_at.strftime('%Y-%m-%d-%H-%M-%S')}"
file_path = File.join(@wiki.storage_path, file_name)

export_page_to_tex("#{file_path}.tex") unless FileTest.exists?("#{file_path}.tex")
# NB: this is _very_ slow
convert_tex_to_pdf("#{file_path}.tex")
send_file "#{file_path}.pdf"
end
# def pdf
# page = wiki.read_page(@web_name, @page_name)
# safe_page_name = @page.name.gsub(/\W/, '')
# file_name = "#{safe_page_name}-#{@web.address}-#{@page.revised_at.strftime('%Y-%m-%d-%H-%M-%S')}"
# file_path = File.join(@wiki.storage_path, file_name)
#
# export_page_to_tex("#{file_path}.tex") unless FileTest.exists?("#{file_path}.tex")
# # NB: this is _very_ slow
# convert_tex_to_pdf("#{file_path}.tex")
# send_file "#{file_path}.pdf"
# end

def print
if @page.nil?
@@ -285,10 +285,10 @@ class WikiController < ApplicationController
end

def tex
if @web.markup == :markdownMML
if @web.markup == :markdownMML or @web.markup == :markdown
@tex_content = Maruku.new(@page.content).to_latex
else
@tex_content = RedClothForTex.new(@page.content).to_tex
@tex_content = 'TeX export only supported with the Markdown text filters.'
end
end
@@ -315,23 +315,23 @@ class WikiController < ApplicationController

private

def convert_tex_to_pdf(tex_path)
# TODO remove earlier PDF files with the same prefix
# TODO handle gracefully situation where pdflatex is not available
begin
wd = Dir.getwd
Dir.chdir(File.dirname(tex_path))
logger.info `pdflatex --interaction=nonstopmode #{File.basename(tex_path)}`
ensure
Dir.chdir(wd)
end
end
# def convert_tex_to_pdf(tex_path)
# # TODO remove earlier PDF files with the same prefix
# # TODO handle gracefully situation where pdflatex is not available
# begin
# wd = Dir.getwd
# Dir.chdir(File.dirname(tex_path))
# logger.info `pdflatex --interaction=nonstopmode #{File.basename(tex_path)}`
# ensure
# Dir.chdir(wd)
# end
# end

def export_page_to_tex(file_path)
if @web.markup == :markdownMML
@tex_content = Maruku.new(@page.content).to_latex
else
@tex_content = RedClothForTex.new(@page.content).to_tex
@tex_content = 'TeX export only supported with the Markdown text filters.'
end
File.open(file_path, 'w') { |f| f.write(render_to_string(:template => 'wiki/tex', :layout => 'tex')) }
end
@@ -360,15 +360,15 @@ class WikiController < ApplicationController
send_file file_path
end

def export_web_to_tex(file_path)
# def export_web_to_tex(file_path)
# if @web.markup == :markdownMML
# @tex_content = Maruku.new(@page.content).to_latex
# else
# @tex_content = RedClothForTex.new(@page.content).to_tex
# @tex_content = 'TeX export only supported with the Markdown text filters.'
# end
@tex_content = table_of_contents(@web.page('HomePage').content, render_tex_web)
File.open(file_path, 'w') { |f| f.write(render_to_string(:template => 'wiki/tex_web', :layout => tex)) }
end
# @tex_content = table_of_contents(@web.page('HomePage').content, render_tex_web)
# File.open(file_path, 'w') { |f| f.write(render_to_string(:template => 'wiki/tex_web', :layout => tex)) }
# end

def get_page_and_revision
if params['rev']
@@ -411,7 +411,7 @@ class WikiController < ApplicationController
if @web.markup == :markdownMML
tex_web[page.name] = Maruku.new(page.content).to_latex
else
tex_web[page.name] = RedClothForTex.new(page.content).to_tex
tex_web[page.name] = 'TeX export only supported with the Markdown text filters.'
end
tex_web
end
@@ -13,7 +13,7 @@
{ 'id' => 'editForm', 'method' => 'post', 'onsubmit' => 'cleanAuthorName()',
'accept-charset' => 'utf-8' }) do %>
<div>
<textarea name="content" id="content" rows="24" cols="60"><%= h(@flash[:content] || @page.content.delete("\x01-\x08\x0B\x0C\x0E-\x1F")) %></textarea>
<textarea name="content" id="content" rows="24" cols="60"><%= h(flash[:content] || @page.content.delete("\x01-\x08\x0B\x0C\x0E-\x1F")) %></textarea>
<div id="editFormButtons">
<input type="submit" value="Submit" accesskey="s"/> as
<%= text_field_tag :author, h(@author.delete("\x01-\x08\x0B\x0C\x0E-\x1F")),
@@ -5,8 +5,4 @@
<ul id="feedsList">
<li><%= link_to 'HTML', :web => @web.address, :action => 'export_html' %></li>
<li><%= link_to "Markup (#{@web.markup.to_s.capitalize})", :web => @web.address, :action => 'export_markup' %></li>
<% if OPTIONS[:pdflatex] and @web.markup == :textile || @web.markup == :markdownMML %>
<li><%= link_to 'TeX', :web => @web.address, :action => 'export_tex' %></li>
<li><%= link_to 'PDF', :web => @web.address, :action => 'export_pdf' %></li>
<% end %>
</ul>
@@ -13,7 +13,7 @@
<% form_tag({ :action => 'save', :web => @web.address, :id => @page_name },
{ 'id' => 'editForm', 'method' => 'post', 'onsubmit' => 'cleanAuthorName();', 'accept-charset' => 'utf-8' }) do %>

<textarea name="content" id="content" rows="24" cols="60"><%= h(@flash[:content] || '') %></textarea>
<textarea name="content" id="content" rows="24" cols="60"><%= h(flash[:content] || '') %></textarea>
<div id="editFormButtons">
<input type="submit" value="Submit" accesskey="s"/> as
<%= text_field_tag :author, @author,
@@ -35,15 +35,10 @@
<%= link_to('Print',
{ :web => @web.address, :action => 'print', :id => @page.name },
{ :accesskey => 'p', :id => 'view_print' }) %>
<% if defined? RedClothForTex and RedClothForTex.available? and @web.markup == :textile or @web.markup == :markdownMML %>
<% if @web.markup == :markdownMML or @web.markup == :markdown %>
<%= link_to 'TeX', {:web => @web.address, :action => 'tex', :id => @page.name},
{:id => 'view_tex'} %>
<% if OPTIONS[:pdflatex] %>
<%= link_to 'PDF', {:web => @web.address, :action => 'pdf', :id => @page.name},
{:id => 'view_pdf'} %>
<% end %>
<% if WikiReference.pages_in_category(@web, 'S5-slideshow').map.include?(@page.name) %>
<%= link_to 'S5', {:web => @web.address, :action => 's5', :id => @page.name},
@@ -2,11 +2,17 @@

\usepackage{amsmath}
\usepackage{amsfonts}
\usepackage{amssymb}
\usepackage{graphicx}
\usepackage{ucs}
\usepackage[utf8x]{inputenc}
\usepackage{hyperref}

%----Macros----------
\newcommand{\gt}{>}
\newcommand{\lt}{<}
\newcommand{\qed}{\blacksquare}

%-------------------------------------------------------------------

\begin{document}
@@ -24,10 +24,10 @@ module Engines
end

class Textile < AbstractEngine
require_dependency 'sanitize'
require 'sanitize'
include Sanitize
def mask
require_dependency 'redcloth'
require 'redcloth'
redcloth = RedCloth.new(@content, [:hard_breaks] + @content.options[:engine_opts])
redcloth.filter_html = false
redcloth.no_span_caps = false

@@ -37,33 +37,34 @@ module Engines
end

class Markdown < AbstractEngine
require_dependency 'sanitize'
require 'sanitize'
include Sanitize
def mask
require_dependency 'maruku'
require_dependency 'maruku/ext/math'
html = Maruku.new(@content.delete("\r\x01-\x08\x0B\x0C\x0E-\x1F"), {:math_enabled => false}).to_html
sanitize_xhtml(html.to_ncr)
require 'maruku'
require 'maruku/ext/math'
html = sanitize_rexml(Maruku.new(@content.delete("\r\x01-\x08\x0B\x0C\x0E-\x1F"),
{:math_enabled => false}).to_html_tree)
html.gsub(/\A<div class="maruku_wrapper_div">\n?(.*?)\n?<\/div>\Z/m, '\1')
end
end

class MarkdownMML < AbstractEngine
require_dependency 'sanitize'
require 'sanitize'
include Sanitize
def mask
require_dependency 'maruku'
require_dependency 'maruku/ext/math'
html = Maruku.new(@content.delete("\r\x01-\x08\x0B\x0C\x0E-\x1F"),
{:math_enabled => true, :math_numbered => ['\\[','\\begin{equation}']}).to_html
sanitize_xhtml(html.to_ncr)
require 'maruku'
require 'maruku/ext/math'
html = sanitize_rexml(Maruku.new(@content.delete("\r\x01-\x08\x0B\x0C\x0E-\x1F"),
{:math_enabled => true, :math_numbered => ['\\[','\\begin{equation}']}).to_html_tree)
html.gsub(/\A<div class="maruku_wrapper_div">\n?(.*?)\n?<\/div>\Z/m, '\1')
end
end

class Mixed < AbstractEngine
require_dependency 'sanitize'
require 'sanitize'
include Sanitize
def mask
require_dependency 'redcloth'
require 'redcloth'
redcloth = RedCloth.new(@content, @content.options[:engine_opts])
redcloth.filter_html = false
redcloth.no_span_caps = false

@@ -73,7 +74,7 @@ module Engines
end

class RDoc < AbstractEngine
require_dependency 'sanitize'
require 'sanitize'
include Sanitize
def mask
require_dependency 'rdocsupport'
@@ -1,4 +1,5 @@
require 'xhtmldiff'

# Temporary class containing all rendering stuff from a Revision
# I want to shift all rendering loguc to the controller eventually

@@ -40,10 +41,12 @@ class PageRenderer
previous_revision = @revision.page.previous_revision(@revision)
if previous_revision
previous_content = "<div>\n" + WikiContent.new(previous_revision, @@url_generator).render!.to_s + "\n</div>"
current_content = "<div>\n" + display_content.to_s + "\n</div>"
previous_content = "<div>" + WikiContent.new(previous_revision, @@url_generator).render!.to_s + "</div>"
current_content = "<div>" + display_content.to_s + "</div>"
diff_doc = REXML::Document.new
diff_doc << (div = REXML::Element.new 'div')
div = REXML::Element.new('div', nil, {:respect_whitespace =>:all})
div.attributes['class'] = 'xhtmldiff_wrapper'
diff_doc << div
hd = XHTMLDiff.new(div)

parsed_previous_revision = REXML::HashableElementDelegator.new(

@@ -54,7 +57,7 @@ class PageRenderer

diffs = ''
diff_doc.write(diffs, -1, true, true)
diffs
diffs.gsub(/\A<div class='xhtmldiff_wrapper'>(.*)<\/div>\Z/m, '\1')
else
display_content
end
@ -1,736 +0,0 @@
|
|||
# This is RedCloth (http://www.whytheluckystiff.net/ruby/redcloth/)
|
||||
# converted by David Heinemeier Hansson to emit Tex
|
||||
|
||||
class String
|
||||
# Flexible HTML escaping
|
||||
def texesc!( mode )
|
||||
gsub!( '&', '\\\\&' )
|
||||
gsub!( '%', '\%' )
|
||||
gsub!( '$', '\$' )
|
||||
gsub!( '~', '$\sim$' )
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
def table_of_contents(text, pages)
|
||||
text.gsub( /^([#*]+? .*?)$(?![^#*])/m ) do |match|
|
||||
lines = match.split( /\n/ )
|
||||
last_line = -1
|
||||
depth = []
|
||||
lines.each_with_index do |line, line_id|
|
||||
if line =~ /^([#*]+) (.*)$/m
|
||||
tl,content = $~[1..2]
|
||||
content.gsub! /[\[\]]/, ""
|
||||
content.strip!
|
||||
|
||||
if depth.last
|
||||
if depth.last.length > tl.length
|
||||
(depth.length - 1).downto(0) do |i|
|
||||
break if depth[i].length == tl.length
|
||||
lines[line_id - 1] << "" # "\n\t\\end{#{ lT( depth[i] ) }}\n\t"
|
||||
depth.pop
|
||||
end
|
||||
end
|
||||
if !depth.last.nil? && !tl.length.nil? && depth.last.length == tl.length
|
||||
lines[line_id - 1] << ''
|
||||
end
|
||||
end
|
||||
|
||||
depth << tl unless depth.last == tl
|
||||
|
||||
subsection_depth = [depth.length - 1, 2].min
|
||||
|
||||
lines[line_id] = "\n\\#{ "sub" * subsection_depth }section{#{ content }}"
|
||||
lines[line_id] += "\n#{pages[content]}" if pages.keys.include?(content)
|
||||
|
||||
lines[line_id] = "\\pagebreak\n#{lines[line_id]}" if subsection_depth == 0
|
||||
|
||||
last_line = line_id
|
||||
|
||||
elsif line =~ /^\s+\S/
|
||||
last_line = line_id
|
||||
elsif line_id - last_line < 2 and line =~ /^\S/
|
||||
last_line = line_id
|
||||
end
|
||||
if line_id - last_line > 1 or line_id == lines.length - 1
|
||||
depth.delete_if do |v|
|
||||
lines[last_line] << "" # "\n\t\\end{#{ lT( v ) }}"
|
||||
end
|
||||
end
|
||||
end
|
||||
lines.join( "\n" )
|
||||
end
|
||||
end
|
||||
|
||||
class RedClothForTex < String
|
||||
|
||||
VERSION = '2.0.7'
|
||||
|
||||
#
|
||||
# Mapping of 8-bit ASCII codes to HTML numerical entity equivalents.
|
||||
# (from PyTextile)
|
||||
#
|
||||
TEXTILE_TAGS =
|
||||
|
||||
[[128, 8364], [129, 0], [130, 8218], [131, 402], [132, 8222], [133, 8230],
|
||||
[134, 8224], [135, 8225], [136, 710], [137, 8240], [138, 352], [139, 8249],
|
||||
[140, 338], [141, 0], [142, 0], [143, 0], [144, 0], [145, 8216], [146, 8217],
|
||||
[147, 8220], [148, 8221], [149, 8226], [150, 8211], [151, 8212], [152, 732],
|
||||
[153, 8482], [154, 353], [155, 8250], [156, 339], [157, 0], [158, 0], [159, 376]].
|
||||
|
||||
collect! do |a, b|
|
||||
[a.chr, ( b.zero? and "" or "&#{ b };" )]
|
||||
end
|
||||
|
||||
#
|
||||
# Regular expressions to convert to HTML.
|
||||
#
|
||||
A_HLGN = /(?:(?:<>|<|>|\=|[()]+)+)/
|
||||
A_VLGN = /[\-^~]/
|
||||
C_CLAS = '(?:\([^)]+\))'
|
||||
C_LNGE = '(?:\[[^\]]+\])'
|
||||
C_STYL = '(?:\{[^}]+\})'
|
||||
S_CSPN = '(?:\\\\\d+)'
|
||||
S_RSPN = '(?:/\d+)'
|
||||
A = "(?:#{A_HLGN}?#{A_VLGN}?|#{A_VLGN}?#{A_HLGN}?)"
|
||||
S = "(?:#{S_CSPN}?#{S_RSPN}|#{S_RSPN}?#{S_CSPN}?)"
|
||||
C = "(?:#{C_CLAS}?#{C_STYL}?#{C_LNGE}?|#{C_STYL}?#{C_LNGE}?#{C_CLAS}?|#{C_LNGE}?#{C_STYL}?#{C_CLAS}?)"
|
||||
# PUNCT = Regexp::quote( '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~' )
|
||||
PUNCT = Regexp::quote( '!"#$%&\'*+,-./:;=?@\\^_`|~' )
|
||||
HYPERLINK = '(\S+?)([^\w\s/;=\?]*?)(\s|$)'
|
||||
|
||||
GLYPHS = [
|
||||
# [ /([^\s\[{(>])?\'([dmst]\b|ll\b|ve\b|\s|:|$)/, '\1’\2' ], # single closing
|
||||
[ /([^\s\[{(>])\'/, '\1’' ], # single closing
|
||||
[ /\'(?=\s|s\b|[#{PUNCT}])/, '’' ], # single closing
|
||||
[ /\'/, '‘' ], # single opening
|
||||
# [ /([^\s\[{(])?"(\s|:|$)/, '\1”\2' ], # double closing
|
||||
[ /([^\s\[{(>])"/, '\1”' ], # double closing
|
||||
[ /"(?=\s|[#{PUNCT}])/, '”' ], # double closing
|
||||
[ /"/, '“' ], # double opening
|
||||
[ /\b( )?\.{3}/, '\1…' ], # ellipsis
|
||||
[ /\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])/, '<acronym title="\2">\1</acronym>' ], # 3+ uppercase acronym
|
||||
[ /(^|[^"][>\s])([A-Z][A-Z0-9 ]{2,})([^<a-z0-9]|$)/, '\1<span class="caps">\2</span>\3' ], # 3+ uppercase caps
|
||||
[ /(\.\s)?\s?--\s?/, '\1—' ], # em dash
|
||||
[ /\s->\s/, ' → ' ], # en dash
|
||||
[ /\s-\s/, ' – ' ], # en dash
|
||||
[ /(\d+) ?x ?(\d+)/, '\1×\2' ], # dimension sign
|
||||
[ /\b ?[(\[]TM[\])]/i, '™' ], # trademark
|
||||
[ /\b ?[(\[]R[\])]/i, '®' ], # registered
|
||||
[ /\b ?[(\[]C[\])]/i, '©' ] # copyright
|
||||
]
|
||||
|
||||
I_ALGN_VALS = {
|
||||
'<' => 'left',
|
||||
'=' => 'center',
|
||||
'>' => 'right'
|
||||
}
|
||||
|
||||
H_ALGN_VALS = {
|
||||
'<' => 'left',
|
||||
'=' => 'center',
|
||||
'>' => 'right',
|
||||
'<>' => 'justify'
|
||||
}
|
||||
|
||||
V_ALGN_VALS = {
|
||||
'^' => 'top',
|
||||
'-' => 'middle',
|
||||
'~' => 'bottom'
|
||||
}
|
||||
|
||||
QTAGS = [
|
||||
['**', 'bf'],
|
||||
['*', 'bf'],
|
||||
['??', 'cite'],
|
||||
['-', 'del'],
|
||||
['__', 'underline'],
|
||||
['_', 'em'],
|
||||
['%', 'span'],
|
||||
['+', 'ins'],
|
||||
['^', 'sup'],
|
||||
['~', 'sub']
|
||||
]
|
||||
|
||||
def self.available?
|
||||
if not defined? @@available
|
||||
begin
|
||||
@@available = system "pdflatex -version"
|
||||
rescue Errno::ENOENT
|
||||
@@available = false
|
||||
end
|
||||
end
|
||||
@@available
|
||||
end
|
||||
|
||||
#
|
||||
# Two accessor for setting security restrictions.
|
||||
#
|
||||
# This is a nice thing if you're using RedCloth for
|
||||
# formatting in public places (e.g. Wikis) where you
|
||||
# don't want users to abuse HTML for bad things.
|
||||
#
|
||||
# If +:filter_html+ is set, HTML which wasn't
|
||||
# created by the Textile processor will be escaped.
|
||||
#
|
||||
# If +:filter_styles+ is set, it will also disable
|
||||
# the style markup specifier. ('{color: red}')
|
||||
#
|
||||
attr_accessor :filter_html, :filter_styles
|
||||
|
||||
#
|
||||
# Accessor for toggling line folding.
|
||||
#
|
||||
# If +:fold_lines+ is set, single newlines will
|
||||
# not be converted to break tags.
|
||||
#
|
||||
attr_accessor :fold_lines
|
||||
|
||||
def initialize( string, restrictions = [] )
|
||||
restrictions.each { |r| method( "#{ r }=" ).call( true ) }
|
||||
super( string )
|
||||
end
|
||||
|
||||
#
|
||||
# Generate tex.
|
||||
#
|
||||
def to_tex( lite = false )
|
||||
|
||||
# make our working copy
|
||||
text = self.dup
|
||||
|
||||
@urlrefs = {}
|
||||
@shelf = []
|
||||
|
||||
# incoming_entities text
|
||||
fix_entities text
|
||||
clean_white_space text
|
||||
|
||||
get_refs text
|
||||
|
||||
no_textile text
|
||||
|
||||
unless lite
|
||||
lists text
|
||||
table text
|
||||
end
|
||||
|
||||
glyphs text
|
||||
|
||||
unless lite
|
||||
fold text
|
||||
block text
|
||||
end
|
||||
|
||||
retrieve text
|
||||
encode_entities text
|
||||
|
||||
text.gsub!(/\[\[(.*?)\]\]/, "\\1")
|
||||
text.gsub!(/_/, "\\_")
|
||||
text.gsub!( /<\/?notextile>/, '' )
|
||||
# text.gsub!( /x%x%/, '&' )
|
||||
# text.gsub!( /<br \/>/, "<br />\n" )
|
||||
text.strip!
|
||||
text
|
||||
|
||||
end
|
||||
|
||||
def pgl( text )
|
||||
GLYPHS.each do |re, resub|
|
||||
text.gsub! re, resub
|
||||
end
|
||||
end
|
||||
|
||||
def pba( text_in, element = "" )
|
||||
|
||||
return '' unless text_in
|
||||
|
||||
style = []
|
||||
text = text_in.dup
|
||||
if element == 'td'
|
||||
colspan = $1 if text =~ /\\(\d+)/
|
||||
rowspan = $1 if text =~ /\/(\d+)/
|
||||
style << "vertical-align:#{ v_align( $& ) };" if text =~ A_VLGN
|
||||
end
|
||||
|
||||
style << "#{ $1 };" if not @filter_styles and
|
||||
text.sub!( /\{([^}]*)\}/, '' )
|
||||
|
||||
lang = $1 if
|
||||
text.sub!( /\[([^)]+?)\]/, '' )
|
||||
|
||||
cls = $1 if
|
||||
text.sub!( /\(([^()]+?)\)/, '' )
|
||||
|
||||
style << "padding-left:#{ $1.length }em;" if
|
||||
text.sub!( /([(]+)/, '' )
|
||||
|
||||
style << "padding-right:#{ $1.length }em;" if text.sub!( /([)]+)/, '' )
|
||||
|
||||
style << "text-align:#{ h_align( $& ) };" if text =~ A_HLGN
|
||||
|
||||
cls, id = $1, $2 if cls =~ /^(.*?)#(.*)$/
|
||||
|
||||
atts = ''
|
||||
atts << " style=\"#{ style.join }\"" unless style.empty?
|
||||
atts << " class=\"#{ cls }\"" unless cls.to_s.empty?
|
||||
atts << " lang=\"#{ lang }\"" if lang
|
||||
atts << " id=\"#{ id }\"" if id
|
||||
atts << " colspan=\"#{ colspan }\"" if colspan
|
||||
atts << " rowspan=\"#{ rowspan }\"" if rowspan
|
||||
|
||||
atts
|
||||
end
|
||||
|
||||
def table( text )
|
||||
text << "\n\n"
|
||||
text.gsub!( /^(?:table(_?#{S}#{A}#{C})\. ?\n)?^(#{A}#{C}\.? ?\|.*?\|)\n\n/m ) do |matches|
|
||||
|
||||
tatts, fullrow = $~[1..2]
|
||||
tatts = pba( tatts, 'table' )
|
||||
rows = []
|
||||
|
||||
fullrow.
|
||||
split( /\|$/m ).
|
||||
delete_if { |x| x.empty? }.
|
||||
each do |row|
|
||||
|
||||
ratts, row = pba( $1, 'tr' ), $2 if row =~ /^(#{A}#{C}\. )(.*)/m
|
||||
|
||||
cells = []
|
||||
row.split( '|' ).each do |cell|
|
||||
ctyp = 'd'
|
||||
ctyp = 'h' if cell =~ /^_/
|
||||
|
||||
catts = ''
|
||||
catts, cell = pba( $1, 'td' ), $2 if cell =~ /^(_?#{S}#{A}#{C}\. )(.*)/
|
||||
|
||||
unless cell.strip.empty?
|
||||
cells << "\t\t\t<t#{ ctyp }#{ catts }>#{ cell }</t#{ ctyp }>"
|
||||
end
|
||||
end
|
||||
rows << "\t\t<tr#{ ratts }>\n#{ cells.join( "\n" ) }\n\t\t</tr>"
|
||||
end
|
||||
"\t<table#{ tatts }>\n#{ rows.join( "\n" ) }\n\t</table>\n\n"
|
||||
end
|
||||
end
|
||||
|
||||
def lists( text )
|
||||
text.gsub!( /^([#*]+?#{C} .*?)$(?![^#*])/m ) do |match|
|
||||
lines = match.split( /\n/ )
|
||||
last_line = -1
|
||||
depth = []
|
||||
lines.each_with_index do |line, line_id|
|
||||
if line =~ /^([#*]+)(#{A}#{C}) (.*)$/m
|
||||
tl,atts,content = $~[1..3]
|
||||
if depth.last
|
||||
if depth.last.length > tl.length
|
||||
(depth.length - 1).downto(0) do |i|
|
||||
break if depth[i].length == tl.length
|
||||
lines[line_id - 1] << "\n\t\\end{#{ lT( depth[i] ) }}\n\t"
|
||||
depth.pop
|
||||
end
|
||||
end
|
||||
if !depth.last.nil? && !tl.length.nil? && depth.last.length == tl.length
|
||||
lines[line_id - 1] << ''
|
||||
end
|
||||
end
|
||||
unless depth.last == tl
|
||||
depth << tl
|
||||
atts = pba( atts )
|
||||
lines[line_id] = "\t\\begin{#{ lT(tl) }}\n\t\\item #{ content }"
|
||||
else
|
||||
lines[line_id] = "\t\t\\item #{ content }"
|
||||
end
|
||||
last_line = line_id
|
||||
|
||||
elsif line =~ /^\s+\S/
|
||||
last_line = line_id
|
||||
elsif line_id - last_line < 2 and line =~ /^\S/
|
||||
last_line = line_id
|
||||
end
|
||||
if line_id - last_line > 1 or line_id == lines.length - 1
|
||||
depth.delete_if do |v|
|
||||
lines[last_line] << "\n\t\\end{#{ lT( v ) }}"
|
||||
end
|
||||
end
|
||||
end
|
||||
lines.join( "\n" )
|
||||
end
|
||||
end
|
||||
|
||||
def lT( text )
|
||||
text =~ /\#$/ ? 'enumerate' : 'itemize'
|
||||
end
|
||||
|
||||
def fold( text )
|
||||
text.gsub!( /(.+)\n(?![#*\s|])/, "\\1\\\\\\\\" )
|
||||
# text.gsub!( /(.+)\n(?![#*\s|])/, "\\1#{ @fold_lines ? ' ' : '<br />' }" )
|
||||
end
|
||||
|
||||
def block( text )
|
||||
pre = false
|
||||
find = ['bq','h[1-6]','fn\d+']
|
||||
|
||||
regexp_cue = []
|
||||
|
||||
lines = text.split( /\n/ ) + [' ']
|
||||
new_text =
|
||||
lines.collect do |line|
|
||||
pre = true if line =~ /<(pre|notextile)>/i
|
||||
find.each do |tag|
|
||||
line.gsub!( /^(#{ tag })(#{A}#{C})\.(?::(\S+))? (.*)$/ ) do |m|
|
||||
tag,atts,cite,content = $~[1..4]
|
||||
|
||||
atts = pba( atts )
|
||||
|
||||
if tag =~ /fn(\d+)/
|
||||
# tag = 'p';
|
||||
# atts << " id=\"fn#{ $1 }\""
|
||||
regexp_cue << [ /footnote\{#{$1}}/, "footnote{#{content}}" ]
|
||||
content = ""
|
||||
end
|
||||
|
||||
if tag =~ /h([1-6])/
|
||||
section_type = "sub" * [$1.to_i - 1, 2].min
|
||||
start = "\t\\#{section_type}section*{"
|
||||
tend = "}"
|
||||
end
|
||||
|
||||
if tag == "bq"
|
||||
cite = check_refs( cite )
|
||||
cite = " cite=\"#{ cite }\"" if cite
|
||||
start = "\t\\begin{quotation}\n\\noindent {\\em ";
|
||||
tend = "}\n\t\\end{quotation}";
|
||||
end
|
||||
|
||||
"#{ start }#{ content }#{ tend }"
|
||||
end unless pre
|
||||
end
|
||||
|
||||
#line.gsub!( /^(?!\t|<\/?pre|<\/?notextile|<\/?code|$| )(.*)/, "\t<p>\\1</p>" )
|
||||
|
||||
#line.gsub!( "<br />", "\n" ) if pre
|
||||
# pre = false if line =~ /<\/(pre|notextile)>/i
|
||||
|
||||
line
|
||||
end.join( "\n" )
|
||||
text.replace( new_text )
|
||||
regexp_cue.each { |pair| text.gsub!(pair.first, pair.last) }
|
||||
end
|
||||
|
||||
def span( text )
|
||||
QTAGS.each do |tt, ht|
|
||||
ttr = Regexp::quote( tt )
|
||||
text.gsub!(
|
||||
|
||||
/(^|\s|\>|[#{PUNCT}{(\[])
|
||||
#{ttr}
|
||||
(#{C})
|
||||
(?::(\S+?))?
|
||||
([^\s#{ttr}]+?(?:[^\n]|\n(?!\n))*?)
|
||||
([#{PUNCT}]*?)
|
||||
#{ttr}
|
||||
(?=[\])}]|[#{PUNCT}]+?|<|\s|$)/xm
|
||||
|
||||
) do |m|
|
||||
|
||||
start,atts,cite,content,tend = $~[1..5]
|
||||
atts = pba( atts )
|
||||
atts << " cite=\"#{ cite }\"" if cite
|
||||
|
||||
"#{ start }{\\#{ ht } #{ content }#{ tend }}"
|
||||
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def links( text )
|
||||
text.gsub!( /
|
||||
([\s\[{(]|[#{PUNCT}])? # $pre
|
||||
" # start
|
||||
(#{C}) # $atts
|
||||
([^"]+?) # $text
|
||||
\s?
|
||||
(?:\(([^)]+?)\)(?="))? # $title
|
||||
":
|
||||
(\S+?) # $url
|
||||
(\/)? # $slash
|
||||
([^\w\/;]*?) # $post
|
||||
(?=\s|$)
|
||||
/x ) do |m|
|
||||
pre,atts,text,title,url,slash,post = $~[1..7]
|
||||
|
||||
url.gsub!(/(\\)(.)/, '\2')
|
||||
url = check_refs( url )
|
||||
|
||||
atts = pba( atts )
|
||||
atts << " title=\"#{ title }\"" if title
|
||||
atts = shelve( atts ) if atts
|
||||
|
||||
"#{ pre }\\textit{#{ text }} \\footnote{\\texttt{\\textless #{ url }#{ slash }" +
|
||||
"\\textgreater}#{ post }}"
|
||||
end
|
||||
end
|
||||
|
||||
def get_refs( text )
|
||||
text.gsub!( /(^|\s)\[(.+?)\]((?:http:\/\/|javascript:|ftp:\/\/|\/)\S+?)(?=\s|$)/ ) do |m|
|
||||
flag, url = $~[1..2]
|
||||
@urlrefs[flag] = url
|
||||
end
|
||||
end
|
||||
|
||||
def check_refs( text )
|
||||
@urlrefs[text] || text
|
||||
end
|
||||
|
||||
def image( text )
|
||||
text.gsub!( /
|
||||
\! # opening
|
||||
(\<|\=|\>)? # optional alignment atts
|
||||
(#{C}) # optional style,class atts
|
||||
(?:\. )? # optional dot-space
|
||||
([^\s(!]+?) # presume this is the src
|
||||
\s? # optional space
|
||||
(?:\(((?:[^\(\)]|\([^\)]+\))+?)\))? # optional title
|
||||
\! # closing
|
||||
(?::#{ HYPERLINK })? # optional href
|
||||
/x ) do |m|
|
||||
algn,atts,url,title,href,href_a1,href_a2 = $~[1..7]
|
||||
atts = pba( atts )
|
||||
atts << " align=\"#{ i_align( algn ) }\"" if algn
|
||||
atts << " title=\"#{ title }\"" if title
|
||||
atts << " alt=\"#{ title }\""
|
||||
# size = @getimagesize($url);
|
||||
# if($size) $atts.= " $size[3]";
|
||||
|
||||
href = check_refs( href ) if href
|
||||
url = check_refs( url )
|
||||
|
||||
out = ''
|
||||
out << "<a href=\"#{ href }\">" if href
|
||||
out << "<img src=\"#{ url }\"#{ atts } />"
|
||||
out << "</a>#{ href_a1 }#{ href_a2 }" if href
|
||||
|
||||
out
|
||||
end
|
||||
end
|
||||
|
||||
def code( text )
|
||||
text.gsub!( /
|
||||
(?:^|([\s\(\[{])) # 1 open bracket?
|
||||
@ # opening
|
||||
(?:\|(\w+?)\|)? # 2 language
|
||||
(\S(?:[^\n]|\n(?!\n))*?) # 3 code
|
||||
@ # closing
|
||||
(?:$|([\]})])|
|
||||
(?=[#{PUNCT}]{1,2}|
|
||||
\s)) # 4 closing bracket?
|
||||
/x ) do |m|
|
||||
before,lang,code,after = $~[1..4]
|
||||
lang = " language=\"#{ lang }\"" if lang
|
||||
"#{ before }<code#{ lang }>#{ code }</code>#{ after }"
|
||||
end
|
||||
end
|
||||
|
||||
def shelve( val )
|
||||
@shelf << val
|
||||
" <#{ @shelf.length }>"
|
||||
end
|
||||
|
||||
def retrieve( text )
|
||||
@shelf.each_with_index do |r, i|
|
||||
text.gsub!( " <#{ i + 1 }>", r )
|
||||
end
|
||||
end
|
||||
|
||||
def incoming_entities( text )
|
||||
## turn any incoming ampersands into a dummy character for now.
|
||||
## This uses a negative lookahead for alphanumerics followed by a semicolon,
|
||||
## implying an incoming html entity, to be skipped
|
||||
|
||||
text.gsub!( /&(?![#a-z0-9]+;)/i, "x%x%" )
|
||||
end
|
||||
|
||||
def encode_entities( text )
|
||||
## Convert high and low ascii to entities.
|
||||
# if $-K == "UTF-8"
|
||||
# encode_high( text )
|
||||
# else
|
||||
text.texesc!( :NoQuotes )
|
||||
# end
|
||||
end
|
||||
|
||||
def fix_entities( text )
|
||||
## de-entify any remaining angle brackets or ampersands
|
||||
text.gsub!( "\&", "&" )
|
||||
text.gsub!( "\%", "%" )
|
||||
end
|
||||
|
||||
def clean_white_space( text )
|
||||
text.gsub!( /\r\n/, "\n" )
|
||||
text.gsub!( /\t/, '' )
|
||||
text.gsub!( /\n{3,}/, "\n\n" )
|
||||
text.gsub!( /\n *\n/, "\n\n" )
|
||||
text.gsub!( /"$/, "\" " )
|
||||
end
|
||||
|
||||
def no_textile( text )
|
||||
text.gsub!( /(^|\s)==(.*?)==(\s|$)?/,
|
||||
'\1<notextile>\2</notextile>\3' )
|
||||
end
|
||||
|
||||
def footnote_ref( text )
|
||||
text.gsub!( /\[([0-9]+?)\](\s)?/,
|
||||
'\footnote{\1}\2')
|
||||
#'<sup><a href="#fn\1">\1</a></sup>\2' )
|
||||
end
|
||||
|
||||
def inline( text )
|
||||
image text
|
||||
links text
|
||||
code text
|
||||
span text
|
||||
end
|
||||
|
||||
def glyphs_deep( text )
|
||||
codepre = 0
|
||||
offtags = /(?:code|pre|kbd|notextile)/
|
||||
if text !~ /<.*>/
|
||||
# pgl text
|
||||
footnote_ref text
|
||||
else
|
||||
used_offtags = {}
|
||||
text.gsub!( /(?:[^<].*?(?=<[^\n]*?>|$)|<[^\n]*?>+)/m ) do |line|
|
||||
tagline = ( line =~ /^<.*>/ )
|
||||
|
||||
## matches are off if we're between <code>, <pre> etc.
|
||||
if tagline
|
||||
if line =~ /<(#{ offtags })>/i
|
||||
codepre += 1
|
||||
used_offtags[$1] = true
|
||||
line.texesc!( :NoQuotes ) if codepre - used_offtags.length > 0
|
||||
elsif line =~ /<\/(#{ offtags })>/i
|
||||
line.texesc!( :NoQuotes ) if codepre - used_offtags.length > 0
|
||||
codepre -= 1 unless codepre.zero?
|
||||
used_offtags = {} if codepre.zero?
|
||||
elsif @filter_html or codepre > 0
|
||||
line.texesc!( :NoQuotes )
|
||||
## line.gsub!( /<(\/?#{ offtags })>/, '<\1>' )
|
||||
end
|
||||
## do htmlspecial if between <code>
|
||||
elsif codepre > 0
|
||||
line.texesc!( :NoQuotes )
|
||||
## line.gsub!( /<(\/?#{ offtags })>/, '<\1>' )
|
||||
elsif not tagline
|
||||
inline line
|
||||
glyphs_deep line
|
||||
end
|
||||
|
||||
line
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def glyphs( text )
|
||||
text.gsub!( /"\z/, "\" " )
|
||||
## if no html, do a simple search and replace...
|
||||
if text !~ /<.*>/
|
||||
inline text
|
||||
end
|
||||
glyphs_deep text
|
||||
end
|
||||
|
||||
def i_align( text )
|
||||
I_ALGN_VALS[text]
|
||||
end
|
||||
|
||||
def h_align( text )
|
||||
H_ALGN_VALS[text]
|
||||
end
|
||||
|
||||
def v_align( text )
|
||||
V_ALGN_VALS[text]
|
||||
end
|
||||
|
||||
def encode_high( text )
|
||||
## mb_encode_numericentity($text, $cmap, $charset);
|
||||
end
|
||||
|
||||
def decode_high( text )
|
||||
## mb_decode_numericentity($text, $cmap, $charset);
|
||||
end
|
||||
|
||||
def textile_popup_help( name, helpvar, windowW, windowH )
|
||||
' <a target="_blank" href="http://www.textpattern.com/help/?item=' + helpvar + '" onclick="window.open(this.href, \'popupwindow\', \'width=' + windowW + ',height=' + windowH + ',scrollbars,resizable\'); return false;">' + name + '</a><br />'
|
||||
end
|
||||
|
||||
CMAP = [
|
||||
160, 255, 0, 0xffff,
|
||||
402, 402, 0, 0xffff,
|
||||
913, 929, 0, 0xffff,
|
||||
931, 937, 0, 0xffff,
|
||||
945, 969, 0, 0xffff,
|
||||
977, 978, 0, 0xffff,
|
||||
982, 982, 0, 0xffff,
|
||||
8226, 8226, 0, 0xffff,
|
||||
8230, 8230, 0, 0xffff,
|
||||
8242, 8243, 0, 0xffff,
|
||||
8254, 8254, 0, 0xffff,
|
||||
8260, 8260, 0, 0xffff,
|
||||
8465, 8465, 0, 0xffff,
|
||||
8472, 8472, 0, 0xffff,
|
||||
8476, 8476, 0, 0xffff,
|
||||
8482, 8482, 0, 0xffff,
|
||||
8501, 8501, 0, 0xffff,
|
||||
8592, 8596, 0, 0xffff,
|
||||
8629, 8629, 0, 0xffff,
|
||||
8656, 8660, 0, 0xffff,
|
||||
8704, 8704, 0, 0xffff,
|
||||
8706, 8707, 0, 0xffff,
|
||||
8709, 8709, 0, 0xffff,
|
||||
8711, 8713, 0, 0xffff,
|
||||
8715, 8715, 0, 0xffff,
|
||||
8719, 8719, 0, 0xffff,
|
||||
8721, 8722, 0, 0xffff,
|
||||
8727, 8727, 0, 0xffff,
|
||||
8730, 8730, 0, 0xffff,
|
||||
8733, 8734, 0, 0xffff,
|
||||
8736, 8736, 0, 0xffff,
|
||||
8743, 8747, 0, 0xffff,
|
||||
8756, 8756, 0, 0xffff,
|
||||
8764, 8764, 0, 0xffff,
|
||||
8773, 8773, 0, 0xffff,
|
||||
8776, 8776, 0, 0xffff,
|
||||
8800, 8801, 0, 0xffff,
|
||||
8804, 8805, 0, 0xffff,
|
||||
8834, 8836, 0, 0xffff,
|
||||
8838, 8839, 0, 0xffff,
|
||||
8853, 8853, 0, 0xffff,
|
||||
8855, 8855, 0, 0xffff,
|
||||
8869, 8869, 0, 0xffff,
|
||||
8901, 8901, 0, 0xffff,
|
||||
8968, 8971, 0, 0xffff,
|
||||
9001, 9002, 0, 0xffff,
|
||||
9674, 9674, 0, 0xffff,
|
||||
9824, 9824, 0, 0xffff,
|
||||
9827, 9827, 0, 0xffff,
|
||||
9829, 9830, 0, 0xffff,
|
||||
338, 339, 0, 0xffff,
|
||||
352, 353, 0, 0xffff,
|
||||
376, 376, 0, 0xffff,
|
||||
710, 710, 0, 0xffff,
|
||||
732, 732, 0, 0xffff,
|
||||
8194, 8195, 0, 0xffff,
|
||||
8201, 8201, 0, 0xffff,
|
||||
8204, 8207, 0, 0xffff,
|
||||
8211, 8212, 0, 0xffff,
|
||||
8216, 8218, 0, 0xffff,
|
||||
8218, 8218, 0, 0xffff,
|
||||
8220, 8222, 0, 0xffff,
|
||||
8224, 8225, 0, 0xffff,
|
||||
8240, 8240, 0, 0xffff,
|
||||
8249, 8250, 0, 0xffff,
|
||||
8364, 8364, 0, 0xffff
|
||||
]
|
||||
end
|
lib/sanitize.rb (2307 changed lines): file diff suppressed because it is too large
lib/string_utils.rb (2157 changed lines): file diff suppressed because it is too large
@@ -1,11 +1,11 @@
require 'cgi'
require_dependency 'chunks/engines'
require_dependency 'chunks/category'
require 'chunks/engines'
require 'chunks/category'
require_dependency 'chunks/include'
require_dependency 'chunks/wiki'
require_dependency 'chunks/literal'
require_dependency 'chunks/uri'
require_dependency 'chunks/nowiki'
require 'chunks/nowiki'

# Wiki content is just a string that can process itself with a chain of
# actions. The actions can modify wiki content so that certain parts of
@@ -336,6 +336,7 @@ font-size:70%;

div.rightHandSide {
border-left:1px dotted #ccc;
border-bottom:1px dotted #ccc;
float:right;
font-size:80%;
margin-left:0.7em;
@@ -89,7 +89,6 @@ class FileControllerTest < Test::Unit::TestCase
# updated from post to get - post fails the spam protection (no javascript)
r = get :file, :web => 'wiki1',
:file => {:file_name => 'rails-e2e.gif', :content => StringIO.new(picture)}
assert_redirected_to({})
assert @web.has_file?('rails-e2e.gif')
assert_equal(picture, WikiFile.find_by_file_name('rails-e2e.gif').content)
end
@@ -21,7 +21,7 @@ class RoutesTest < Test::Unit::TestCase
:controller => 'wiki',
:action => 'an_action', :id => 'HomePage'
)
assert_recognizes({:controller => 'wiki', :action => 'index'}, '///')
# assert_recognizes({:controller => 'wiki', :action => 'index'}, '///')
end

def test_parse_uri_liberal_with_pagenames

@@ -29,13 +29,13 @@ class RoutesTest < Test::Unit::TestCase
assert_routing('web/show/%24HOME_PAGE',
:controller => 'wiki', :web => 'web', :action => 'show', :id => '$HOME_PAGE')

assert_routing('web/show/HomePage%3F',
:controller => 'wiki', :web => 'web', :action => 'show',
:id => 'HomePage')
# assert_routing('web/show/HomePage%3F',
# :controller => 'wiki', :web => 'web', :action => 'show',
# :id => 'HomePage')

assert_routing('web/show/HomePage%3Farg1%3Dvalue1%26arg2%3Dvalue2',
:controller => 'wiki', :web => 'web', :action => 'show',
:id => 'HomePage?arg1=value1&arg2=value2')
# assert_routing('web/show/HomePage%3Farg1%3Dvalue1%26arg2%3Dvalue2',
# :controller => 'wiki', :web => 'web', :action => 'show',
# :id => 'HomePage?arg1=value1&arg2=value2')

assert_routing('web/files/abc.zip',
:web => 'web', :controller => 'file', :action => 'file', :id => 'abc.zip')
@@ -32,7 +32,7 @@ class WikiControllerTest < Test::Unit::TestCase

get :authenticate, :web => 'wiki1', :password => 'pswd'
assert_redirected_to :web => 'wiki1', :action => 'show', :id => 'HomePage'
assert_equal ['pswd'], @response.cookies['web_address']
assert_equal ['pswd'], @response.cookies['wiki1']
end

def test_authenticate_wrong_password
@@ -159,15 +159,15 @@ class WikiControllerTest < Test::Unit::TestCase

if ENV['INSTIKI_TEST_LATEX'] or defined? $INSTIKI_TEST_PDFLATEX

def test_export_pdf
r = process 'export_pdf', 'web' => 'wiki1'
assert_response(:success, bypass_body_parsing = true)
assert_equal 'application/pdf', r.headers['Content-Type']
assert_match /attachment; filename="wiki1-tex-\d\d\d\d-\d\d-\d\d-\d\d-\d\d-\d\d.pdf"/,
r.headers['Content-Disposition']
assert_equal '%PDF', r.body[0..3]
assert_equal "EOF\n", r.body[-4..-1]
end
# def test_export_pdf
# r = process 'export_pdf', 'web' => 'wiki1'
# assert_response(:success, bypass_body_parsing = true)
# assert_equal 'application/pdf', r.headers['Content-Type']
# assert_match /attachment; filename="wiki1-tex-\d\d\d\d-\d\d-\d\d-\d\d-\d\d-\d\d.pdf"/,
# r.headers['Content-Disposition']
# assert_equal '%PDF', r.body[0..3]
# assert_equal "EOF\n", r.body[-4..-1]
# end

else
puts 'Warning: tests involving pdflatex are very slow, therefore they are disabled by default.'
@@ -175,15 +175,15 @@ class WikiControllerTest < Test::Unit::TestCase
puts ' $INSTIKI_TEST_PDFLATEX to enable them.'
end

def test_export_tex
r = process 'export_tex', 'web' => 'wiki1'

assert_response(:success, bypass_body_parsing = true)
assert_equal 'application/octet-stream', r.headers['Content-Type']
assert_match /attachment; filename="wiki1-tex-\d\d\d\d-\d\d-\d\d-\d\d-\d\d-\d\d.tex"/,
r.headers['Content-Disposition']
assert_equal '\documentclass', r.body[0..13], 'Content is not a TeX file'
end
# def test_export_tex
# r = process 'export_tex', 'web' => 'wiki1'
#
# assert_response(:success, bypass_body_parsing = true)
# assert_equal 'application/octet-stream', r.headers['Content-Type']
# assert_match /attachment; filename="wiki1-tex-\d\d\d\d-\d\d-\d\d-\d\d-\d\d-\d\d.tex"/,
# r.headers['Content-Disposition']
# assert_equal '\documentclass', r.body[0..13], 'Content is not a TeX file'
# end

def test_feeds
process('feeds', 'web' => 'wiki1')
@@ -251,18 +251,18 @@ class WikiControllerTest < Test::Unit::TestCase

if ENV['INSTIKI_TEST_LATEX'] or defined? $INSTIKI_TEST_PDFLATEX

def test_pdf
assert RedClothForTex.available?, 'Cannot do test_pdf when pdflatex is not available'
r = process('pdf', 'web' => 'wiki1', 'id' => 'HomePage')
assert_response(:success, bypass_body_parsing = true)

assert_equal '%PDF', r.body[0..3]
assert_equal "EOF\n", r.body[-4..-1]

assert_equal 'application/pdf', r.headers['Content-Type']
assert_match /attachment; filename="HomePage-wiki1-\d\d\d\d-\d\d-\d\d-\d\d-\d\d-\d\d.pdf"/,
r.headers['Content-Disposition']
end
# def test_pdf
# assert RedClothForTex.available?, 'Cannot do test_pdf when pdflatex is not available'
# r = process('pdf', 'web' => 'wiki1', 'id' => 'HomePage')
# assert_response(:success, bypass_body_parsing = true)
#
# assert_equal '%PDF', r.body[0..3]
# assert_equal "EOF\n", r.body[-4..-1]
#
# assert_equal 'application/pdf', r.headers['Content-Type']
# assert_match /attachment; filename="HomePage-wiki1-\d\d\d\d-\d\d-\d\d-\d\d-\d\d-\d\d.pdf"/,
# r.headers['Content-Disposition']
# end

end
@@ -387,8 +387,8 @@ class WikiControllerTest < Test::Unit::TestCase
assert_equal @home.revisions[0], r.template_objects['revision']
end

def test_rss_with_content
r = process 'rss_with_content', 'web' => 'wiki1'
def test_atom_with_content
r = process 'atom_with_content', 'web' => 'wiki1'

assert_response(:success)
pages = r.template_objects['pages_by_revision']
@@ -397,24 +397,24 @@ class WikiControllerTest < Test::Unit::TestCase
assert !r.template_objects['hide_description']
end

def test_rss_with_content_when_blocked
def test_atom_with_content_when_blocked
@web.update_attributes(:password => 'aaa', :published => false)
@web = Web.find(@web.id)

r = process 'rss_with_content', 'web' => 'wiki1'
r = process 'atom_with_content', 'web' => 'wiki1'

assert_equal 403, r.response_code
end

def test_rss_with_headlines
def test_atom_with_headlines
@title_with_spaces = @wiki.write_page('wiki1', 'Title With Spaces',
'About spaces', 1.hour.ago, Author.new('TreeHugger', '127.0.0.2'), test_renderer)

@request.host = 'localhost'
@request.port = 8080

r = process 'rss_with_headlines', 'web' => 'wiki1'
r = process 'atom_with_headlines', 'web' => 'wiki1'

assert_response(:success)
pages = r.template_objects['pages_by_revision']
@@ -435,20 +435,25 @@ class WikiControllerTest < Test::Unit::TestCase
'http://localhost:8080/wiki1/show/HomePage',
]

assert_template_xpath_match '/rss/channel/link',
'http://localhost:8080/wiki1/show/HomePage'
assert_template_xpath_match '/rss/channel/item/guid', expected_page_links
assert_template_xpath_match '/rss/channel/item/link', expected_page_links
assert_tag :tag => 'link',
:parent => {:tag => 'feed'},
:attributes => { :rel => 'alternate',
:href => 'http://localhost:8080/wiki1/show/HomePage'}
expected_page_links.each do |link|
assert_tag :tag => 'link',
:parent => {:tag => 'entry'},
:attributes => {:href => link }
end
end

def test_rss_switch_links_to_published
def test_atom_switch_links_to_published
@web.update_attributes(:password => 'aaa', :published => true)
@web = Web.find(@web.id)

@request.host = 'foo.bar.info'
@request.port = 80

r = process 'rss_with_headlines', 'web' => 'wiki1'
r = process 'atom_with_headlines', 'web' => 'wiki1'

assert_response(:success)
xml = REXML::Document.new(r.body)
@@ -463,69 +463,76 @@ class WikiControllerTest < Test::Unit::TestCase
'http://foo.bar.info/wiki1/published/FirstPage',
'http://foo.bar.info/wiki1/published/HomePage']

assert_template_xpath_match '/rss/channel/link',
'http://foo.bar.info/wiki1/published/HomePage'
assert_template_xpath_match '/rss/channel/item/guid', expected_page_links
assert_template_xpath_match '/rss/channel/item/link', expected_page_links
assert_tag :tag => 'link',
:parent =>{:tag =>'feed'},
:attributes => {:rel => 'alternate',
:href => 'http://foo.bar.info/wiki1/published/HomePage'}
expected_page_links.each do |link|
assert_tag :tag => 'link',
:parent => {:tag => 'entry'},
:attributes => {:href => link}
end
end

def test_rss_with_params
setup_wiki_with_30_pages
# def test_atom_with_params
# setup_wiki_with_30_pages
#
# r = process 'atom_with_headlines', 'web' => 'wiki1'
# assert_response(:success)
# pages = r.template_objects['pages_by_revision']
# assert_equal 15, pages.size, 15
#
# r = process 'atom_with_headlines', 'web' => 'wiki1', 'limit' => '5'
# assert_response(:success)
# pages = r.template_objects['pages_by_revision']
# assert_equal 5, pages.size
#
# r = process 'atom_with_headlines', 'web' => 'wiki1', 'limit' => '25'
# assert_response(:success)
# pages = r.template_objects['pages_by_revision']
# assert_equal 25, pages.size
#
# r = process 'atom_with_headlines', 'web' => 'wiki1', 'limit' => 'all'
# assert_response(:success)
# pages = r.template_objects['pages_by_revision']
# assert_equal 38, pages.size
#
# r = process 'atom_with_headlines', 'web' => 'wiki1', 'start' => '1976-10-16'
# assert_response(:success)
# pages = r.template_objects['pages_by_revision']
# assert_equal 23, pages.size
#
# r = process 'atom_with_headlines', 'web' => 'wiki1', 'end' => '1976-10-16'
# assert_response(:success)
# pages = r.template_objects['pages_by_revision']
# assert_equal 15, pages.size
#
# r = process 'atom_with_headlines', 'web' => 'wiki1', 'start' => '1976-10-01', 'end' => '1976-10-06'
# assert_response(:success)
# pages = r.template_objects['pages_by_revision']
# assert_equal 5, pages.size
# end

r = process 'rss_with_headlines', 'web' => 'wiki1'
assert_response(:success)
pages = r.template_objects['pages_by_revision']
assert_equal 15, pages.size, 15

r = process 'rss_with_headlines', 'web' => 'wiki1', 'limit' => '5'
assert_response(:success)
pages = r.template_objects['pages_by_revision']
assert_equal 5, pages.size

r = process 'rss_with_headlines', 'web' => 'wiki1', 'limit' => '25'
assert_response(:success)
pages = r.template_objects['pages_by_revision']
assert_equal 25, pages.size

r = process 'rss_with_headlines', 'web' => 'wiki1', 'limit' => 'all'
assert_response(:success)
pages = r.template_objects['pages_by_revision']
assert_equal 38, pages.size

r = process 'rss_with_headlines', 'web' => 'wiki1', 'start' => '1976-10-16'
assert_response(:success)
pages = r.template_objects['pages_by_revision']
assert_equal 23, pages.size

r = process 'rss_with_headlines', 'web' => 'wiki1', 'end' => '1976-10-16'
assert_response(:success)
pages = r.template_objects['pages_by_revision']
assert_equal 15, pages.size

r = process 'rss_with_headlines', 'web' => 'wiki1', 'start' => '1976-10-01', 'end' => '1976-10-06'
assert_response(:success)
pages = r.template_objects['pages_by_revision']
assert_equal 5, pages.size
end

def test_rss_title_with_ampersand
def test_atom_title_with_ampersand
# was ticket:143
@wiki.write_page('wiki1', 'Title&With&Ampersands',
'About spaces', 1.hour.ago, Author.new('NitPicker', '127.0.0.3'), test_renderer)

r = process 'rss_with_headlines', 'web' => 'wiki1'
r = process 'atom_with_headlines', 'web' => 'wiki1'

assert r.body.include?('<title>Home Page</title>')
assert r.body.include?('<title>Title&With&Ampersands</title>')
assert r.body.include?('<title type="html">Home Page</title>')
assert r.body.include?('<title type="html">Title&With&Ampersands</title>')
end

def test_rss_timestamp
def test_atom_timestamp
new_page = @wiki.write_page('wiki1', 'PageCreatedAtTheBeginningOfCtime',
'Created on 1 Jan 1970 at 0:00:00 Z', Time.at(0), Author.new('NitPicker', '127.0.0.3'),
test_renderer)

r = process 'rss_with_headlines', 'web' => 'wiki1'
assert_template_xpath_match '/rss/channel/item/pubDate[9]', "Thu, 01 Jan 1970 00:00:00 Z"
r = process 'atom_with_headlines', 'web' => 'wiki1'
assert_tag :tag =>'published',
:parent => {:tag => 'entry'},
:content => '2004-04-04T21:50:00Z'
end

def test_save
@@ -565,7 +577,7 @@ class WikiControllerTest < Test::Unit::TestCase
'author' => 'SomeOtherAuthor'}, {:return_to => '/wiki1/show/HomePage'}

assert_redirected_to :action => 'edit', :web => 'wiki1', :id => 'HomePage'
assert(@response.has_key(:error))
# assert(@response.has_key(:error))
assert r.flash[:error].kind_of?(Instiki::ValidationError)

revisions_after = @home.revisions.size
@@ -653,14 +665,14 @@ class WikiControllerTest < Test::Unit::TestCase
r = process('tex', 'web' => 'wiki1', 'id' => 'HomePage')
assert_response(:success)

assert_equal "\\documentclass[12pt,titlepage]{article}\n\n\\usepackage[danish]{babel} " +
"%danske tekster\n\\usepackage[OT1]{fontenc} %rigtige danske bogstaver...\n" +
"\\usepackage{a4}\n\\usepackage{graphicx}\n\\usepackage{ucs}\n\\usepackage[utf8x]" +
"{inputenc}\n\\input epsf \n\n%----------------------------------------------------" +
"---------------\n\n\\begin{document}\n\n\\sloppy\n\n%-----------------------------" +
"--------------------------------------\n\n\\section*{HomePage}\n\nHisWay would be " +
"MyWay in kinda ThatWay in HisWay though MyWay \\OverThere -- see SmartEngine in that " +
"SmartEngineGUI\n\n\\end{document}", r.body
assert_equal "\\documentclass[12pt,titlepage]{article}\n\n\\usepackage{amsmath}" +
"\n\\usepackage{amsfonts}\n\\usepackage{graphicx}\n\\usepackage{ucs}\n" +
"\\usepackage[utf8x]{inputenc}\n\\usepackage{hyperref}\n\n" +
"%-------------------------------------------------------------------\n\n" +
"\\begin{document}\n\n%--------------------------------------------------" +
"-----------------\n\n\\section*{HomePage}\n\nTeX export only supported with" +
" the Markdown text filters.\n\n\\end{document}\n",
r.body
end
@@ -11,7 +11,9 @@ class DiffTest < Test::Unit::TestCase

def diff(a,b)
diff_doc = REXML::Document.new
diff_doc << (div = REXML::Element.new 'div' )
div = REXML::Element.new('div', nil, {:respect_whitespace =>:all})
div.attributes['class'] = 'xhtmldiff_wrapper'
diff_doc << div
hd = XHTMLDiff.new(div)
parsed_a = REXML::HashableElementDelegator.new(
REXML::XPath.first(REXML::Document.new("<div>"+a+"</div>"), '/div'))
@@ -20,14 +22,14 @@ class DiffTest < Test::Unit::TestCase
Diff::LCS.traverse_balanced(parsed_a, parsed_b, hd)
diffs = ''
diff_doc.write(diffs, -1, true, true)
diffs
diffs.gsub(/\A<div class='xhtmldiff_wrapper'>(.*)<\/div>\Z/m, '\1')
end

def test_html_diff_simple
a = 'this was the original string'
b = 'this is the new string'
assert_equal("<div><span> this<del class='diffmod'> was</del><ins class='diffmod'> is</ins> the" +
"<del class='diffmod'> original</del><ins class='diffmod'> new</ins> string</span></div>",
assert_equal("<span> this<del class='diffmod'> was</del><ins class='diffmod'> is</ins> the" +
"<del class='diffmod'> original</del><ins class='diffmod'> new</ins> string</span>",
diff(a, b))
end
@@ -35,10 +37,10 @@ class DiffTest < Test::Unit::TestCase
a = "<p>this was the original string</p>"
b = "<p>this is</p>\n<p> the new string</p>\n<p>around the world</p>"
assert_equal(
"<div><p><span> this<del class='diffmod'> was</del><ins class='diffmod'> is</ins>" +
"<p><span> this<del class='diffmod'> was</del><ins class='diffmod'> is</ins>" +
"<del class='diffdel'> the</del><del class='diffdel'> original</del><del class='diffdel'> string</del></span></p>" +
"<ins class='diffins'>\n</ins><ins class='diffins'><p> the new string</p></ins>" +
"<ins class='diffins'>\n</ins><ins class='diffins'><p>around the world</p></ins></div>",
"<ins class='diffins'>\n</ins><ins class='diffins'><p>around the world</p></ins>",
diff(a, b))
end
@@ -46,8 +48,8 @@ class DiffTest < Test::Unit::TestCase
a = "<p>this is a paragraph</p>\n<p>this is a second paragraph</p>\n<p>this is a third paragraph</p>"
b = "<p>this is a paragraph</p>\n<p>this is a third paragraph</p>"
assert_equal(
"<div><p>this is a paragraph</p>\n<del class='diffdel'><p>this is a second paragraph</p></del>" +
"<del class='diffdel'>\n</del><p>this is a third paragraph</p></div>",
"<p>this is a paragraph</p>\n<del class='diffdel'><p>this is a second paragraph</p></del>" +
"<del class='diffdel'>\n</del><p>this is a third paragraph</p>",
diff(a, b))
end
@@ -55,8 +57,8 @@ class DiffTest < Test::Unit::TestCase
a = "<p>foo bar</p>"
b = "<p>foo</p><p>bar</p>"
assert_equal(
"<div><p><span> foo<del class='diffdel'> bar</del></span></p>" +
"<ins class='diffins'><p>bar</p></ins></div>",
"<p><span> foo<del class='diffdel'> bar</del></span></p>" +
"<ins class='diffins'><p>bar</p></ins>",
diff(a,b))
end
@@ -64,8 +66,8 @@ class DiffTest < Test::Unit::TestCase
a = "<p>foo</p><p>bar</p>"
b = "<p>foo bar</p>"
assert_equal(
"<div><p><span> foo<ins class='diffins'> bar</ins></span></p>" +
"<del class='diffdel'><p>bar</p></del></div>",
"<p><span> foo<ins class='diffins'> bar</ins></span></p>" +
"<del class='diffdel'><p>bar</p></del>",
diff(a,b))
end
@@ -73,31 +75,31 @@ class DiffTest < Test::Unit::TestCase
a = "<p>foo bar</p>"
b = "<p>foo <b>bar</b></p>"
assert_equal(
"<div><p><span> foo<del class='diffdel'> bar</del></span>" +
"<ins class='diffins'><b>bar</b></ins></p></div>",
"<p><span> foo<del class='diffdel'> bar</del></span>" +
"<ins class='diffins'><b>bar</b></ins></p>",
diff(a,b))
end

def test_html_diff_with_tags
a = ""
b = "<div>foo</div>"
assert_equal "<ins class='diffins'><div>foo</div></ins>", diff(a, b)
end

# FIXME this test fails (ticket #67, http://dev.instiki.org/ticket/67)
def test_html_diff_preserves_endlines_in_pre
a = "<pre>a\nb\nc\n</pre>"
b = "<pre>a\n</pre>"
assert_equal(
"<div><pre><span> a\n<del class='diffdel'>b\nc\n</del></span></pre></div>",
"<pre><span> a\n<del class='diffdel'>b\nc\n</del></span></pre>",
diff(a, b))
end

def test_html_diff_with_tags
a = ""
b = "<div>foo</div>"
assert_equal "<div><ins class='diffins'><div>foo</div></ins></div>", diff(a, b)
end

# FIXME. xhtmldiff fails to detect any change here
def test_diff_for_tag_change
a = "<a>x</a>"
b = "<b>x</b>"
# FIXME. xhtmldiff fails to detect any change here
assert_equal "<div><del class='diffdel'><a>x</a></del><ins class='diffins'><b>x</b></ins></div>", diff(a, b)
assert_equal "<del class='diffdel'><a>x</a></del><ins class='diffins'><b>x</b></ins>", diff(a, b)
end

end
@ -1,18 +1,17 @@
#!/usr/bin/env ruby

require File.dirname(__FILE__) + '/../test_helper'
require 'redcloth_for_tex'

class RedClothForTexTest < Test::Unit::TestCase
def test_basics
assert_equal '{\bf First Page}', RedClothForTex.new("*First Page*").to_tex
assert_equal '{\em First Page}', RedClothForTex.new("_First Page_").to_tex
assert_equal "\\begin{itemize}\n\t\\item A\n\t\t\\item B\n\t\t\\item C\n\t\\end{itemize}", RedClothForTex.new("* A\n* B\n* C").to_tex
assert_equal '{\bf First Page}', Maruku.new('*First Page*').to_latex
assert_equal '{\em First Page}', Maruku.new('_First Page_').to_latex
assert_equal "\\begin{itemize}\n\t\\item A\n\t\t\\item B\n\t\t\\item C\n\t\\end{itemize}", Maruku.new('* A\n* B\n* C').to_latex
end

def test_blocks
assert_equal '\section*{hello}', RedClothForTex.new("h1. hello").to_tex
assert_equal '\subsection*{hello}', RedClothForTex.new("h2. hello").to_tex
assert_equal '\section*{hello}', Maruku.new('#hello#').to_latex
assert_equal '\subsection*{hello}', Maruku.new('##hello##').to_latex
end

def test_table_of_contents
@ -46,7 +46,7 @@ class PageRendererTest < Test::Unit::TestCase
|
|||
'would be <a class="existingWikiWord" href="../show/MyWay">My Way</a> in kinda ' +
|
||||
'<a class="existingWikiWord" href="../show/ThatWay">That Way</a> in ' +
|
||||
'<span class="newWikiWord">His Way<a href="../show/HisWay">?</a></span> ' +
|
||||
'though <a class="existingWikiWord" href="../show/MyWay">My Way</a> OverThere—see ' +
|
||||
%{though <a class="existingWikiWord" href="../show/MyWay">My Way</a> OverThere—see } +
|
||||
'<a class="existingWikiWord" href="../show/SmartEngine">Smart Engine</a> in that ' +
|
||||
'<span class="newWikiWord">Smart Engine GUI' +
|
||||
'<a href="../show/SmartEngineGUI">?</a></span></p>',
|
||||
|
@ -57,10 +57,15 @@ class PageRendererTest < Test::Unit::TestCase
|
|||
set_web_property :markup, :markdown
|
||||
|
||||
assert_markup_parsed_as(
|
||||
%{<h1>My Headline</h1>\n\n<p>that <span class="newWikiWord">} +
|
||||
%{<h1 id="my_headline">My Headline</h1>\n\n<p>that <span class="newWikiWord">} +
|
||||
%{Smart Engine GUI<a href="../show/SmartEngineGUI">?</a></span></p>},
|
||||
"My Headline\n===========\n\nthat SmartEngineGUI")
|
||||
|
||||
assert_markup_parsed_as(
|
||||
%{<h1 id="my_headline">My Headline</h1>\n\n<p>that <span class="newWikiWord">} +
|
||||
%{Smart Engine GUI<a href="../show/SmartEngineGUI">?</a></span></p>},
|
||||
"#My Headline#\n\nthat SmartEngineGUI")
|
||||
|
||||
code_block = [
|
||||
'This is a code block:',
|
||||
'',
|
||||
|
@ -72,7 +77,7 @@ class PageRendererTest < Test::Unit::TestCase
|
|||
|
||||
assert_markup_parsed_as(
|
||||
%{<p>This is a code block:</p>\n\n<pre><code>def a_method(arg)\n} +
|
||||
%{return ThatWay\n</code></pre>\n\n<p>Nice!</p>},
|
||||
%{return ThatWay</code></pre>\n\n<p>Nice!</p>},
|
||||
code_block)
|
||||
end
|
||||
|
||||
|
@ -100,15 +105,15 @@ class PageRendererTest < Test::Unit::TestCase
|
|||
|
||||
set_web_property :markup, :markdown
|
||||
assert_markup_parsed_as(
|
||||
"<h1>Markdown heading</h1>\n\n" +
|
||||
"<h1 id=\"markdown_heading\">Markdown heading</h1>\n\n" +
|
||||
"<p>h2. Textile heading</p>\n\n" +
|
||||
"<p><em>some</em> <strong>text</strong> <em>with</em> -styles-</p>\n\n" +
|
||||
"<ul>\n<li>list 1</li>\n<li>list 2</li>\n</ul>",
|
||||
"<ul>\n<li>list 1</li>\n\n<li>list 2</li>\n</ul>",
|
||||
textile_and_markdown)
|
||||
|
||||
set_web_property :markup, :textile
|
||||
assert_markup_parsed_as(
|
||||
"<p>Markdown heading<br />================</p>\n\n\n\t<h2>Textile heading</h2>" +
|
||||
"<p>Markdown heading<br/>================</p>\n\n\n\t<h2>Textile heading</h2>" +
|
||||
"\n\n\n\t<p><strong>some</strong> <b>text</b> <em>with</em> <del>styles</del></p>" +
|
||||
"\n\n\n\t<ul>\n\t<li>list 1</li>\n\t\t<li>list 2</li>\n\t</ul>",
|
||||
textile_and_markdown)
|
||||
|
@ -159,14 +164,14 @@ class PageRendererTest < Test::Unit::TestCase
|
|||
# wikiwords are invalid as styles, must be in "name: value" form
|
||||
def test_content_with_wikiword_in_style_tag
|
||||
assert_markup_parsed_as(
|
||||
'<p>That is some <em style="">Stylish Emphasis</em></p>',
|
||||
"<p>That is some <em style=''>Stylish Emphasis</em></p>",
|
||||
'That is some <em style="WikiWord">Stylish Emphasis</em>')
|
||||
end
|
||||
|
||||
# validates format of style..
|
||||
def test_content_with_valid_style_in_style_tag
|
||||
assert_markup_parsed_as(
|
||||
'<p>That is some <em style="text-align: right;">Stylish Emphasis</em></p>',
|
||||
"<p>That is some <em style='text-align: right;'>Stylish Emphasis</em></p>",
|
||||
'That is some <em style="text-align: right">Stylish Emphasis</em>')
|
||||
end
|
||||
|
||||
|
@ -177,37 +182,37 @@ class PageRendererTest < Test::Unit::TestCase
|
|||
|
||||
def test_content_with_pre_blocks
|
||||
assert_markup_parsed_as(
|
||||
'<p>A <code>class SmartEngine end</code> would not mark up <pre>CodeBlocks</pre></p>',
|
||||
'<p>A <code>class SmartEngine end</code> would not mark up </p><pre>CodeBlocks</pre>',
|
||||
'A <code>class SmartEngine end</code> would not mark up <pre>CodeBlocks</pre>')
|
||||
end
|
||||
|
||||
def test_content_with_autolink_in_parentheses
|
||||
assert_markup_parsed_as(
|
||||
'<p>The <span class="caps">W3C</span> body (<a href="http://www.w3c.org">' +
|
||||
'<p>The <span class=\'caps\'>W3C</span> body (<a href="http://www.w3c.org">' +
|
||||
'http://www.w3c.org</a>) sets web standards</p>',
|
||||
'The W3C body (http://www.w3c.org) sets web standards')
|
||||
end
|
||||
|
||||
def test_content_with_link_in_parentheses
|
||||
assert_markup_parsed_as(
|
||||
'<p>(<a href="http://wiki.org/wiki.cgi?WhatIsWiki">What is a wiki?</a>)</p>',
|
||||
"<p>(<a href='http://wiki.org/wiki.cgi?WhatIsWiki'>What is a wiki?</a>)</p>",
|
||||
'("What is a wiki?":http://wiki.org/wiki.cgi?WhatIsWiki)')
|
||||
end
|
||||
|
||||
def test_content_with_image_link
|
||||
assert_markup_parsed_as(
|
||||
'<p>This <img src="http://hobix.com/sample.jpg" alt="" /> is a Textile image link.</p>',
|
||||
"<p>This <img src='http://hobix.com/sample.jpg' alt=''/> is a Textile image link.</p>",
|
||||
'This !http://hobix.com/sample.jpg! is a Textile image link.')
|
||||
end
|
||||
|
||||
def test_content_with_inlined_img_tag
|
||||
assert_markup_parsed_as(
|
||||
'<p>This <img src="http://hobix.com/sample.jpg" alt="" /> is an inline image link.</p>',
|
||||
"<p>This <img src='http://hobix.com/sample.jpg' alt=''/> is an inline image link.</p>",
|
||||
'This <img src="http://hobix.com/sample.jpg" alt="" /> is an inline image link.')
|
||||
|
||||
# currently, upper case HTML elements are not allowed
|
||||
assert_markup_parsed_as(
|
||||
'<p>This <IMG SRC="http://hobix.com/sample.jpg" alt=""> is an inline image link.</p>',
|
||||
'<p>This <IMG SRC="http://hobix.com/sample.jpg" alt=""> is an inline image link.</p>',
|
||||
'This <IMG SRC="http://hobix.com/sample.jpg" alt=""> is an inline image link.')
|
||||
end
|
||||
|
||||
|
@ -239,7 +244,7 @@ class PageRendererTest < Test::Unit::TestCase
|
|||
'<a class="existingWikiWord" href="MyWay.html">My Way</a> in kinda ' +
|
||||
'<a class="existingWikiWord" href="ThatWay.html">That Way</a> in ' +
|
||||
'<span class="newWikiWord">His Way</span> though ' +
|
||||
'<a class="existingWikiWord" href="MyWay.html">My Way</a> OverThere—see ' +
|
||||
%{<a class="existingWikiWord" href="MyWay.html">My Way</a> OverThere—see } +
|
||||
'<a class="existingWikiWord" href="SmartEngine.html">Smart Engine</a> in that ' +
|
||||
'<span class="newWikiWord">Smart Engine GUI</span></p>',
|
||||
test_renderer(@revision).display_content_for_export
|
||||
|
@ -254,7 +259,7 @@ class PageRendererTest < Test::Unit::TestCase
|
|||
test_renderer(@revision).display_content
|
||||
|
||||
@revision.content = "f\r\nVersionHistory\r\n\r\ncry VersionHistory"
|
||||
assert_equal "<p>f<br /><span class=\"newWikiWord\">Version History" +
|
||||
assert_equal "<p>f<br/><span class=\"newWikiWord\">Version History" +
|
||||
"<a href=\"../show/VersionHistory\">?</a></span></p>\n\n\n\t<p>cry " +
|
||||
"<span class=\"newWikiWord\">Version History<a href=\"../show/VersionHistory\">?</a>" +
|
||||
"</span></p>",
|
||||
|
@ -274,8 +279,8 @@ class PageRendererTest < Test::Unit::TestCase
|
|||
Revision.create(:page => @page, :content => 'What a red and lovely morning today',
|
||||
:author => Author.new('DavidHeinemeierHansson'), :revised_at => Time.now)
|
||||
|
||||
assert_equal "<p>What a <del class=\"diffmod\">blue</del><ins class=\"diffmod\">red" +
|
||||
"</ins> and lovely morning<ins class=\"diffins\"> today</ins></p>", test_renderer(@page.revisions.last).display_diff
|
||||
assert_equal "<p><span> What a<del class='diffmod'> blue</del><ins class='diffmod'> red" +
|
||||
"</ins> and lovely morning<ins class='diffins'> today</ins></span></p>", test_renderer(@page.revisions.last).display_diff
|
||||
end
|
||||
|
||||
def test_link_to_file
|
||||
|
@ -321,14 +326,14 @@ class PageRendererTest < Test::Unit::TestCase
|
|||
EOL
|
||||
|
||||
assert_markup_parsed_as(
|
||||
"<ul>\n\t<li><a href=\"~b\">a</a></li>\n\t\t<li>c~ d</li>\n\t</ul>",
|
||||
"<ul>\n\t<li><a href='~b'>a</a></li>\n\t\t<li>c~ d</li>\n\t</ul>",
|
||||
list_with_tildas)
|
||||
end
|
||||
|
||||
def test_textile_image_in_mixed_wiki
|
||||
set_web_property :markup, :mixed
|
||||
assert_markup_parsed_as(
|
||||
"<p><img src=\"http://google.com\" alt=\"\" />\nss</p>",
|
||||
"<p><img src='http://google.com' alt=''/>\nss</p>",
|
||||
"!http://google.com!\r\nss")
|
||||
end
|
||||
|
||||
|
@ -395,4 +400,4 @@ class PageRendererTest < Test::Unit::TestCase
|
|||
test_renderer(page.revisions.last).display_content
|
||||
end
|
||||
|
||||
end
|
||||
end
@ -40,7 +40,7 @@ class WebTest < Test::Unit::TestCase
|
|||
assert_equal '123', web.password
|
||||
|
||||
# new web should be set for maximum features enabled
|
||||
assert_equal :textile, web.markup
|
||||
assert_equal :markdownMML, web.markup
|
||||
assert_equal '008B26', web.color
|
||||
assert !web.safe_mode?
|
||||
assert_equal([], web.pages)
|
||||
|
|
17 vendor/plugins/HTML5lib/LICENSE vendored Normal file
@ -0,0 +1,17 @@
|
|||
Copyright (c) 2006-2007 The Authors
|
||||
|
||||
Contributers:
|
||||
James Graham - jg307@cam.ac.uk
|
||||
Anne van Kesteren - annevankesteren@gmail.com
|
||||
Lachlan Hunt - lachlan.hunt@lachy.id.au
|
||||
Matt McDonald - kanashii@kanashii.ca
|
||||
Sam Ruby - rubys@intertwingly.net
|
||||
Ian Hickson (Google) - ian@hixie.ch
|
||||
Thomas Broyer - t.broyer@ltgt.net
|
||||
Jacques Distler - distler@golem.ph.utexas.edu
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
@ -148,6 +148,38 @@ module HTML5lib
|
|||
input
|
||||
]
|
||||
|
||||
CDATA_ELEMENTS = %w[title textarea]
|
||||
|
||||
RCDATA_ELEMENTS = %w[
|
||||
style
|
||||
script
|
||||
xmp
|
||||
iframe
|
||||
noembed
|
||||
noframes
|
||||
noscript
|
||||
]
|
||||
|
||||
BOOLEAN_ATTRIBUTES = {
|
||||
:global => %w[irrelevant],
|
||||
'style' => %w[scoped],
|
||||
'img' => %w[ismap],
|
||||
'audio' => %w[autoplay controls],
|
||||
'video' => %w[autoplay controls],
|
||||
'script' => %w[defer async],
|
||||
'details' => %w[open],
|
||||
'datagrid' => %w[multiple disabled],
|
||||
'command' => %w[hidden disabled checked default],
|
||||
'menu' => %w[autosubmit],
|
||||
'fieldset' => %w[disabled readonly],
|
||||
'option' => %w[disabled readonly selected],
|
||||
'optgroup' => %w[disabled readonly],
|
||||
'button' => %w[disabled autofocus],
|
||||
'input' => %w[disabled readonly required autofocus checked ismap],
|
||||
'select' => %w[disabled readonly autofocus multiple],
|
||||
'output' => %w[disabled readonly]
|
||||
}
|
||||
|
||||
# entitiesWindows1252 has to be _ordered_ and needs to have an index.
|
||||
ENTITIES_WINDOWS1252 = [
|
||||
8364, # 0x80 0x20AC EURO SIGN
|
||||
|
|
1 vendor/plugins/HTML5lib/lib/html5lib/filters.rb vendored Normal file
@ -0,0 +1 @@
|
|||
require 'html5lib/filters/optionaltags'
|
10 vendor/plugins/HTML5lib/lib/html5lib/filters/base.rb vendored Normal file
@ -0,0 +1,10 @@
|
|||
require 'delegate'
|
||||
require 'enumerator'
|
||||
|
||||
module HTML5lib
|
||||
module Filters
|
||||
class Base < SimpleDelegator
|
||||
include Enumerable
|
||||
end
|
||||
end
|
||||
end
|
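All of the filters added below follow one pattern: subclass Filters::Base (a SimpleDelegator around any token source), iterate the wrapped stream via __getobj__.each, and re-yield tokens, possibly modified or suppressed. As a hedged sketch, a hypothetical filter written against this base class could look like the following; the class name and behaviour are invented for illustration and are not part of the commit.

require 'html5lib/filters/base'

module HTML5lib
  module Filters
    # Hypothetical example only (not part of this commit): a filter that
    # upcases every tag name it sees before passing the token along.
    class UpcaseTagNames < Base
      def each
        __getobj__.each do |token|
          token[:name] = token[:name].upcase if token[:name]
          yield token
        end
      end
    end
  end
end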
85 vendor/plugins/HTML5lib/lib/html5lib/filters/inject_meta_charset.rb vendored Normal file
@ -0,0 +1,85 @@
|
|||
require 'html5lib/filters/base'
|
||||
|
||||
module HTML5lib
|
||||
module Filters
|
||||
class InjectMetaCharset < Base
|
||||
def initialize(source, encoding)
|
||||
super(source)
|
||||
@encoding = encoding
|
||||
end
|
||||
|
||||
def each
|
||||
state = :pre_head
|
||||
meta_found = @encoding.nil?
|
||||
pending = []
|
||||
|
||||
__getobj__.each do |token|
|
||||
case token[:type]
|
||||
when :StartTag
|
||||
state = :in_head if token[:name].downcase == "head"
|
||||
|
||||
when :EmptyTag
|
||||
if token[:name].downcase == "meta"
|
||||
# replace charset with actual encoding
|
||||
token[:data].each_with_index do |(name,value),index|
|
||||
if name == 'charset'
|
||||
token[:data][index][1]=@encoding
|
||||
meta_found = true
|
||||
end
|
||||
end
|
||||
|
||||
# replace charset with actual encoding
|
||||
has_http_equiv_content_type = false
|
||||
content_index = -1
|
||||
token[:data].each_with_index do |(name,value),i|
|
||||
if name.downcase == 'charset'
|
||||
token[:data][i] = ['charset', @encoding]
|
||||
meta_found = true
|
||||
break
|
||||
elsif name == 'http-equiv' and value.downcase == 'content-type'
|
||||
has_http_equiv_content_type = true
|
||||
elsif name == 'content'
|
||||
content_index = i
|
||||
end
|
||||
end
|
||||
|
||||
if not meta_found
|
||||
if has_http_equiv_content_type and content_index >= 0
|
||||
token[:data][content_index][1] =
|
||||
'text/html; charset=%s' % @encoding
|
||||
meta_found = true
|
||||
end
|
||||
end
|
||||
|
||||
elsif token[:name].downcase == "head" and not meta_found
|
||||
# insert meta into empty head
|
||||
yield(:type => :StartTag, :name => "head", :data => token[:data])
|
||||
yield(:type => :EmptyTag, :name => "meta",
|
||||
:data => [["charset", @encoding]])
|
||||
yield(:type => :EndTag, :name => "head")
|
||||
meta_found = true
|
||||
next
|
||||
end
|
||||
|
||||
when :EndTag
|
||||
if token[:name].downcase == "head" and pending.any?
|
||||
# insert meta into head (if necessary) and flush pending queue
|
||||
yield pending.shift
|
||||
yield(:type => :EmptyTag, :name => "meta",
|
||||
:data => [["charset", @encoding]]) if not meta_found
|
||||
yield pending.shift while pending.any?
|
||||
meta_found = true
|
||||
state = :post_head
|
||||
end
|
||||
end
|
||||
|
||||
if state == :in_head
|
||||
pending << token
|
||||
else
|
||||
yield token
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
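The InjectMetaCharset filter above rewrites an existing meta charset declaration, or inserts one into the head, so that the serialized document advertises the actual output encoding. A minimal usage sketch, assuming `tokens` is any tree walker or token enumerator and UTF-8 is the target encoding; the serializer added later in this commit performs this wrapping itself whenever an :encoding option is supplied.

require 'html5lib/filters/inject_meta_charset'

filtered = HTML5lib::Filters::InjectMetaCharset.new(tokens, 'utf-8')
filtered.each { |token| handle(token) }  # `handle` is a placeholder for downstream processing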
199 vendor/plugins/HTML5lib/lib/html5lib/filters/optionaltags.rb vendored Normal file
@ -0,0 +1,199 @@
|
|||
require 'html5lib/constants'
|
||||
require 'html5lib/filters/base'
|
||||
|
||||
module HTML5lib
|
||||
module Filters
|
||||
|
||||
class OptionalTagFilter < Base
|
||||
def slider
|
||||
previous1 = previous2 = nil
|
||||
__getobj__.each do |token|
|
||||
yield previous2, previous1, token if previous1 != nil
|
||||
previous2 = previous1
|
||||
previous1 = token
|
||||
end
|
||||
yield previous2, previous1, nil
|
||||
end
|
||||
|
||||
def each
|
||||
slider do |previous, token, nexttok|
|
||||
type = token[:type]
|
||||
if type == :StartTag
|
||||
yield token unless token[:data].empty? and is_optional_start(token[:name], previous, nexttok)
|
||||
elsif type == :EndTag
|
||||
yield token unless is_optional_end(token[:name], nexttok)
|
||||
else
|
||||
yield token
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def is_optional_start(tagname, previous, nexttok)
|
||||
type = nexttok ? nexttok[:type] : nil
|
||||
if tagname == 'html'
|
||||
# An html element's start tag may be omitted if the first thing
|
||||
# inside the html element is not a space character or a comment.
|
||||
return ![:Comment, :SpaceCharacters].include?(type)
|
||||
elsif tagname == 'head'
|
||||
# A head element's start tag may be omitted if the first thing
|
||||
# inside the head element is an element.
|
||||
return type == :StartTag
|
||||
elsif tagname == 'body'
|
||||
# A body element's start tag may be omitted if the first thing
|
||||
# inside the body element is not a space character or a comment,
|
||||
# except if the first thing inside the body element is a script
|
||||
# or style element and the node immediately preceding the body
|
||||
# element is a head element whose end tag has been omitted.
|
||||
if [:Comment, :SpaceCharacters].include?(type)
|
||||
return false
|
||||
elsif type == :StartTag
|
||||
# XXX: we do not look at the preceding event, so we never omit
|
||||
# the body element's start tag if it's followed by a script or
|
||||
# a style element.
|
||||
return !%w[script style].include?(nexttok[:name])
|
||||
else
|
||||
return true
|
||||
end
|
||||
elsif tagname == 'colgroup'
|
||||
# A colgroup element's start tag may be omitted if the first thing
|
||||
# inside the colgroup element is a col element, and if the element
|
||||
# is not immediately preceeded by another colgroup element whose
|
||||
# end tag has been omitted.
|
||||
if type == :StartTag
|
||||
# XXX: we do not look at the preceding event, so instead we never
|
||||
# omit the colgroup element's end tag when it is immediately
|
||||
# followed by another colgroup element. See is_optional_end.
|
||||
return nexttok[:name] == "col"
|
||||
else
|
||||
return false
|
||||
end
|
||||
elsif tagname == 'tbody'
|
||||
# A tbody element's start tag may be omitted if the first thing
|
||||
# inside the tbody element is a tr element, and if the element is
|
||||
# not immediately preceeded by a tbody, thead, or tfoot element
|
||||
# whose end tag has been omitted.
|
||||
if type == :StartTag
|
||||
# omit the thead and tfoot elements' end tag when they are
|
||||
# immediately followed by a tbody element. See is_optional_end.
|
||||
if previous and previous[:type] == :EndTag and \
|
||||
%w(tbody thead tfoot).include?(previous[:name])
|
||||
return false
|
||||
end
|
||||
|
||||
return nexttok[:name] == 'tr'
|
||||
else
|
||||
return false
|
||||
end
|
||||
end
|
||||
return false
|
||||
end
|
||||
|
||||
def is_optional_end(tagname, nexttok)
|
||||
type = nexttok ? nexttok[:type] : nil
|
||||
if %w[html head body].include?(tagname)
|
||||
# An html element's end tag may be omitted if the html element
|
||||
# is not immediately followed by a space character or a comment.
|
||||
return ![:Comment, :SpaceCharacters].include?(type)
|
||||
elsif %w[li optgroup option tr].include?(tagname)
|
||||
# A li element's end tag may be omitted if the li element is
|
||||
# immediately followed by another li element or if there is
|
||||
# no more content in the parent element.
|
||||
# An optgroup element's end tag may be omitted if the optgroup
|
||||
# element is immediately followed by another optgroup element,
|
||||
# or if there is no more content in the parent element.
|
||||
# An option element's end tag may be omitted if the option
|
||||
# element is immediately followed by another option element,
|
||||
# or if there is no more content in the parent element.
|
||||
# A tr element's end tag may be omitted if the tr element is
|
||||
# immediately followed by another tr element, or if there is
|
||||
# no more content in the parent element.
|
||||
if type == :StartTag
|
||||
return nexttok[:name] == tagname
|
||||
else
|
||||
return type == :EndTag || type == nil
|
||||
end
|
||||
elsif %w(dt dd).include?(tagname)
|
||||
# A dt element's end tag may be omitted if the dt element is
|
||||
# immediately followed by another dt element or a dd element.
|
||||
# A dd element's end tag may be omitted if the dd element is
|
||||
# immediately followed by another dd element or a dt element,
|
||||
# or if there is no more content in the parent element.
|
||||
if type == :StartTag
|
||||
return %w(dt dd).include?(nexttok[:name])
|
||||
elsif tagname == 'dd'
|
||||
return type == :EndTag || type == nil
|
||||
else
|
||||
return false
|
||||
end
|
||||
elsif tagname == 'p'
|
||||
# A p element's end tag may be omitted if the p element is
|
||||
# immediately followed by an address, blockquote, dl, fieldset,
|
||||
# form, h1, h2, h3, h4, h5, h6, hr, menu, ol, p, pre, table,
|
||||
# or ul element, or if there is no more content in the parent
|
||||
# element.
|
||||
if type == :StartTag
|
||||
return %w(address blockquote dl fieldset form h1 h2 h3 h4 h5
|
||||
h6 hr menu ol p pre table ul).include?(nexttok[:name])
|
||||
else
|
||||
return type == :EndTag || type == nil
|
||||
end
|
||||
elsif tagname == 'colgroup'
|
||||
# A colgroup element's end tag may be omitted if the colgroup
|
||||
# element is not immediately followed by a space character or
|
||||
# a comment.
|
||||
if [:Comment, :SpaceCharacters].include?(type)
|
||||
return false
|
||||
elsif type == :StartTag
|
||||
# XXX: we also look for an immediately following colgroup
|
||||
# element. See is_optional_start.
|
||||
return nexttok[:name] != 'colgroup'
|
||||
else
|
||||
return true
|
||||
end
|
||||
elsif %w(thead tbody).include? tagname
|
||||
# A thead element's end tag may be omitted if the thead element
|
||||
# is immediately followed by a tbody or tfoot element.
|
||||
# A tbody element's end tag may be omitted if the tbody element
|
||||
# is immediately followed by a tbody or tfoot element, or if
|
||||
# there is no more content in the parent element.
|
||||
# A tfoot element's end tag may be omitted if the tfoot element
|
||||
# is immediately followed by a tbody element, or if there is no
|
||||
# more content in the parent element.
|
||||
# XXX: we never omit the end tag when the following element is
|
||||
# a tbody. See is_optional_start.
|
||||
if type == :StartTag
|
||||
return %w(tbody tfoot).include?(nexttok[:name])
|
||||
elsif tagname == 'tbody'
|
||||
return (type == :EndTag or type == nil)
|
||||
else
|
||||
return false
|
||||
end
|
||||
elsif tagname == 'tfoot'
|
||||
# A tfoot element's end tag may be omitted if the tfoot element
|
||||
# is immediately followed by a tbody element, or if there is no
|
||||
# more content in the parent element.
|
||||
# XXX: we never omit the end tag when the following element is
|
||||
# a tbody. See is_optional_start.
|
||||
if type == :StartTag
|
||||
return nexttok[:name] == 'tbody'
|
||||
else
|
||||
return type == :EndTag || type == nil
|
||||
end
|
||||
elsif %w(td th).include? tagname
|
||||
# A td element's end tag may be omitted if the td element is
|
||||
# immediately followed by a td or th element, or if there is
|
||||
# no more content in the parent element.
|
||||
# A th element's end tag may be omitted if the th element is
|
||||
# immediately followed by a td or th element, or if there is
|
||||
# no more content in the parent element.
|
||||
if type == :StartTag
|
||||
return %w(td th).include?(nexttok[:name])
|
||||
else
|
||||
return type == :EndTag || type == nil
|
||||
end
|
||||
end
|
||||
return false
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
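OptionalTagFilter drops start and end tags that the HTML5 rules encoded above mark as omissible, for example the implicit html/head/body tags or a </li> that is immediately followed by another <li>. A rough sketch of its effect; the input/output shown in the comments is an assumption derived from those rules rather than output copied from the test suite.

require 'html5lib/filters/optionaltags'

slim = HTML5lib::Filters::OptionalTagFilter.new(tokens)  # `tokens` is any token source
slim.each { |token| handle(token) }                      # omissible tags never reach the block

# e.g. tokens for "<html><head></head><body><ul><li>a</li><li>b</li></ul></body></html>"
# come out as roughly "<ul><li>a<li>b</ul>" once serialized.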
15 vendor/plugins/HTML5lib/lib/html5lib/filters/sanitizer.rb vendored Normal file
@ -0,0 +1,15 @@
|
|||
require 'html5lib/filters/base'
|
||||
require 'html5lib/sanitizer'
|
||||
|
||||
module HTML5lib
|
||||
module Filters
|
||||
class HTMLSanitizeFilter < Base
|
||||
include HTMLSanitizeModule
|
||||
def each
|
||||
__getobj__.each do |token|
|
||||
yield(sanitize_token(token))
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
36 vendor/plugins/HTML5lib/lib/html5lib/filters/whitespace.rb vendored Normal file
@ -0,0 +1,36 @@
|
|||
require 'html5lib/constants'
|
||||
require 'html5lib/filters/base'
|
||||
|
||||
module HTML5lib
|
||||
module Filters
|
||||
class WhitespaceFilter < Base
|
||||
|
||||
SPACE_PRESERVE_ELEMENTS = %w[pre textarea] + RCDATA_ELEMENTS
|
||||
SPACES = /[#{SPACE_CHARACTERS.join('')}]+/m
|
||||
|
||||
def each
|
||||
preserve = 0
|
||||
__getobj__.each do |token|
|
||||
case token[:type]
|
||||
when :StartTag
|
||||
if preserve > 0 or SPACE_PRESERVE_ELEMENTS.include?(token[:name])
|
||||
preserve += 1
|
||||
end
|
||||
|
||||
when :EndTag
|
||||
preserve -= 1 if preserve > 0
|
||||
|
||||
when :SpaceCharacters
|
||||
next if preserve == 0
|
||||
|
||||
when :Characters
|
||||
token[:data] = token[:data].sub(SPACES,' ') if preserve == 0
|
||||
end
|
||||
|
||||
yield token
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
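Taken together, the new filters can be chained by hand in the same order the HTMLSerializer added later in this commit applies them: meta charset injection, whitespace stripping, sanitization, then optional-tag omission. A sketch only; `walker` stands in for any tree walker and the encoding value is illustrative.

require 'html5lib/filters/inject_meta_charset'
require 'html5lib/filters/whitespace'
require 'html5lib/filters/sanitizer'
require 'html5lib/filters/optionaltags'

walker = HTML5lib::Filters::InjectMetaCharset.new(walker, 'utf-8')
walker = HTML5lib::Filters::WhitespaceFilter.new(walker)
walker = HTML5lib::Filters::HTMLSanitizeFilter.new(walker)
walker = HTML5lib::Filters::OptionalTagFilter.new(walker)
walker.each { |token| handle(token) }  # `handle` is a placeholder for downstream processing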
@ -37,13 +37,13 @@ module HTML5lib
|
|||
# :strict - raise an exception when a parse error is encountered
|
||||
# :tree - a treebuilder class controlling the type of tree that will be
|
||||
# returned. Built in treebuilders can be accessed through
|
||||
# html5lib.treebuilders.getTreeBuilder(treeType)
|
||||
# HTML5lib::TreeBuilders[treeType]
|
||||
def initialize(options = {})
|
||||
@strict = false
|
||||
@errors = []
|
||||
|
||||
@tokenizer = HTMLTokenizer
|
||||
@tree = TreeBuilders::REXMLTree::TreeBuilder
|
||||
@tree = TreeBuilders::REXML::TreeBuilder
|
||||
|
||||
options.each { |name, value| instance_variable_set("@#{name}", value) }
|
||||
|
||||
|
@ -62,7 +62,8 @@ module HTML5lib
|
|||
@errors = []
|
||||
|
||||
@tokenizer = @tokenizer.class unless Class === @tokenizer
|
||||
@tokenizer = @tokenizer.new(stream, :encoding => encoding, :parseMeta => innerHTML)
|
||||
@tokenizer = @tokenizer.new(stream, :encoding => encoding,
|
||||
:parseMeta => !innerHTML)
|
||||
|
||||
if innerHTML
|
||||
case @innerHTML = container.downcase
|
||||
|
@ -99,10 +100,13 @@ module HTML5lib
|
|||
case token[:type]
|
||||
when :Characters, :SpaceCharacters, :Comment
|
||||
@phase.send method, token[:data]
|
||||
when :StartTag, :Doctype
|
||||
when :StartTag
|
||||
@phase.send method, token[:name], token[:data]
|
||||
when :EndTag
|
||||
@phase.send method, token[:name]
|
||||
when :Doctype
|
||||
@phase.send method, token[:name], token[:publicId],
|
||||
token[:systemId], token[:correct]
|
||||
else
|
||||
parseError(token[:data])
|
||||
end
|
||||
|
@ -147,10 +151,6 @@ module HTML5lib
|
|||
raise ParseError if @strict
|
||||
end
|
||||
|
||||
# This error is not an error
|
||||
def atheistParseError
|
||||
end
|
||||
|
||||
# HTML5 specific normalizations to the token stream
|
||||
def normalizeToken(token)
|
||||
|
||||
|
@ -160,9 +160,7 @@ module HTML5lib
|
|||
# element. If it matches a void element atheists did the wrong
|
||||
# thing and if it doesn't it's wrong for everyone.
|
||||
|
||||
if VOID_ELEMENTS.include?(token[:name])
|
||||
atheistParseError
|
||||
else
|
||||
unless VOID_ELEMENTS.include?(token[:name])
|
||||
parseError(_('Solidus (/) incorrectly placed in tag.'))
|
||||
end
|
||||
|
||||
|
@ -181,7 +179,7 @@ module HTML5lib
|
|||
end
|
||||
|
||||
elsif token[:type] == :EndTag
|
||||
parseError(_('End tag contains unexpected attributes.')) if token[:data]
|
||||
parseError(_('End tag contains unexpected attributes.')) unless token[:data].empty?
|
||||
token[:name] = token[:name].downcase
|
||||
end
|
||||
|
||||
|
|
|
@ -5,7 +5,7 @@ module HTML5lib
|
|||
|
||||
handle_start 'html', 'head'
|
||||
|
||||
handle_end 'html'
|
||||
handle_end %w( html head body br ) => 'ImplyHead'
|
||||
|
||||
def processEOF
|
||||
startTagHead('head', {})
|
||||
|
@ -28,7 +28,7 @@ module HTML5lib
|
|||
@parser.phase.processStartTag(name, attributes)
|
||||
end
|
||||
|
||||
def endTagHtml(name)
|
||||
def endTagImplyHead(name)
|
||||
startTagHead('head', {})
|
||||
@parser.phase.processEndTag(name)
|
||||
end
|
||||
|
@ -38,4 +38,4 @@ module HTML5lib
|
|||
end
|
||||
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -5,15 +5,20 @@ module HTML5lib
|
|||
|
||||
# http://www.whatwg.org/specs/web-apps/current-work/#in-body
|
||||
|
||||
handle_start 'html', 'body', 'form', 'plaintext', 'a', 'button', 'xmp', 'table', 'hr', 'image'
|
||||
handle_start 'html'
|
||||
handle_start %w( base link meta script style ) => 'ProcessInHead'
|
||||
handle_start 'title'
|
||||
|
||||
handle_start 'input', 'textarea', 'select', 'isindex', %w( script style ), %w( marquee object )
|
||||
handle_start 'body', 'form', 'plaintext', 'a', 'button', 'xmp', 'table', 'hr', 'image'
|
||||
|
||||
handle_start %w( li dd dt ) => 'ListItem', %w( base link meta title ) => 'FromHead'
|
||||
handle_start 'input', 'textarea', 'select', 'isindex', %w( marquee object )
|
||||
|
||||
handle_start %w( li dd dt ) => 'ListItem'
|
||||
|
||||
handle_start %w( address blockquote center dir div dl fieldset listing menu ol p pre ul ) => 'CloseP'
|
||||
|
||||
handle_start %w( b big em font i nobr s small strike strong tt u ) => 'Formatting'
|
||||
handle_start %w( b big em font i s small strike strong tt u ) => 'Formatting'
|
||||
handle_start 'nobr'
|
||||
|
||||
handle_start %w( area basefont bgsound br embed img param spacer wbr ) => 'VoidFormatting'
|
||||
|
||||
|
@ -27,11 +32,15 @@ module HTML5lib
|
|||
|
||||
handle_end %w( address blockquote center div dl fieldset listing menu ol pre ul ) => 'Block'
|
||||
|
||||
handle_end HEADING_ELEMENTS => 'Heading'
|
||||
|
||||
handle_end %w( a b big em font i nobr s small strike strong tt u ) => 'Formatting'
|
||||
|
||||
handle_end %w( head frameset select optgroup option table caption colgroup col thead tfoot tbody tr td th ) => 'Misplaced'
|
||||
|
||||
handle_end %w( area basefont bgsound br embed hr image img input isindex param spacer wbr frame ) => 'None'
|
||||
handle_end 'br'
|
||||
|
||||
handle_end %w( area basefont bgsound embed hr image img input isindex param spacer wbr frame ) => 'None'
|
||||
|
||||
handle_end %w( noframes noscript noembed textarea xmp iframe ) => 'CdataTextAreaXmp'
|
||||
|
||||
|
@ -41,14 +50,14 @@ module HTML5lib
|
|||
super(parser, tree)
|
||||
|
||||
# for special handling of whitespace in <pre>
|
||||
@processSpaceCharactersPre = false
|
||||
@processSpaceCharactersDropNewline = false
|
||||
end
|
||||
|
||||
def processSpaceCharactersPre(data)
|
||||
def processSpaceCharactersDropNewline(data)
|
||||
#Sometimes (start of <pre> blocks) we want to drop leading newlines
|
||||
@processSpaceCharactersPre = false
|
||||
@processSpaceCharactersDropNewline = false
|
||||
if (data.length > 0 and data[0] == ?\n and
|
||||
@tree.openElements[-1].name == 'pre' and
|
||||
%w[pre textarea].include?(@tree.openElements[-1].name) and
|
||||
not @tree.openElements[-1].hasContent)
|
||||
data = data[1..-1]
|
||||
end
|
||||
|
@ -56,8 +65,8 @@ module HTML5lib
|
|||
end
|
||||
|
||||
def processSpaceCharacters(data)
|
||||
if @processSpaceCharactersPre
|
||||
processSpaceCharactersPre(data)
|
||||
if @processSpaceCharactersDropNewline
|
||||
processSpaceCharactersDropNewline(data)
|
||||
else
|
||||
super(data)
|
||||
end
|
||||
|
@ -71,11 +80,11 @@ module HTML5lib
|
|||
@tree.insertText(data)
|
||||
end
|
||||
|
||||
def startTagScriptStyle(name, attributes)
|
||||
def startTagProcessInHead(name, attributes)
|
||||
@parser.phases[:inHead].processStartTag(name, attributes)
|
||||
end
|
||||
|
||||
def startTagFromHead(name, attributes)
|
||||
def startTagTitle(name, attributes)
|
||||
@parser.parseError(_("Unexpected start tag (#{name}) that belongs in the head. Moved."))
|
||||
@parser.phases[:inHead].processStartTag(name, attributes)
|
||||
end
|
||||
|
@ -98,7 +107,7 @@ module HTML5lib
|
|||
def startTagCloseP(name, attributes)
|
||||
endTagP('p') if in_scope?('p')
|
||||
@tree.insertElement(name, attributes)
|
||||
@processSpaceCharactersPre = true if name == 'pre'
|
||||
@processSpaceCharactersDropNewline = true if name == 'pre'
|
||||
end
|
||||
|
||||
def startTagForm(name, attributes)
|
||||
|
@ -118,7 +127,12 @@ module HTML5lib
|
|||
|
||||
@tree.openElements.reverse.each_with_index do |node, i|
|
||||
if stopName.include?(node.name)
|
||||
(i + 1).times { @tree.openElements.pop }
|
||||
poppedNodes = (0..i).collect { @tree.openElements.pop }
|
||||
if i >= 1
|
||||
@parser.parseError("Missing end tag%s (%s)" % [
|
||||
(i>1 ? 's' : ''),
|
||||
poppedNodes.reverse.map {|item| item.name}.join(', ')])
|
||||
end
|
||||
break
|
||||
end
|
||||
|
||||
|
@ -140,15 +154,19 @@ module HTML5lib
|
|||
|
||||
def startTagHeading(name, attributes)
|
||||
endTagP('p') if in_scope?('p')
|
||||
HEADING_ELEMENTS.each do |element|
|
||||
if in_scope?(element)
|
||||
@parser.parseError(_("Unexpected start tag (#{name})."))
|
||||
|
||||
remove_open_elements_until { |element| HEADING_ELEMENTS.include?(element.name) }
|
||||
|
||||
break
|
||||
end
|
||||
end
|
||||
# Uncomment the following for IE7 behavior:
|
||||
# HEADING_ELEMENTS.each do |element|
|
||||
# if in_scope?(element)
|
||||
# @parser.parseError(_("Unexpected start tag (#{name})."))
|
||||
#
|
||||
# remove_open_elements_until do |element|
|
||||
# HEADING_ELEMENTS.include?(element.name)
|
||||
# end
|
||||
#
|
||||
# break
|
||||
# end
|
||||
# end
|
||||
@tree.insertElement(name, attributes)
|
||||
end
|
||||
|
||||
|
@ -168,6 +186,12 @@ module HTML5lib
|
|||
addFormattingElement(name, attributes)
|
||||
end
|
||||
|
||||
def startTagNobr(name, attributes)
|
||||
@tree.reconstructActiveFormattingElements
|
||||
processEndTag('nobr') if in_scope?('nobr')
|
||||
addFormattingElement(name, attributes)
|
||||
end
|
||||
|
||||
def startTagButton(name, attributes)
|
||||
if in_scope?('button')
|
||||
@parser.parseError(_('Unexpected start tag (button) implied end tag (button).'))
|
||||
|
@ -248,6 +272,7 @@ module HTML5lib
|
|||
# XXX Form element pointer checking here as well...
|
||||
@tree.insertElement(name, attributes)
|
||||
@parser.tokenizer.contentModelFlag = :RCDATA
|
||||
@processSpaceCharactersDropNewline = true
|
||||
end
|
||||
|
||||
# iframe, noembed noframes, noscript(if scripting enabled)
|
||||
|
@ -312,7 +337,7 @@ module HTML5lib
|
|||
|
||||
def endTagBlock(name)
|
||||
#Put us back in the right whitespace handling mode
|
||||
@processSpaceCharactersPre = false if name == 'pre'
|
||||
@processSpaceCharactersDropNewline = false if name == 'pre'
|
||||
|
||||
@tree.generateImpliedEndTags if in_scope?(name)
|
||||
|
||||
|
@ -494,6 +519,13 @@ module HTML5lib
|
|||
@parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
|
||||
end
|
||||
|
||||
def endTagBr(name)
|
||||
@parser.parseError(_("Unexpected end tag (br). Treated as br element."))
|
||||
@tree.reconstructActiveFormattingElements
|
||||
@tree.insertElement(name, {})
|
||||
@tree.openElements.pop()
|
||||
end
|
||||
|
||||
def endTagNone(name)
|
||||
# This handles elements with no end tag.
|
||||
@parser.parseError(_("This tag (#{name}) has no end tag"))
|
||||
|
@ -545,4 +577,4 @@ module HTML5lib
|
|||
end
|
||||
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -5,7 +5,9 @@ module HTML5lib
|
|||
|
||||
handle_start 'html', 'head', 'title', 'style', 'script', %w( base link meta )
|
||||
|
||||
handle_end 'head', 'html', %w( title style script )
|
||||
handle_end 'head'
|
||||
handle_end %w( html body br ) => 'ImplyAfterHead'
|
||||
handle_end %w( title style script )
|
||||
|
||||
def processEOF
|
||||
if ['title', 'style', 'script'].include?(name = @tree.openElements[-1].name)
|
||||
|
@ -63,7 +65,11 @@ module HTML5lib
|
|||
|
||||
def startTagBaseLinkMeta(name, attributes)
|
||||
element = @tree.createElement(name, attributes)
|
||||
appendToHead(element)
|
||||
if @tree.headPointer != nil and @parser.phase == @parser.phases[:inHead]
|
||||
appendToHead(element)
|
||||
else
|
||||
@tree.openElements[-1].appendChild(element)
|
||||
end
|
||||
end
|
||||
|
||||
def startTagOther(name, attributes)
|
||||
|
@ -80,7 +86,7 @@ module HTML5lib
|
|||
@parser.phase = @parser.phases[:afterHead]
|
||||
end
|
||||
|
||||
def endTagHtml(name)
|
||||
def endTagImplyAfterHead(name)
|
||||
anythingElse
|
||||
@parser.phase.processEndTag(name)
|
||||
end
|
||||
|
@ -117,4 +123,4 @@ module HTML5lib
|
|||
end
|
||||
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -89,10 +89,10 @@ module HTML5lib
|
|||
def endTagOther(name)
|
||||
@parser.parseError(_("Unexpected end tag (#{name}) in table context caused voodoo mode."))
|
||||
# Make all the special element rearranging voodoo kick in
|
||||
@parser.insertFromTable = true
|
||||
@tree.insertFromTable = true
|
||||
# Process the end tag in the "in body" mode
|
||||
@parser.phases[:inBody].processEndTag(name)
|
||||
@parser.insertFromTable = false
|
||||
@tree.insertFromTable = false
|
||||
end
|
||||
|
||||
protected
|
||||
|
@ -107,4 +107,4 @@ module HTML5lib
|
|||
end
|
||||
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -17,9 +17,95 @@ module HTML5lib
|
|||
@tree.insertComment(data, @tree.document)
|
||||
end
|
||||
|
||||
def processDoctype(name, error)
|
||||
@parser.parseError(_('Erroneous DOCTYPE.')) if error
|
||||
def processDoctype(name, publicId, systemId, correct)
|
||||
if name.downcase != 'html' or publicId or systemId
|
||||
@parser.parseError(_('Erroneous DOCTYPE.'))
|
||||
end
|
||||
# XXX need to update DOCTYPE tokens
|
||||
@tree.insertDoctype(name)
|
||||
|
||||
publicId = publicId.to_s.upcase
|
||||
|
||||
if name.downcase != 'html'
|
||||
# XXX quirks mode
|
||||
else
|
||||
if ["+//silmaril//dtd html pro v0r11 19970101//en",
|
||||
"-//advasoft ltd//dtd html 3.0 aswedit + extensions//en",
|
||||
"-//as//dtd html 3.0 aswedit + extensions//en",
|
||||
"-//ietf//dtd html 2.0 level 1//en",
|
||||
"-//ietf//dtd html 2.0 level 2//en",
|
||||
"-//ietf//dtd html 2.0 strict level 1//en",
|
||||
"-//ietf//dtd html 2.0 strict level 2//en",
|
||||
"-//ietf//dtd html 2.0 strict//en",
|
||||
"-//ietf//dtd html 2.0//en",
|
||||
"-//ietf//dtd html 2.1e//en",
|
||||
"-//ietf//dtd html 3.0//en",
|
||||
"-//ietf//dtd html 3.0//en//",
|
||||
"-//ietf//dtd html 3.2 final//en",
|
||||
"-//ietf//dtd html 3.2//en",
|
||||
"-//ietf//dtd html 3//en",
|
||||
"-//ietf//dtd html level 0//en",
|
||||
"-//ietf//dtd html level 0//en//2.0",
|
||||
"-//ietf//dtd html level 1//en",
|
||||
"-//ietf//dtd html level 1//en//2.0",
|
||||
"-//ietf//dtd html level 2//en",
|
||||
"-//ietf//dtd html level 2//en//2.0",
|
||||
"-//ietf//dtd html level 3//en",
|
||||
"-//ietf//dtd html level 3//en//3.0",
|
||||
"-//ietf//dtd html strict level 0//en",
|
||||
"-//ietf//dtd html strict level 0//en//2.0",
|
||||
"-//ietf//dtd html strict level 1//en",
|
||||
"-//ietf//dtd html strict level 1//en//2.0",
|
||||
"-//ietf//dtd html strict level 2//en",
|
||||
"-//ietf//dtd html strict level 2//en//2.0",
|
||||
"-//ietf//dtd html strict level 3//en",
|
||||
"-//ietf//dtd html strict level 3//en//3.0",
|
||||
"-//ietf//dtd html strict//en",
|
||||
"-//ietf//dtd html strict//en//2.0",
|
||||
"-//ietf//dtd html strict//en//3.0",
|
||||
"-//ietf//dtd html//en",
|
||||
"-//ietf//dtd html//en//2.0",
|
||||
"-//ietf//dtd html//en//3.0",
|
||||
"-//metrius//dtd metrius presentational//en",
|
||||
"-//microsoft//dtd internet explorer 2.0 html strict//en",
|
||||
"-//microsoft//dtd internet explorer 2.0 html//en",
|
||||
"-//microsoft//dtd internet explorer 2.0 tables//en",
|
||||
"-//microsoft//dtd internet explorer 3.0 html strict//en",
|
||||
"-//microsoft//dtd internet explorer 3.0 html//en",
|
||||
"-//microsoft//dtd internet explorer 3.0 tables//en",
|
||||
"-//netscape comm. corp.//dtd html//en",
|
||||
"-//netscape comm. corp.//dtd strict html//en",
|
||||
"-//o'reilly and associates//dtd html 2.0//en",
|
||||
"-//o'reilly and associates//dtd html extended 1.0//en",
|
||||
"-//spyglass//dtd html 2.0 extended//en",
|
||||
"-//sq//dtd html 2.0 hotmetal + extensions//en",
|
||||
"-//sun microsystems corp.//dtd hotjava html//en",
|
||||
"-//sun microsystems corp.//dtd hotjava strict html//en",
|
||||
"-//w3c//dtd html 3 1995-03-24//en",
|
||||
"-//w3c//dtd html 3.2 draft//en",
|
||||
"-//w3c//dtd html 3.2 final//en",
|
||||
"-//w3c//dtd html 3.2//en",
|
||||
"-//w3c//dtd html 3.2s draft//en",
|
||||
"-//w3c//dtd html 4.0 frameset//en",
|
||||
"-//w3c//dtd html 4.0 transitional//en",
|
||||
"-//w3c//dtd html experimental 19960712//en",
|
||||
"-//w3c//dtd html experimental 970421//en",
|
||||
"-//w3c//dtd w3 html//en",
|
||||
"-//w3o//dtd w3 html 3.0//en",
|
||||
"-//w3o//dtd w3 html 3.0//en//",
|
||||
"-//w3o//dtd w3 html strict 3.0//en//",
|
||||
"-//webtechs//dtd mozilla html 2.0//en",
|
||||
"-//webtechs//dtd mozilla html//en",
|
||||
"-/w3c/dtd html 4.0 transitional/en",
|
||||
"html"].include?(publicId) or
|
||||
(systemId == nil and
|
||||
["-//w3c//dtd html 4.01 frameset//EN",
|
||||
"-//w3c//dtd html 4.01 transitional//EN"].include?(publicId)) or
|
||||
(systemId == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")
|
||||
#XXX quirks mode
|
||||
end
|
||||
end
|
||||
|
||||
@parser.phase = @parser.phases[:rootElement]
|
||||
end
|
||||
|
||||
|
@ -46,4 +132,4 @@ module HTML5lib
|
|||
end
|
||||
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -101,7 +101,7 @@ module HTML5lib
|
|||
@tree.insertComment(data, @tree.openElements[-1])
|
||||
end
|
||||
|
||||
def processDoctype(name, error)
|
||||
def processDoctype(name, publicId, systemId, correct)
|
||||
@parser.parseError(_('Unexpected DOCTYPE. Ignored.'))
|
||||
end
|
||||
|
||||
|
@ -153,4 +153,4 @@ module HTML5lib
|
|||
end
|
||||
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
213 vendor/plugins/HTML5lib/lib/html5lib/inputstream.rb vendored
@ -33,9 +33,6 @@ module HTML5lib
|
|||
|
||||
options.each { |name, value| instance_variable_set("@#{name}", value) }
|
||||
|
||||
# List of where new lines occur
|
||||
@new_lines = []
|
||||
|
||||
# Raw Stream
|
||||
@raw_stream = open_stream(source)
|
||||
|
||||
|
@ -55,25 +52,30 @@ module HTML5lib
|
|||
|
||||
# Read bytes from stream decoding them into Unicode
|
||||
uString = @raw_stream.read
|
||||
unless @char_encoding == 'utf-8'
|
||||
if @char_encoding == 'windows-1252'
|
||||
@win1252 = true
|
||||
elsif @char_encoding != 'utf-8'
|
||||
begin
|
||||
require 'iconv'
|
||||
uString = Iconv.iconv('utf-8', @encoding, uString)[0]
|
||||
rescue
|
||||
begin
|
||||
uString = Iconv.iconv('utf-8', @char_encoding, uString).first
|
||||
rescue
|
||||
@win1252 = true
|
||||
end
|
||||
rescue LoadError
|
||||
@win1252 = true
|
||||
end
|
||||
end
|
||||
|
||||
# Normalize newlines and null characters
|
||||
uString.gsub!(/\r\n?/, "\n")
|
||||
uString.gsub!("\x00", [0xFFFD].pack('U'))
|
||||
|
||||
# Convert the unicode string into a list to be used as the data stream
|
||||
@data_stream = uString
|
||||
|
||||
@queue = []
|
||||
|
||||
# Reset position in the list to read from
|
||||
reset
|
||||
@tell = 0
|
||||
@line = @col = 0
|
||||
@line_lengths = []
|
||||
end
|
||||
|
||||
# Produces a file object from source.
|
||||
|
@ -95,11 +97,13 @@ module HTML5lib
|
|||
#First look for a BOM
|
||||
#This will also read past the BOM if present
|
||||
encoding = detect_bom
|
||||
|
||||
#If there is no BOM need to look for meta elements with encoding
|
||||
#information
|
||||
if encoding.nil? and @parse_meta
|
||||
encoding = detect_encoding_meta
|
||||
end
|
||||
|
||||
#Guess with chardet, if avaliable
|
||||
if encoding.nil? and @chardet
|
||||
begin
|
||||
|
@ -107,17 +111,18 @@ module HTML5lib
|
|||
require 'UniversalDetector' # gem install chardet
|
||||
buffer = @raw_stream.read
|
||||
encoding = UniversalDetector::chardet(buffer)['encoding']
|
||||
@raw_stream = open_stream(buffer)
|
||||
seek(buffer, 0)
|
||||
rescue LoadError
|
||||
end
|
||||
end
|
||||
|
||||
# If all else fails use the default encoding
|
||||
if encoding.nil?
|
||||
encoding = @DEFAULT_ENCODING
|
||||
end
|
||||
|
||||
#Substitute for equivalent encodings:
|
||||
encoding_sub = {'ascii' => 'windows-1252', 'iso-8859-1' => 'windows-1252'}
|
||||
#Substitute for equivalent encodings
|
||||
encoding_sub = {'iso-8859-1' => 'windows-1252'}
|
||||
|
||||
if encoding_sub.has_key?(encoding.downcase)
|
||||
encoding = encoding_sub[encoding.downcase]
|
||||
|
@ -132,14 +137,13 @@ module HTML5lib
|
|||
def detect_bom
|
||||
bom_dict = {
|
||||
"\xef\xbb\xbf" => 'utf-8',
|
||||
"\xff\xfe" => 'utf-16-le',
|
||||
"\xfe\xff" => 'utf-16-be',
|
||||
"\xff\xfe\x00\x00" => 'utf-32-le',
|
||||
"\x00\x00\xfe\xff" => 'utf-32-be'
|
||||
"\xff\xfe" => 'utf-16le',
|
||||
"\xfe\xff" => 'utf-16be',
|
||||
"\xff\xfe\x00\x00" => 'utf-32le',
|
||||
"\x00\x00\xfe\xff" => 'utf-32be'
|
||||
}
|
||||
|
||||
# Go to beginning of file and read in 4 bytes
|
||||
@raw_stream.seek(0)
|
||||
string = @raw_stream.read(4)
|
||||
return nil unless string
|
||||
|
||||
|
@ -156,45 +160,80 @@ module HTML5lib
|
|||
end
|
||||
end
|
||||
|
||||
#AT - move this to the caller?
|
||||
# Set the read position past the BOM if one was found, otherwise
|
||||
# set it to the start of the stream
|
||||
@raw_stream.seek(encoding ? seek : 0)
|
||||
seek(string, encoding ? seek : 0)
|
||||
|
||||
return encoding
|
||||
end
|
||||
|
||||
# Report the encoding declared by the meta element
|
||||
def detect_encoding_meta
|
||||
parser = EncodingParser.new(@raw_stream.read(@NUM_BYTES_META))
|
||||
@raw_stream.seek(0)
|
||||
return parser.get_encoding
|
||||
def seek(buffer, n)
|
||||
if @raw_stream.respond_to?(:unget)
|
||||
@raw_stream.unget(buffer[n..-1])
|
||||
return
|
||||
end
|
||||
|
||||
if @raw_stream.respond_to?(:seek)
|
||||
begin
|
||||
@raw_stream.seek(n)
|
||||
return
|
||||
rescue Errno::ESPIPE
|
||||
end
|
||||
end
|
||||
|
||||
require 'delegate'
|
||||
@raw_stream = SimpleDelegator.new(@raw_stream)
|
||||
|
||||
class << @raw_stream
|
||||
def read(chars=-1)
|
||||
if chars == -1 or chars > @data.length
|
||||
result = @data
|
||||
@data = ''
|
||||
return result if __getobj__.eof?
|
||||
return result + __getobj__.read if chars == -1
|
||||
return result + __getobj__.read(chars-result.length)
|
||||
elsif @data.empty?
|
||||
return __getobj__.read(chars)
|
||||
else
|
||||
result = @data[1...chars]
|
||||
@data = @data[chars..-1]
|
||||
return result
|
||||
end
|
||||
end
|
||||
|
||||
def unget(data)
|
||||
if !@data or @data.empty?
|
||||
@data = data
|
||||
else
|
||||
@data += data
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@raw_stream.unget(buffer[n .. -1])
|
||||
end
|
||||
|
||||
def determine_new_lines
|
||||
# Looks through the stream to find where new lines occur so
|
||||
# the position method can tell where it is.
|
||||
@new_lines.push(0)
|
||||
(0...@data_stream.length).each { |i| @new_lines.push(i) if @data_stream[i] == ?\n }
|
||||
# Report the encoding declared by the meta element
|
||||
def detect_encoding_meta
|
||||
buffer = @raw_stream.read(@NUM_BYTES_META)
|
||||
parser = EncodingParser.new(buffer)
|
||||
seek(buffer, 0)
|
||||
return parser.get_encoding
|
||||
end
|
||||
|
||||
# Returns (line, col) of the current position in the stream.
|
||||
def position
|
||||
# Generate list of new lines first time around
|
||||
determine_new_lines if @new_lines.empty?
|
||||
line = 0
|
||||
tell = @tell
|
||||
@new_lines.each do |pos|
|
||||
break unless pos < tell
|
||||
line += 1
|
||||
line, col = @line, @col
|
||||
@queue.reverse.each do |c|
|
||||
if c == "\n"
|
||||
line -= 1
|
||||
raise RuntimeError.new("col=#{col}") unless col == 0
|
||||
col = @line_lengths[line]
|
||||
else
|
||||
col -= 1
|
||||
end
|
||||
end
|
||||
col = tell - @new_lines[line-1] - 1
|
||||
return [line, col]
|
||||
end
|
||||
|
||||
# Resets the position in the stream back to the start.
|
||||
def reset
|
||||
@tell = 0
|
||||
return [line+1, col]
|
||||
end
|
||||
|
||||
# Read one character from the stream or queue if available. Return
|
||||
|
@ -203,11 +242,60 @@ module HTML5lib
|
|||
unless @queue.empty?
|
||||
return @queue.shift
|
||||
else
|
||||
begin
|
||||
@tell += 1
|
||||
return @data_stream[@tell - 1].chr
|
||||
rescue
|
||||
return :EOF
|
||||
c = @data_stream[@tell]
|
||||
@tell += 1
|
||||
|
||||
case c
|
||||
when 0x01 .. 0x7F
|
||||
if c == 0x0D
|
||||
# normalize newlines
|
||||
@tell += 1 if @data_stream[@tell] == 0x0A
|
||||
c = 0x0A
|
||||
end
|
||||
|
||||
# update position in stream
|
||||
if c == 0x0a
|
||||
@line_lengths << @col
|
||||
@line += 1
|
||||
@col = 0
|
||||
else
|
||||
@col += 1
|
||||
end
|
||||
|
||||
c.chr
|
||||
|
||||
when 0x80 .. 0xBF
|
||||
if !@win1252
|
||||
[0xFFFD].pack('U') # invalid utf-8
|
||||
elsif c <= 0x9f
|
||||
[ENTITIES_WINDOWS1252[c-0x80]].pack('U')
|
||||
else
|
||||
"\xC2" + c.chr # convert to utf-8
|
||||
end
|
||||
|
||||
when 0xC0 .. 0xFF
|
||||
if @win1252
|
||||
"\xC3" + (c-64).chr # convert to utf-8
|
||||
elsif @data_stream[@tell-1 .. -1] =~ /^
|
||||
( [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
|
||||
| \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
|
||||
| [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte
|
||||
| \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
|
||||
| \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
|
||||
| [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
|
||||
| \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
|
||||
)/x
|
||||
@tell += $1.length - 1
|
||||
$1
|
||||
else
|
||||
[0xFFFD].pack('U') # invalid utf-8
|
||||
end
|
||||
|
||||
when 0x00
|
||||
[0xFFFD].pack('U') # null characters are invalid
|
||||
|
||||
else
|
||||
:EOF
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -218,28 +306,15 @@ module HTML5lib
|
|||
def chars_until(characters, opposite=false)
|
||||
char_stack = [char]
|
||||
|
||||
unless char_stack[0] == :EOF
|
||||
while (characters.include? char_stack[-1]) == opposite
|
||||
unless @queue.empty?
|
||||
# First from the queue
|
||||
char_stack.push(@queue.shift)
|
||||
break if char_stack[-1] == :EOF
|
||||
else
|
||||
# Then the rest
|
||||
begin
|
||||
char_stack.push(@data_stream[@tell].chr)
|
||||
@tell += 1
|
||||
rescue
|
||||
char_stack.push(:EOF)
|
||||
break
|
||||
end
|
||||
end
|
||||
end
|
||||
while char_stack.last != :EOF
|
||||
break unless (characters.include?(char_stack.last)) == opposite
|
||||
char_stack.push(char)
|
||||
end
|
||||
|
||||
# Put the character stopped on back to the front of the queue
|
||||
# from where it came.
|
||||
@queue.insert(0, char_stack.pop)
|
||||
c = char_stack.pop
|
||||
@queue.insert(0, c) unless c == :EOF
|
||||
return char_stack.join('')
|
||||
end
|
||||
end
|
||||
|
@ -428,7 +503,7 @@ module HTML5lib
|
|||
space_found = false
|
||||
#Step 5 attribute name
|
||||
while true
|
||||
if @data.current_byte == '=' and attr_name:
|
||||
if @data.current_byte == '=' and attr_name
|
||||
break
|
||||
elsif SPACE_CHARACTERS.include?(@data.current_byte)
|
||||
space_found = true
|
||||
|
|
|
@ -69,15 +69,22 @@ module HTML5lib
|
|||
|
||||
# ensure that non-void XHTML elements have content so that separate
|
||||
# open and close tags are emitted
|
||||
if token[:type] == :EndTag and \
|
||||
not VOID_ELEMENTS.include? token[:name] and \
|
||||
token[:name] == @tree.openElements[-1].name and \
|
||||
not @tree.openElements[-1].hasContent
|
||||
@tree.insertText('') unless
|
||||
@tree.openElements.any? {|e|
|
||||
e.attributes.keys.include? 'xmlns' and
|
||||
e.attributes['xmlns'] != 'http://www.w3.org/1999/xhtml'
|
||||
}
|
||||
if token[:type] == :EndTag
|
||||
if VOID_ELEMENTS.include? token[:name]
|
||||
if @tree.openElements[-1].name != token["name"]:
|
||||
token[:type] = :EmptyTag
|
||||
token["data"] ||= {}
|
||||
end
|
||||
else
|
||||
if token[:name] == @tree.openElements[-1].name and \
|
||||
not @tree.openElements[-1].hasContent
|
||||
@tree.insertText('') unless
|
||||
@tree.openElements.any? {|e|
|
||||
e.attributes.keys.include? 'xmlns' and
|
||||
e.attributes['xmlns'] != 'http://www.w3.org/1999/xhtml'
|
||||
}
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
return token
|
||||
|
|
|
@ -1,12 +1,22 @@
|
|||
require 'html5lib/tokenizer'
|
||||
require 'cgi'
|
||||
|
||||
module HTML5lib
|
||||
|
||||
# This module provides sanitization of XHTML+MathML+SVG
|
||||
# and of inline style attributes.
|
||||
#
|
||||
# It can be either at the Tokenizer stage:
|
||||
#
|
||||
# HTMLParser.parse(html, :tokenizer => HTMLSanitizer)
|
||||
#
|
||||
# or, if you already have a parse tree (in this example, a REXML tree),
|
||||
# at the Serializer stage:
|
||||
#
|
||||
# tokens = TreeWalkers.getTreeWalker('rexml').new(tree)
|
||||
# HTMLSerializer.serialize(tokens, {:encoding=>'utf-8',
|
||||
# :sanitize => true})
|
||||
|
||||
class HTMLSanitizer < HTMLTokenizer
|
||||
module HTMLSanitizeModule
|
||||
|
||||
ACCEPTABLE_ELEMENTS = %w[a abbr acronym address area b big blockquote br
|
||||
button caption center cite code col colgroup dd del dfn dir div dl dt
|
||||
|
@ -64,7 +74,7 @@ module HTML5lib
|
|||
xlink:show xlink:title xlink:type xml:base xml:lang xml:space xmlns
|
||||
xmlns:xlink y y1 y2 zoomAndPan]
|
||||
|
||||
ATTR_VAL_IS_URI = %w[href src cite action longdesc xlink:href]
|
||||
ATTR_VAL_IS_URI = %w[href src cite action longdesc xlink:href xml:base]
|
||||
|
||||
ACCEPTABLE_CSS_PROPERTIES = %w[azimuth background-color
|
||||
border-bottom-color border-collapse border-color border-left-color
|
||||
|
@ -96,19 +106,7 @@ module HTML5lib
|
|||
ALLOWED_SVG_PROPERTIES = ACCEPTABLE_SVG_PROPERTIES
|
||||
ALLOWED_PROTOCOLS = ACCEPTABLE_PROTOCOLS
|
||||
|
||||
# Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and
|
||||
# stripping out all # attributes not in ALLOWED_ATTRIBUTES. Style
|
||||
# attributes are parsed, and a restricted set, # specified by
|
||||
# ALLOWED_CSS_PROPERTIES and ALLOWED_CSS_KEYWORDS, are allowed through.
|
||||
# attributes in ATTR_VAL_IS_URI are scanned, and only URI schemes specified
|
||||
# in ALLOWED_PROTOCOLS are allowed.
|
||||
#
|
||||
# sanitize_html('<script> do_nasty_stuff() </script>')
|
||||
# => <script> do_nasty_stuff() </script>
|
||||
# sanitize_html('<a href="javascript: sucker();">Click here for $100</a>')
|
||||
# => <a>Click here for $100</a>
|
||||
def each
|
||||
super do |token|
|
||||
def sanitize_token(token)
|
||||
case token[:type]
|
||||
when :StartTag, :EndTag, :EmptyTag
|
||||
if ALLOWED_ELEMENTS.include?(token[:name])
|
||||
|
@ -116,7 +114,7 @@ module HTML5lib
|
|||
attrs = Hash[*token[:data].flatten]
|
||||
attrs.delete_if { |attr,v| !ALLOWED_ATTRIBUTES.include?(attr) }
|
||||
ATTR_VAL_IS_URI.each do |attr|
|
||||
val_unescaped = CGI.unescapeHTML(attrs[attr].to_s).gsub(/[\000-\040\177\s]+|\302[\200-\240]/,'').downcase
|
||||
val_unescaped = CGI.unescapeHTML(attrs[attr].to_s).gsub(/`|[\000-\040\177\s]+|\302[\200-\240]/,'').downcase
|
||||
if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ and !ALLOWED_PROTOCOLS.include?(val_unescaped.split(':')[0])
|
||||
attrs.delete attr
|
||||
end
|
||||
|
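For clarity, the URI check in the hunk above can be read as the following standalone sketch (unsafe_uri? is a made-up helper name; the regexp and ALLOWED_PROTOCOLS come from this file):

    require 'cgi'

    def unsafe_uri?(value, allowed_protocols)
      cleaned = CGI.unescapeHTML(value.to_s).
                  gsub(/`|[\000-\040\177\s]+|\302[\200-\240]/, '').downcase
      !!(cleaned =~ /^[a-z0-9][-+.a-z0-9]*:/) &&
        !allowed_protocols.include?(cleaned.split(':').first)
    end

    unsafe_uri?("java\tscript:alert('XSS')", %w[http https ftp])  # => true
    unsafe_uri?("http://example.com/",       %w[http https ftp])  # => false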
@ -126,7 +124,7 @@ module HTML5lib
|
|||
end
|
||||
token[:data] = attrs.map {|k,v| [k,v]}
|
||||
end
|
||||
yield token
|
||||
return token
|
||||
else
|
||||
if token[:type] == :EndTag
|
||||
token[:data] = "</#{token[:name]}>"
|
||||
|
@ -139,12 +137,14 @@ module HTML5lib
|
|||
token[:data].insert(-2,'/') if token[:type] == :EmptyTag
|
||||
token[:type] = :Characters
|
||||
token.delete(:name)
|
||||
yield token
|
||||
return token
|
||||
end
|
||||
when :Comment
|
||||
token[:data] = ""
|
||||
return token
|
||||
else
|
||||
yield token
|
||||
return token
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def sanitize_css(style)
|
||||
|
@ -174,4 +174,14 @@ module HTML5lib
|
|||
style = clean.join(' ')
|
||||
end
|
||||
end
|
||||
|
||||
class HTMLSanitizer < HTMLTokenizer
|
||||
include HTMLSanitizeModule
|
||||
def each
|
||||
super do |token|
|
||||
yield(sanitize_token(token))
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
end
|
||||
|
|
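The point of extracting sanitize_token into HTMLSanitizeModule is that the same logic can back both the tokenizer subclass above and a serializer-side filter (the serializer below requires 'html5lib/filters/sanitizer' and wraps the tree walker in Filters::HTMLSanitizeFilter). That filter file is not part of this hunk, so the following is only a hedged sketch of what such a wrapper could look like:

    module HTML5lib
      module Filters
        # Hypothetical sketch only; the real filters/sanitizer.rb is not shown here.
        class HTMLSanitizeFilter
          include HTMLSanitizeModule

          def initialize(source)
            @source = source        # any token stream responding to #each
          end

          def each
            @source.each { |token| yield sanitize_token(token) }
          end
        end
      end
    end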
2 vendor/plugins/HTML5lib/lib/html5lib/serializer.rb vendored Normal file
|
@ -0,0 +1,2 @@
|
|||
require 'html5lib/serializer/htmlserializer'
|
||||
require 'html5lib/serializer/xhtmlserializer'
|
177 vendor/plugins/HTML5lib/lib/html5lib/serializer/htmlserializer.rb vendored Normal file
|
@ -0,0 +1,177 @@
|
|||
require 'html5lib/constants'
|
||||
|
||||
module HTML5lib
|
||||
|
||||
class HTMLSerializer
|
||||
|
||||
def self.serialize(stream, options = {})
|
||||
new(options).serialize(stream, options[:encoding])
|
||||
end
|
||||
|
||||
def escape(string)
|
||||
string.gsub("&", "&").gsub("<", "<").gsub(">", ">")
|
||||
end
|
||||
|
||||
def initialize(options={})
|
||||
@quote_attr_values = false
|
||||
@quote_char = '"'
|
||||
@use_best_quote_char = true
|
||||
@minimize_boolean_attributes = true
|
||||
|
||||
@use_trailing_solidus = false
|
||||
@space_before_trailing_solidus = true
|
||||
@escape_lt_in_attrs = false
|
||||
|
||||
@omit_optional_tags = true
|
||||
@sanitize = false
|
||||
|
||||
@strip_whitespace = false
|
||||
|
||||
@inject_meta_charset = true
|
||||
|
||||
options.each do |name, value|
|
||||
next unless instance_variables.include?("@#{name}")
|
||||
@use_best_quote_char = false if name.to_s == 'quote_char'
|
||||
instance_variable_set("@#{name}", value)
|
||||
end
|
||||
|
||||
@errors = []
|
||||
end
|
||||
|
||||
def serialize(treewalker, encoding=nil)
|
||||
in_cdata = false
|
||||
@errors = []
|
||||
|
||||
if encoding and @inject_meta_charset
|
||||
require 'html5lib/filters/inject_meta_charset'
|
||||
treewalker = Filters::InjectMetaCharset.new(treewalker, encoding)
|
||||
end
|
||||
|
||||
if @strip_whitespace
|
||||
require 'html5lib/filters/whitespace'
|
||||
treewalker = Filters::WhitespaceFilter.new(treewalker)
|
||||
end
|
||||
|
||||
if @sanitize
|
||||
require 'html5lib/filters/sanitizer'
|
||||
treewalker = Filters::HTMLSanitizeFilter.new(treewalker)
|
||||
end
|
||||
|
||||
if @omit_optional_tags
|
||||
require 'html5lib/filters/optionaltags'
|
||||
treewalker = Filters::OptionalTagFilter.new(treewalker)
|
||||
end
|
||||
|
||||
result = []
|
||||
treewalker.each do |token|
|
||||
type = token[:type]
|
||||
if type == :Doctype
|
||||
doctype = "<!DOCTYPE %s>" % token[:name]
|
||||
result << doctype
|
||||
|
||||
elsif [:Characters, :SpaceCharacters].include? type
|
||||
if type == :SpaceCharacters or in_cdata
|
||||
if in_cdata and token[:data].include?("</")
|
||||
serializeError(_("Unexpected </ in CDATA"))
|
||||
end
|
||||
result << token[:data]
|
||||
else
|
||||
result << escape(token[:data])
|
||||
end
|
||||
|
||||
elsif [:StartTag, :EmptyTag].include? type
|
||||
name = token[:name]
|
||||
if RCDATA_ELEMENTS.include?(name)
|
||||
in_cdata = true
|
||||
elsif in_cdata
|
||||
serializeError(_("Unexpected child element of a CDATA element"))
|
||||
end
|
||||
attributes = []
|
||||
for k,v in attrs = token[:data].to_a.sort
|
||||
attributes << ' '
|
||||
|
||||
attributes << k
|
||||
if not @minimize_boolean_attributes or \
|
||||
(!(BOOLEAN_ATTRIBUTES[name]||[]).include?(k) \
|
||||
and !BOOLEAN_ATTRIBUTES[:global].include?(k))
|
||||
attributes << "="
|
||||
if @quote_attr_values or v.empty?
|
||||
quote_attr = true
|
||||
else
|
||||
quote_attr = (SPACE_CHARACTERS + %w(< > " ')).any? {|c| v.include?(c)}
|
||||
end
|
||||
v = v.gsub("&", "&")
|
||||
v = v.gsub("<", "<") if @escape_lt_in_attrs
|
||||
if quote_attr
|
||||
quote_char = @quote_char
|
||||
if @use_best_quote_char
|
||||
if v.index("'") and !v.index('"')
|
||||
quote_char = '"'
|
||||
elsif v.index('"') and !v.index("'")
|
||||
quote_char = "'"
|
||||
end
|
||||
end
|
||||
if quote_char == "'"
|
||||
v = v.gsub("'", "'")
|
||||
else
|
||||
v = v.gsub('"', """)
|
||||
end
|
||||
attributes << quote_char << v << quote_char
|
||||
else
|
||||
attributes << v
|
||||
end
|
||||
end
|
||||
end
|
||||
if VOID_ELEMENTS.include?(name) and @use_trailing_solidus
|
||||
if @space_before_trailing_solidus
|
||||
attributes << " /"
|
||||
else
|
||||
attributes << "/"
|
||||
end
|
||||
end
|
||||
result << "<%s%s>" % [name, attributes.join('')]
|
||||
|
||||
elsif type == :EndTag
|
||||
name = token[:name]
|
||||
if RCDATA_ELEMENTS.include?(name)
|
||||
in_cdata = false
|
||||
elsif in_cdata
|
||||
serializeError(_("Unexpected child element of a CDATA element"))
|
||||
end
|
||||
end_tag = "</#{name}>"
|
||||
result << end_tag
|
||||
|
||||
elsif type == :Comment
|
||||
data = token[:data]
|
||||
serializeError(_("Comment contains --")) if data.index("--")
|
||||
comment = "<!--%s-->" % token[:data]
|
||||
result << comment
|
||||
|
||||
else
|
||||
serializeError(token[:data])
|
||||
end
|
||||
end
|
||||
|
||||
if encoding and encoding != 'utf-8'
|
||||
require 'iconv'
|
||||
Iconv.iconv(encoding, 'utf-8', result.join('')).first
|
||||
else
|
||||
result.join('')
|
||||
end
|
||||
end
|
||||
|
||||
alias :render :serialize
|
||||
|
||||
def serializeError(data="XXX ERROR MESSAGE NEEDED")
|
||||
# XXX The idea is to make data mandatory.
|
||||
@errors.push(data)
|
||||
if @strict
|
||||
raise SerializeError
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
# Error in serialized tree
|
||||
class SerializeError < Exception
|
||||
end
|
||||
end
|
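A short sketch of driving the serializer defined above from a parsed tree (the document variable and the option values are illustrative; the factory comes from the treewalkers module added later in this commit):

    require 'html5lib/treewalkers'
    require 'html5lib/serializer'

    tokens = HTML5lib::TreeWalkers['rexml'].new(document)
    html   = HTML5lib::HTMLSerializer.serialize(tokens,
               :encoding           => 'utf-8',
               :omit_optional_tags => false,
               :quote_attr_values  => true)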
19 vendor/plugins/HTML5lib/lib/html5lib/serializer/xhtmlserializer.rb vendored Normal file
|
@ -0,0 +1,19 @@
|
|||
require 'html5lib/serializer/htmlserializer'
|
||||
|
||||
module HTML5lib
|
||||
|
||||
class XHTMLSerializer < HTMLSerializer
|
||||
DEFAULTS = {
|
||||
:quote_attr_values => true,
|
||||
:minimize_boolean_attributes => false,
|
||||
:use_trailing_solidus => true,
|
||||
:escape_lt_in_attrs => true,
|
||||
:omit_optional_tags => false
|
||||
}
|
||||
|
||||
def initialize(options={})
|
||||
super(DEFAULTS.clone.update(options))
|
||||
end
|
||||
end
|
||||
|
||||
end
|
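XHTMLSerializer only swaps the defaults; because initialize merges DEFAULTS first, explicitly passed options still win. A one-line sketch (the option value is chosen purely for illustration):

    xhtml_serializer = HTML5lib::XHTMLSerializer.new(:use_trailing_solidus => false)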
402 vendor/plugins/HTML5lib/lib/html5lib/tokenizer.rb vendored
|
@ -41,19 +41,31 @@ module HTML5lib
|
|||
:attributeValueUnQuoted => :attributeValueUnQuotedState,
|
||||
:bogusComment => :bogusCommentState,
|
||||
:markupDeclarationOpen => :markupDeclarationOpenState,
|
||||
:commentStart => :commentStartState,
|
||||
:commentStartDash => :commentStartDashState,
|
||||
:comment => :commentState,
|
||||
:commentDash => :commentDashState,
|
||||
:commentEndDash => :commentEndDashState,
|
||||
:commentEnd => :commentEndState,
|
||||
:doctype => :doctypeState,
|
||||
:beforeDoctypeName => :beforeDoctypeNameState,
|
||||
:doctypeName => :doctypeNameState,
|
||||
:afterDoctypeName => :afterDoctypeNameState,
|
||||
:beforeDoctypePublicIdentifier => :beforeDoctypePublicIdentifierState,
|
||||
:doctypePublicIdentifierDoubleQuoted => :doctypePublicIdentifierDoubleQuotedState,
|
||||
:doctypePublicIdentifierSingleQuoted => :doctypePublicIdentifierSingleQuotedState,
|
||||
:afterDoctypePublicIdentifier => :afterDoctypePublicIdentifierState,
|
||||
:beforeDoctypeSystemIdentifier => :beforeDoctypeSystemIdentifierState,
|
||||
:doctypeSystemIdentifierDoubleQuoted => :doctypeSystemIdentifierDoubleQuotedState,
|
||||
:doctypeSystemIdentifierSingleQuoted => :doctypeSystemIdentifierSingleQuotedState,
|
||||
:afterDoctypeSystemIdentifier => :afterDoctypeSystemIdentifierState,
|
||||
:bogusDoctype => :bogusDoctypeState
|
||||
}
|
||||
|
||||
# Setup the initial tokenizer state
|
||||
@contentModelFlag = :PCDATA
|
||||
@state = @states[:data]
|
||||
@escapeFlag = false
|
||||
@lastFourChars = []
|
||||
|
||||
# The current token being created
|
||||
@currentToken = nil
|
||||
|
@ -68,7 +80,6 @@ module HTML5lib
|
|||
# to return we yield the token which pauses processing until the next token
|
||||
# is requested.
|
||||
def each
|
||||
@stream.reset
|
||||
@tokenQueue = []
|
||||
# Start processing. When EOF is reached @state will return false
|
||||
# instead of true and the loop will terminate.
|
||||
|
@ -134,24 +145,14 @@ module HTML5lib
|
|||
# If the integer is between 127 and 160 (so 128 and bigger and 159 and
|
||||
# smaller) we need to do the "windows trick".
|
||||
if (127...160).include? charAsInt
|
||||
#XXX - removed parse error from windows 1252 entity for now
|
||||
#we may want to reenable this later
|
||||
#@tokenQueue.push({:type => :ParseError, :data =>
|
||||
# _("Entity used with illegal number (windows-1252 reference).")})
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Entity used with illegal number (windows-1252 reference).")})
|
||||
|
||||
charAsInt = ENTITIES_WINDOWS1252[charAsInt - 128]
|
||||
end
|
||||
|
||||
# 0 is not a good number.
|
||||
if charAsInt == 0
|
||||
charAsInt = 65533
|
||||
end
|
||||
|
||||
if charAsInt <= 0x10FFFF
|
||||
if charAsInt > 0 and charAsInt <= 1114111
|
||||
char = [charAsInt].pack('U')
|
||||
else
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Numeric entity couldn't be converted to character.")})
|
||||
end
|
||||
|
||||
# Discard the ; if present. Otherwise, put it back on the queue and
|
||||
|
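A concrete instance of the windows-1252 remapping above, assuming the usual ENTITIES_WINDOWS1252 table (the values shown are illustrative):

    # &#150; names a C1 control in Unicode, but pages almost always mean the
    # windows-1252 byte 0x96, i.e. EN DASH.
    charAsInt = 150
    charAsInt = ENTITIES_WINDOWS1252[charAsInt - 128]   # => 8211
    [charAsInt].pack('U')                               # => "\xE2\x80\x93" (U+2013)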
@ -168,7 +169,10 @@ module HTML5lib
|
|||
def consumeEntity
|
||||
char = nil
|
||||
charStack = [@stream.char]
|
||||
if charStack[0] == "#"
|
||||
if SPACE_CHARACTERS.include?(charStack[0]) or
|
||||
[:EOF, '<', '&'].include?(charStack[0])
|
||||
@stream.queue+= charStack
|
||||
elsif charStack[0] == "#"
|
||||
# We might have a number entity here.
|
||||
charStack += [@stream.char, @stream.char]
|
||||
if charStack.include? :EOF
|
||||
|
@ -195,10 +199,6 @@ module HTML5lib
|
|||
_("Numeric entity expected but none found.")})
|
||||
end
|
||||
end
|
||||
# Break out if we reach the end of the file
|
||||
elsif charStack[0] == :EOF
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Entity expected. Got end of file instead.")})
|
||||
else
|
||||
# At this point in the process might have named entity. Entities
|
||||
# are stored in the global variable "entities".
|
||||
|
@ -268,14 +268,33 @@ module HTML5lib
|
|||
# statements should be.
|
||||
def dataState
|
||||
data = @stream.char
|
||||
if data == "&" and (@contentModelFlag == :PCDATA or
|
||||
@contentModelFlag == :RCDATA)
|
||||
|
||||
if @contentModelFlag == :CDATA or @contentModelFlag == :RCDATA
|
||||
@lastFourChars << data
|
||||
@lastFourChars.shift if @lastFourChars.length > 4
|
||||
end
|
||||
|
||||
if data == "&" and [:PCDATA,:RCDATA].include?(@contentModelFlag)
|
||||
@state = @states[:entityData]
|
||||
elsif data == "<" and @contentModelFlag != :PLAINTEXT
|
||||
@state = @states[:tagOpen]
|
||||
|
||||
elsif data == "-" and [:CDATA,:RCDATA].include?(@contentModelFlag) and
|
||||
@escapeFlag == false and @lastFourChars.join('') == "<!--"
|
||||
@escapeFlag = true
|
||||
@tokenQueue.push({:type => :Characters, :data => data})
|
||||
|
||||
elsif data == "<" and @escapeFlag == false and
|
||||
[:PCDATA,:CDATA,:RCDATA].include?(@contentModelFlag)
|
||||
@state = @states[:tagOpen]
|
||||
|
||||
elsif data == ">" and [:CDATA,:RCDATA].include?(@contentModelFlag) and
|
||||
@escapeFlag == true and @lastFourChars[1..-1].join('') == "-->"
|
||||
@escapeFlag = false
|
||||
@tokenQueue.push({:type => :Characters, :data => data})
|
||||
|
||||
elsif data == :EOF
|
||||
# Tokenization ends.
|
||||
return false
|
||||
|
||||
elsif SPACE_CHARACTERS.include? data
|
||||
# Directly after emitting a token you switch back to the "data
|
||||
# state". At that point SPACE_CHARACTERS are important so they are
|
||||
|
@ -286,7 +305,7 @@ module HTML5lib
|
|||
data + @stream.chars_until(SPACE_CHARACTERS, true)})
|
||||
else
|
||||
@tokenQueue.push({:type => :Characters, :data =>
|
||||
data + @stream.chars_until(["&", "<"])})
|
||||
data + @stream.chars_until(%w[& < > -])})
|
||||
end
|
||||
return true
|
||||
end
|
||||
|
@ -381,8 +400,6 @@ module HTML5lib
|
|||
# emitting the end tag token.
|
||||
@contentModelFlag = :PCDATA
|
||||
else
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Expected closing tag after seeing '</'. None found.")})
|
||||
@tokenQueue.push({:type => :Characters, :data => "</"})
|
||||
@state = @states[:data]
|
||||
|
||||
|
@ -392,29 +409,27 @@ module HTML5lib
|
|||
end
|
||||
end
|
||||
|
||||
if @contentModelFlag == :PCDATA
|
||||
data = @stream.char
|
||||
if data == :EOF
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Expected closing tag. Unexpected end of file.")})
|
||||
@tokenQueue.push({:type => :Characters, :data => "</"})
|
||||
@state = @states[:data]
|
||||
elsif ASCII_LETTERS.include? data
|
||||
@currentToken =\
|
||||
{:type => :EndTag, :name => data, :data => []}
|
||||
@state = @states[:tagName]
|
||||
elsif data == ">"
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Expected closing tag. Got '>' instead. Ignoring '</>'.")})
|
||||
@state = @states[:data]
|
||||
else
|
||||
# XXX data can be _'_...
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Expected closing tag. Unexpected character '" + data + "' found.")})
|
||||
@stream.queue.push(data)
|
||||
@state = @states[:bogusComment]
|
||||
end
|
||||
data = @stream.char
|
||||
if data == :EOF
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Expected closing tag. Unexpected end of file.")})
|
||||
@tokenQueue.push({:type => :Characters, :data => "</"})
|
||||
@state = @states[:data]
|
||||
elsif ASCII_LETTERS.include? data
|
||||
@currentToken = {:type => :EndTag, :name => data, :data => []}
|
||||
@state = @states[:tagName]
|
||||
elsif data == ">"
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Expected closing tag. Got '>' instead. Ignoring '</>'.")})
|
||||
@state = @states[:data]
|
||||
else
|
||||
# XXX data can be _'_...
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Expected closing tag. Unexpected character '#{data}' found.")})
|
||||
@stream.queue.push(data)
|
||||
@state = @states[:bogusComment]
|
||||
end
|
||||
|
||||
return true
|
||||
end
|
||||
|
||||
|
@ -431,11 +446,6 @@ module HTML5lib
|
|||
@stream.chars_until(ASCII_LETTERS, true)
|
||||
elsif data == ">"
|
||||
emitCurrentToken
|
||||
elsif data == "<"
|
||||
@stream.queue.push(data)
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected < character when getting the tag name.")})
|
||||
emitCurrentToken
|
||||
elsif data == "/"
|
||||
processSolidusInTag
|
||||
@state = @states[:beforeAttributeName]
|
||||
|
@ -460,11 +470,6 @@ module HTML5lib
|
|||
emitCurrentToken
|
||||
elsif data == "/"
|
||||
processSolidusInTag
|
||||
elsif data == "<"
|
||||
@stream.queue.push(data)
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected < character. Expected attribute name instead.")})
|
||||
emitCurrentToken
|
||||
else
|
||||
@currentToken[:data].push([data, ""])
|
||||
@state = @states[:attributeName]
|
||||
|
@ -495,12 +500,6 @@ module HTML5lib
|
|||
elsif data == "/"
|
||||
processSolidusInTag
|
||||
@state = @states[:beforeAttributeName]
|
||||
elsif data == "<"
|
||||
@stream.queue.push(data)
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected < character in attribute name.")})
|
||||
emitCurrentToken
|
||||
leavingThisState = false
|
||||
else
|
||||
@currentToken[:data][-1][0] += data
|
||||
leavingThisState = false
|
||||
|
@ -538,11 +537,6 @@ module HTML5lib
|
|||
elsif data == "/"
|
||||
processSolidusInTag
|
||||
@state = @states[:beforeAttributeName]
|
||||
elsif data == "<"
|
||||
@stream.queue.push(data)
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected < character. Expected = or end of tag.")})
|
||||
emitCurrentToken
|
||||
elsif data == :EOF
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected end of file. Expected = or end of tag.")})
|
||||
|
@ -567,11 +561,6 @@ module HTML5lib
|
|||
@state = @states[:attributeValueSingleQuoted]
|
||||
elsif data == ">"
|
||||
emitCurrentToken
|
||||
elsif data == "<"
|
||||
@stream.queue.push(data)
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected < character. Expected attribute value.")})
|
||||
emitCurrentToken
|
||||
elsif data == :EOF
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected end of file. Expected attribute value.")})
|
||||
|
@ -625,11 +614,6 @@ module HTML5lib
|
|||
processEntityInAttribute
|
||||
elsif data == ">"
|
||||
emitCurrentToken
|
||||
elsif data == "<"
|
||||
@stream.queue.push(data)
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected < character in attribute value.")})
|
||||
emitCurrentToken
|
||||
elsif data == :EOF
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected end of file in attribute value.")})
|
||||
|
@ -659,14 +643,15 @@ module HTML5lib
|
|||
charStack = [@stream.char, @stream.char]
|
||||
if charStack == ["-", "-"]
|
||||
@currentToken = {:type => :Comment, :data => ""}
|
||||
@state = @states[:comment]
|
||||
@state = @states[:commentStart]
|
||||
else
|
||||
5.times { charStack.push(@stream.char) }
|
||||
# Put in explicit :EOF check
|
||||
if ((not charStack.include? :EOF) and
|
||||
charStack.join("").upcase == "DOCTYPE")
|
||||
@currentToken =\
|
||||
{:type => :Doctype, :name => "", :data => true}
|
||||
{:type => :Doctype, :name => "",
|
||||
:publicId => nil, :systemId => nil, :correct => true}
|
||||
@state = @states[:doctype]
|
||||
else
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
|
@ -678,10 +663,52 @@ module HTML5lib
|
|||
return true
|
||||
end
|
||||
|
||||
def commentStartState
|
||||
data = @stream.char
|
||||
if data == "-"
|
||||
@state = @states[:commentStartDash]
|
||||
elsif data == ">"
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Incorrect comment.")})
|
||||
@tokenQueue.push(@currentToken)
|
||||
@state = @states[:data]
|
||||
elsif data == :EOF
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected end of file in comment.")})
|
||||
@tokenQueue.push(@currentToken)
|
||||
@state = @states[:data]
|
||||
else
|
||||
@currentToken[:data] += data + @stream.chars_until("-")
|
||||
@state = @states[:comment]
|
||||
end
|
||||
return true
|
||||
end
|
||||
|
||||
def commentStartDashState
|
||||
data = @stream.char
|
||||
if data == "-"
|
||||
@state = @states[:commentEnd]
|
||||
elsif data == ">"
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Incorrect comment.")})
|
||||
@tokenQueue.push(@currentToken)
|
||||
@state = @states[:data]
|
||||
elsif data == :EOF
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected end of file in comment.")})
|
||||
@tokenQueue.push(@currentToken)
|
||||
@state = @states[:data]
|
||||
else
|
||||
@currentToken[:data] += data + @stream.chars_until("-")
|
||||
@state = @states[:comment]
|
||||
end
|
||||
return true
|
||||
end
|
||||
|
||||
def commentState
|
||||
data = @stream.char
|
||||
if data == "-"
|
||||
@state = @states[:commentDash]
|
||||
@state = @states[:commentEndDash]
|
||||
elsif data == :EOF
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected end of file in comment.")})
|
||||
|
@ -693,7 +720,7 @@ module HTML5lib
|
|||
return true
|
||||
end
|
||||
|
||||
def commentDashState
|
||||
def commentEndDashState
|
||||
data = @stream.char
|
||||
if data == "-"
|
||||
@state = @states[:commentEnd]
|
||||
|
@ -753,19 +780,16 @@ module HTML5lib
|
|||
def beforeDoctypeNameState
|
||||
data = @stream.char
|
||||
if SPACE_CHARACTERS.include? data
|
||||
elsif ASCII_LOWERCASE.include? data
|
||||
@currentToken[:name] = data.upcase
|
||||
@state = @states[:doctypeName]
|
||||
elsif data == ">"
|
||||
# Character needs to be consumed per the specification so don't
|
||||
# invoke emitCurrentTokenWithParseError with :data as argument.
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected > character. Expected DOCTYPE name.")})
|
||||
@currentToken[:correct] = false
|
||||
@tokenQueue.push(@currentToken)
|
||||
@state = @states[:data]
|
||||
elsif data == :EOF
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected end of file. Expected DOCTYPE name.")})
|
||||
@currentToken[:correct] = false
|
||||
@tokenQueue.push(@currentToken)
|
||||
@state = @states[:data]
|
||||
else
|
||||
|
@ -777,33 +801,21 @@ module HTML5lib
|
|||
|
||||
def doctypeNameState
|
||||
data = @stream.char
|
||||
needsDoctypeCheck = false
|
||||
if SPACE_CHARACTERS.include? data
|
||||
@state = @states[:afterDoctypeName]
|
||||
needsDoctypeCheck = true
|
||||
elsif data == ">"
|
||||
@tokenQueue.push(@currentToken)
|
||||
@state = @states[:data]
|
||||
elsif data == :EOF
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected end of file in DOCTYPE name.")})
|
||||
@currentToken[:correct] = false
|
||||
@tokenQueue.push(@currentToken)
|
||||
@state = @states[:data]
|
||||
else
|
||||
# We can't just uppercase everything that arrives here. For
|
||||
# instance, non-ASCII characters.
|
||||
if ASCII_LOWERCASE.include? data
|
||||
data = data.upcase
|
||||
end
|
||||
@currentToken[:name] += data
|
||||
needsDoctypeCheck = true
|
||||
end
|
||||
|
||||
# After some iterations through this state it should eventually say
|
||||
# "HTML". Otherwise there's an error.
|
||||
if needsDoctypeCheck and @currentToken[:name] == "HTML"
|
||||
@currentToken[:data] = false
|
||||
end
|
||||
return true
|
||||
end
|
||||
|
||||
|
@ -815,16 +827,195 @@ module HTML5lib
|
|||
@state = @states[:data]
|
||||
elsif data == :EOF
|
||||
@currentToken[:data] = true
|
||||
# XXX EMIT
|
||||
@stream.queue.push(data)
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected end of file in DOCTYPE.")})
|
||||
@currentToken[:correct] = false
|
||||
@tokenQueue.push(@currentToken)
|
||||
@state = @states[:data]
|
||||
else
|
||||
charStack = [data]
|
||||
5.times { charStack << stream.char }
|
||||
token = charStack.join('').tr(ASCII_UPPERCASE,ASCII_LOWERCASE)
|
||||
if token == "public"
|
||||
@state = @states[:beforeDoctypePublicIdentifier]
|
||||
elsif token == "system"
|
||||
@state = @states[:beforeDoctypeSystemIdentifier]
|
||||
else
|
||||
@stream.queue += charStack
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Expected 'public' or 'system'. Got '#{charStack.join('')}'")})
|
||||
@state = @states[:bogusDoctype]
|
||||
end
|
||||
end
|
||||
return true
|
||||
end
|
||||
|
||||
def beforeDoctypePublicIdentifierState
|
||||
data = @stream.char
|
||||
|
||||
if SPACE_CHARACTERS.include?(data)
|
||||
elsif data == "\""
|
||||
@currentToken[:publicId] = ""
|
||||
@state = @states[:doctypePublicIdentifierDoubleQuoted]
|
||||
elsif data == "'"
|
||||
@currentToken[:publicId] = ""
|
||||
@state = @states[:doctypePublicIdentifierSingleQuoted]
|
||||
elsif data == ">"
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected end of DOCTYPE.")})
|
||||
@currentToken[:correct] = false
|
||||
@tokenQueue.push(@currentToken)
|
||||
@state = @states[:data]
|
||||
elsif data == :EOF
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected end of file in DOCTYPE.")})
|
||||
@currentToken[:correct] = false
|
||||
@tokenQueue.push(@currentToken)
|
||||
@state = @states[:data]
|
||||
else
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Expected space or '>'. Got '" + data + "'")})
|
||||
@currentToken[:data] = true
|
||||
_("Unexpected character in DOCTYPE.")})
|
||||
@state = @states[:bogusDoctype]
|
||||
end
|
||||
|
||||
return true
|
||||
end
|
||||
|
||||
def doctypePublicIdentifierDoubleQuotedState
|
||||
data = @stream.char
|
||||
if data == "\""
|
||||
@state = @states[:afterDoctypePublicIdentifier]
|
||||
elsif data == :EOF
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected end of file in DOCTYPE.")})
|
||||
@currentToken[:correct] = false
|
||||
@tokenQueue.push(@currentToken)
|
||||
@state = @states[:data]
|
||||
else
|
||||
@currentToken[:publicId] += data
|
||||
end
|
||||
return true
|
||||
end
|
||||
|
||||
def doctypePublicIdentifierSingleQuotedState
|
||||
data = @stream.char
|
||||
if data == "'"
|
||||
@state = @states[:afterDoctypePublicIdentifier]
|
||||
elsif data == :EOF
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected end of file in DOCTYPE.")})
|
||||
@currentToken[:correct] = false
|
||||
@tokenQueue.push(@currentToken)
|
||||
@state = @states[:data]
|
||||
else
|
||||
@currentToken[:publicId] += data
|
||||
end
|
||||
return true
|
||||
end
|
||||
|
||||
def afterDoctypePublicIdentifierState
|
||||
data = @stream.char
|
||||
if SPACE_CHARACTERS.include?(data)
|
||||
elsif data == "\""
|
||||
@currentToken[:systemId] = ""
|
||||
@state = @states[:doctypeSystemIdentifierDoubleQuoted]
|
||||
elsif data == "'"
|
||||
@currentToken[:systemId] = ""
|
||||
@state = @states[:doctypeSystemIdentifierSingleQuoted]
|
||||
elsif data == ">"
|
||||
@tokenQueue.push(@currentToken)
|
||||
@state = @states[:data]
|
||||
elsif data == :EOF
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected end of file in DOCTYPE.")})
|
||||
@currentToken[:correct] = false
|
||||
@tokenQueue.push(@currentToken)
|
||||
@state = @states[:data]
|
||||
else
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected character in DOCTYPE.")})
|
||||
@state = @states[:bogusDoctype]
|
||||
end
|
||||
return true
|
||||
end
|
||||
|
||||
def beforeDoctypeSystemIdentifierState
|
||||
data = @stream.char
|
||||
if SPACE_CHARACTERS.include?(data)
|
||||
elsif data == "\""
|
||||
@currentToken[:systemId] = ""
|
||||
@state = @states[:doctypeSystemIdentifierDoubleQuoted]
|
||||
elsif data == "'"
|
||||
@currentToken[:systemId] = ""
|
||||
@state = @states[:doctypeSystemIdentifierSingleQuoted]
|
||||
elsif data == ">"
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected character in DOCTYPE.")})
|
||||
@currentToken[:correct] = false
|
||||
@tokenQueue.push(@currentToken)
|
||||
@state = @states[:data]
|
||||
elsif data == :EOF
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected end of file in DOCTYPE.")})
|
||||
@currentToken[:correct] = false
|
||||
@tokenQueue.push(@currentToken)
|
||||
@state = @states[:data]
|
||||
else
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected character in DOCTYPE.")})
|
||||
@state = @states[:bogusDoctype]
|
||||
end
|
||||
return true
|
||||
end
|
||||
|
||||
def doctypeSystemIdentifierDoubleQuotedState
|
||||
data = @stream.char
|
||||
if data == "\""
|
||||
@state = @states[:afterDoctypeSystemIdentifier]
|
||||
elsif data == :EOF
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected end of file in DOCTYPE.")})
|
||||
@currentToken[:correct] = false
|
||||
@tokenQueue.push(@currentToken)
|
||||
@state = @states[:data]
|
||||
else
|
||||
@currentToken[:systemId] += data
|
||||
end
|
||||
return true
|
||||
end
|
||||
|
||||
def doctypeSystemIdentifierSingleQuotedState
|
||||
data = @stream.char
|
||||
if data == "'"
|
||||
@state = @states[:afterDoctypeSystemIdentifier]
|
||||
elsif data == :EOF
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected end of file in DOCTYPE.")})
|
||||
@currentToken[:correct] = false
|
||||
@tokenQueue.push(@currentToken)
|
||||
@state = @states[:data]
|
||||
else
|
||||
@currentToken[:systemId] += data
|
||||
end
|
||||
return true
|
||||
end
|
||||
|
||||
def afterDoctypeSystemIdentifierState
|
||||
data = @stream.char
|
||||
if SPACE_CHARACTERS.include?(data)
|
||||
elsif data == ">"
|
||||
@tokenQueue.push(@currentToken)
|
||||
@state = @states[:data]
|
||||
elsif data == :EOF
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected end of file in DOCTYPE.")})
|
||||
@currentToken[:correct] = false
|
||||
@tokenQueue.push(@currentToken)
|
||||
@state = @states[:data]
|
||||
else
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected character in DOCTYPE.")})
|
||||
@state = @states[:bogusDoctype]
|
||||
end
|
||||
return true
|
||||
|
@ -840,6 +1031,7 @@ module HTML5lib
|
|||
@stream.queue.push(data)
|
||||
@tokenQueue.push({:type => :ParseError, :data =>
|
||||
_("Unexpected end of file in bogus doctype.")})
|
||||
@currentToken[:correct] = false
|
||||
@tokenQueue.push(@currentToken)
|
||||
@state = @states[:data]
|
||||
end
|
||||
|
|
|
@ -1,21 +1,24 @@
|
|||
module HTML5lib
|
||||
module TreeBuilders
|
||||
|
||||
def self.getTreeBuilder(name)
|
||||
case name.to_s.downcase
|
||||
class << self
|
||||
def [](name)
|
||||
case name.to_s.downcase
|
||||
when 'simpletree' then
|
||||
require 'html5lib/treebuilders/simpletree'
|
||||
SimpleTree::TreeBuilder
|
||||
when 'rexml' then
|
||||
require 'html5lib/treebuilders/rexml'
|
||||
REXMLTree::TreeBuilder
|
||||
REXML::TreeBuilder
|
||||
when 'hpricot' then
|
||||
require 'html5lib/treebuilders/hpricot'
|
||||
Hpricot::TreeBuilder
|
||||
else
|
||||
raise "Unknown TreeBuilder #{name}"
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
alias :getTreeBuilder :[]
|
||||
end
|
||||
end
|
||||
end
|
||||
|
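A short sketch of the new factory interface, mirroring how parse.rb below uses it ('rexml' is one of the three builders registered above):

    require 'html5lib/treebuilders'
    require 'html5lib/html5parser'

    builder = HTML5lib::TreeBuilders['rexml']   # or TreeBuilders.getTreeBuilder('rexml')
    parser  = HTML5lib::HTMLParser.new(:tree => builder)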
|
|
@ -144,7 +144,7 @@ module HTML5lib
|
|||
# code. It should still do the same though.
|
||||
|
||||
# Step 1: stop the algorithm when there's nothing to do.
|
||||
return unless @activeFormattingElements
|
||||
return if @activeFormattingElements.empty?
|
||||
|
||||
# Step 2 and step 3: we start with the last element. So i is -1.
|
||||
i = -1
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
require 'html5lib/treebuilders/base'
|
||||
require 'rubygems'
|
||||
require 'hpricot'
|
||||
require 'forwardable'
|
||||
|
||||
|
@ -26,12 +27,17 @@ module HTML5lib
|
|||
childNodes << node
|
||||
hpricot.children << node.hpricot
|
||||
end
|
||||
if (oldparent = node.hpricot.parent) != nil
|
||||
oldparent.children.delete_at(oldparent.children.index(node.hpricot))
|
||||
end
|
||||
node.hpricot.parent = hpricot
|
||||
node.parent = self
|
||||
end
|
||||
|
||||
def removeChild(node)
|
||||
childNodes.delete(node)
|
||||
hpricot.children.delete_at(hpricot.children.index(node.hpricot))
|
||||
node.hpricot.parent = nil
|
||||
node.parent = nil
|
||||
end
|
||||
|
||||
|
@ -48,6 +54,7 @@ module HTML5lib
|
|||
if node.kind_of?(TextNode) and index > 0 and childNodes[index-1].kind_of?(TextNode)
|
||||
childNodes[index-1].hpricot.content = childNodes[index-1].hpricot.to_s + node.hpricot.to_s
|
||||
else
|
||||
refNode.hpricot.parent.insert_before(node.hpricot,refNode.hpricot)
|
||||
childNodes.insert(index, node)
|
||||
end
|
||||
end
|
||||
|
|
|
@ -4,7 +4,7 @@ require 'forwardable'
|
|||
|
||||
module HTML5lib
|
||||
module TreeBuilders
|
||||
module REXMLTree
|
||||
module REXML
|
||||
|
||||
class Node < Base::Node
|
||||
extend Forwardable
|
||||
|
@ -52,6 +52,7 @@ module HTML5lib
|
|||
childNodes[index-1].rxobj.raw = true
|
||||
else
|
||||
childNodes.insert index, node
|
||||
refNode.rxobj.parent.insert_before(refNode.rxobj,node.rxobj)
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -62,7 +63,7 @@ module HTML5lib
|
|||
|
||||
class Element < Node
|
||||
def self.rxclass
|
||||
REXML::Element
|
||||
::REXML::Element
|
||||
end
|
||||
|
||||
def initialize name
|
||||
|
@ -95,7 +96,7 @@ module HTML5lib
|
|||
|
||||
class Document < Node
|
||||
def self.rxclass
|
||||
REXML::Document
|
||||
::REXML::Document
|
||||
end
|
||||
|
||||
def initialize
|
||||
|
@ -120,7 +121,7 @@ module HTML5lib
|
|||
|
||||
class DocumentType < Node
|
||||
def self.rxclass
|
||||
REXML::DocType
|
||||
::REXML::DocType
|
||||
end
|
||||
|
||||
def printTree indent=0
|
||||
|
@ -145,7 +146,7 @@ module HTML5lib
|
|||
class TextNode < Node
|
||||
def initialize data
|
||||
raw = data.gsub('&','&amp;').gsub('<','&lt;').gsub('>','&gt;')
|
||||
@rxobj = REXML::Text.new(raw, true, nil, true)
|
||||
@rxobj = ::REXML::Text.new(raw, true, nil, true)
|
||||
end
|
||||
|
||||
def printTree indent=0
|
||||
|
@ -155,7 +156,7 @@ module HTML5lib
|
|||
|
||||
class CommentNode < Node
|
||||
def self.rxclass
|
||||
REXML::Comment
|
||||
::REXML::Comment
|
||||
end
|
||||
|
||||
def printTree indent=0
|
||||
|
|
|
@ -78,7 +78,7 @@ module HTML5lib
|
|||
|
||||
class Element < Node
|
||||
def to_s
|
||||
"<%s>" % name
|
||||
"<#{name}>"
|
||||
end
|
||||
|
||||
def printTree indent=0
|
||||
|
|
26 vendor/plugins/HTML5lib/lib/html5lib/treewalkers.rb vendored Normal file
|
@ -0,0 +1,26 @@
|
|||
require 'html5lib/treewalkers/base'
|
||||
|
||||
module HTML5lib
|
||||
module TreeWalkers
|
||||
|
||||
class << self
|
||||
def [](name)
|
||||
case name.to_s.downcase
|
||||
when 'simpletree' then
|
||||
require 'html5lib/treewalkers/simpletree'
|
||||
SimpleTree::TreeWalker
|
||||
when 'rexml' then
|
||||
require 'html5lib/treewalkers/rexml'
|
||||
REXML::TreeWalker
|
||||
when 'hpricot' then
|
||||
require 'html5lib/treewalkers/hpricot'
|
||||
Hpricot::TreeWalker
|
||||
else
|
||||
raise "Unknown TreeWalker #{name}"
|
||||
end
|
||||
end
|
||||
|
||||
alias :getTreeWalker :[]
|
||||
end
|
||||
end
|
||||
end
|
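The walkers registered above yield the same token hashes the tokenizer emits, so they can feed the serializer or be consumed directly. A small sketch (how the tree was built is elided):

    require 'html5lib/treewalkers'

    walker = HTML5lib::TreeWalkers['simpletree'].new(tree)
    walker.each do |token|
      # e.g. {:type => :StartTag, :name => "p", :data => [["class", "x"]]}
      p token
    end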
156 vendor/plugins/HTML5lib/lib/html5lib/treewalkers/base.rb vendored Normal file
|
@ -0,0 +1,156 @@
|
|||
require 'html5lib/constants'
|
||||
module HTML5lib
|
||||
module TreeWalkers
|
||||
|
||||
module TokenConstructor
|
||||
def error(msg)
|
||||
return {:type => "SerializeError", :data => msg}
|
||||
end
|
||||
|
||||
def normalizeAttrs(attrs)
|
||||
attrs.to_a
|
||||
end
|
||||
|
||||
def emptyTag(name, attrs, hasChildren=false)
|
||||
error(_("Void element has children")) if hasChildren
|
||||
return({:type => :EmptyTag, :name => name, \
|
||||
:data => normalizeAttrs(attrs)})
|
||||
end
|
||||
|
||||
def startTag(name, attrs)
|
||||
return {:type => :StartTag, :name => name, \
|
||||
:data => normalizeAttrs(attrs)}
|
||||
end
|
||||
|
||||
def endTag(name)
|
||||
return {:type => :EndTag, :name => name, :data => []}
|
||||
end
|
||||
|
||||
def text(data)
|
||||
if data =~ /\A([#{SPACE_CHARACTERS.join('')}]+)/m
|
||||
yield({:type => :SpaceCharacters, :data => $1})
|
||||
data = data[$1.length .. -1]
|
||||
return if data.empty?
|
||||
end
|
||||
|
||||
if data =~ /([#{SPACE_CHARACTERS.join('')}]+)\Z/m
|
||||
yield({:type => :Characters, :data => data[0 ... -$1.length]})
|
||||
yield({:type => :SpaceCharacters, :data => $1})
|
||||
else
|
||||
yield({:type => :Characters, :data => data})
|
||||
end
|
||||
end
|
||||
|
||||
def comment(data)
|
||||
return {:type => :Comment, :data => data}
|
||||
end
|
||||
|
||||
def doctype(name)
|
||||
return {:type => :Doctype, :name => name, :data => name.upcase() == "HTML"}
|
||||
end
|
||||
|
||||
def unknown(nodeType)
|
||||
return error(_("Unknown node type: ") + nodeType.to_s)
|
||||
end
|
||||
|
||||
def _(str)
|
||||
str
|
||||
end
|
||||
end
|
||||
|
||||
class Base
|
||||
include TokenConstructor
|
||||
|
||||
def initialize(tree)
|
||||
@tree = tree
|
||||
end
|
||||
|
||||
def each
|
||||
raise NotImplementedError
|
||||
end
|
||||
|
||||
alias walk each
|
||||
end
|
||||
|
||||
class NonRecursiveTreeWalker < TreeWalkers::Base
|
||||
def node_details(node)
|
||||
raise NotImplementedError
|
||||
end
|
||||
|
||||
def first_child(node)
|
||||
raise NotImplementedError
|
||||
end
|
||||
|
||||
def next_sibling(node)
|
||||
raise NotImplementedError
|
||||
end
|
||||
|
||||
def parent(node)
|
||||
raise NotImplementedError
|
||||
end
|
||||
|
||||
def each
|
||||
currentNode = @tree
|
||||
while currentNode != nil
|
||||
details = node_details(currentNode)
|
||||
hasChildren = false
|
||||
|
||||
case details.shift
|
||||
when :DOCTYPE
|
||||
yield doctype(*details)
|
||||
|
||||
when :TEXT
|
||||
text(*details) {|token| yield token}
|
||||
|
||||
when :ELEMENT
|
||||
name, attributes, hasChildren = details
|
||||
if VOID_ELEMENTS.include?(name)
|
||||
yield emptyTag(name, attributes.to_a, hasChildren)
|
||||
hasChildren = false
|
||||
else
|
||||
yield startTag(name, attributes.to_a)
|
||||
end
|
||||
|
||||
when :COMMENT
|
||||
yield comment(details[0])
|
||||
|
||||
when :DOCUMENT, :DOCUMENT_FRAGMENT
|
||||
hasChildren = true
|
||||
|
||||
when nil
|
||||
# ignore (REXML::XMLDecl is an example)
|
||||
|
||||
else
|
||||
yield unknown(details[0])
|
||||
end
|
||||
|
||||
firstChild = hasChildren ? first_child(currentNode) : nil
|
||||
if firstChild != nil
|
||||
currentNode = firstChild
|
||||
else
|
||||
while currentNode != nil
|
||||
details = node_details(currentNode)
|
||||
if details.shift == :ELEMENT
|
||||
name, attributes, hasChildren = details
|
||||
yield endTag(name) if !VOID_ELEMENTS.include?(name)
|
||||
end
|
||||
|
||||
if @tree == currentNode
|
||||
currentNode = nil
|
||||
else
|
||||
nextSibling = next_sibling(currentNode)
|
||||
if nextSibling != nil
|
||||
currentNode = nextSibling
|
||||
break
|
||||
end
|
||||
|
||||
currentNode = parent(currentNode)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
end
|
||||
end
|
48 vendor/plugins/HTML5lib/lib/html5lib/treewalkers/hpricot.rb vendored Normal file
|
@ -0,0 +1,48 @@
|
|||
require 'html5lib/treewalkers/base'
|
||||
require 'rexml/document'
|
||||
|
||||
module HTML5lib
|
||||
module TreeWalkers
|
||||
module Hpricot
|
||||
class TreeWalker < HTML5lib::TreeWalkers::NonRecursiveTreeWalker
|
||||
|
||||
def node_details(node)
|
||||
case node
|
||||
when ::Hpricot::Elem
|
||||
if node.name.empty?
|
||||
[:DOCUMENT_FRAGMENT]
|
||||
else
|
||||
[:ELEMENT, node.name,
|
||||
node.attributes.map {|name,value| [name,value]},
|
||||
!node.empty?]
|
||||
end
|
||||
when ::Hpricot::Text
|
||||
[:TEXT, node.to_plain_text]
|
||||
when ::Hpricot::Comment
|
||||
[:COMMENT, node.content]
|
||||
when ::Hpricot::Doc
|
||||
[:DOCUMENT]
|
||||
when ::Hpricot::DocType
|
||||
[:DOCTYPE, node.target]
|
||||
when ::Hpricot::XMLDecl
|
||||
[nil]
|
||||
else
|
||||
[:UNKNOWN, node.class.inspect]
|
||||
end
|
||||
end
|
||||
|
||||
def first_child(node)
|
||||
node.children.first
|
||||
end
|
||||
|
||||
def next_sibling(node)
|
||||
node.next_node
|
||||
end
|
||||
|
||||
def parent(node)
|
||||
node.parent
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
48 vendor/plugins/HTML5lib/lib/html5lib/treewalkers/rexml.rb vendored Normal file
|
@ -0,0 +1,48 @@
|
|||
require 'html5lib/treewalkers/base'
|
||||
require 'rexml/document'
|
||||
|
||||
module HTML5lib
|
||||
module TreeWalkers
|
||||
module REXML
|
||||
class TreeWalker < HTML5lib::TreeWalkers::NonRecursiveTreeWalker
|
||||
|
||||
def node_details(node)
|
||||
case node
|
||||
when ::REXML::Document
|
||||
[:DOCUMENT]
|
||||
when ::REXML::Element
|
||||
if !node.name
|
||||
[:DOCUMENT_FRAGMENT]
|
||||
else
|
||||
[:ELEMENT, node.name,
|
||||
node.attributes.map {|name,value| [name,value]},
|
||||
node.has_elements? || node.has_text?]
|
||||
end
|
||||
when ::REXML::Text
|
||||
[:TEXT, node.value]
|
||||
when ::REXML::Comment
|
||||
[:COMMENT, node.string]
|
||||
when ::REXML::DocType
|
||||
[:DOCTYPE, node.name]
|
||||
when ::REXML::XMLDecl
|
||||
[nil]
|
||||
else
|
||||
[:UNKNOWN, node.class.inspect]
|
||||
end
|
||||
end
|
||||
|
||||
def first_child(node)
|
||||
node.children.first
|
||||
end
|
||||
|
||||
def next_sibling(node)
|
||||
node.next_sibling
|
||||
end
|
||||
|
||||
def parent(node)
|
||||
node.parent
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
48 vendor/plugins/HTML5lib/lib/html5lib/treewalkers/simpletree.rb vendored Normal file
|
@ -0,0 +1,48 @@
|
|||
require 'html5lib/treewalkers/base'
|
||||
|
||||
module HTML5lib
|
||||
module TreeWalkers
|
||||
module SimpleTree
|
||||
class TreeWalker < HTML5lib::TreeWalkers::Base
|
||||
include HTML5lib::TreeBuilders::SimpleTree
|
||||
|
||||
def walk(node)
|
||||
case node
|
||||
when Document, DocumentFragment
|
||||
return
|
||||
|
||||
when DocumentType
|
||||
yield doctype(node.name)
|
||||
|
||||
when TextNode
|
||||
text(node.value) {|token| yield token}
|
||||
|
||||
when Element
|
||||
if VOID_ELEMENTS.include?(node.name)
|
||||
yield emptyTag(node.name, node.attributes, node.hasContent())
|
||||
else
|
||||
yield startTag(node.name, node.attributes)
|
||||
for child in node.childNodes
|
||||
walk(child) {|token| yield token}
|
||||
end
|
||||
yield endTag(node.name)
|
||||
end
|
||||
|
||||
when CommentNode
|
||||
yield comment(node.value)
|
||||
|
||||
else
|
||||
puts '?'
|
||||
yield unknown(node.class)
|
||||
end
|
||||
end
|
||||
|
||||
def each
|
||||
for child in @tree.childNodes
|
||||
walk(child) {|node| yield node}
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
213 vendor/plugins/HTML5lib/parse.rb vendored Executable file
|
@ -0,0 +1,213 @@
|
|||
#!/usr/bin/env ruby
|
||||
#
|
||||
# Parse a document to a simpletree tree, with optional profiling
|
||||
|
||||
$:.unshift File.dirname(__FILE__),'lib'
|
||||
|
||||
def parse(opts, args)
|
||||
encoding = nil
|
||||
|
||||
f = args[-1]
|
||||
if f
|
||||
begin
|
||||
if f[0..6] == 'http://'
|
||||
require 'open-uri'
|
||||
f = URI.parse(f).open
|
||||
encoding = f.charset
|
||||
elsif f == '-'
|
||||
f = $stdin
|
||||
else
|
||||
f = open(f)
|
||||
end
|
||||
rescue
|
||||
end
|
||||
else
|
||||
$stderr.write("No filename provided. Use -h for help\n")
|
||||
exit(1)
|
||||
end
|
||||
|
||||
require 'html5lib/treebuilders'
|
||||
treebuilder = HTML5lib::TreeBuilders[opts.treebuilder]
|
||||
|
||||
if opts.output == :xml
|
||||
require 'html5lib/liberalxmlparser'
|
||||
p = HTML5lib::XHTMLParser.new(:tree=>treebuilder)
|
||||
else
|
||||
require 'html5lib/html5parser'
|
||||
p = HTML5lib::HTMLParser.new(:tree=>treebuilder)
|
||||
end
|
||||
|
||||
if opts.parsemethod == :parse
|
||||
args = [f, encoding]
|
||||
else
|
||||
args = [f, 'div', encoding]
|
||||
end
|
||||
|
||||
if opts.profile
|
||||
require 'profiler'
|
||||
Profiler__::start_profile
|
||||
p.send(opts.parsemethod, *args)
|
||||
Profiler__::stop_profile
|
||||
Profiler__::print_profile($stderr)
|
||||
elsif opts.time
|
||||
require 'time'
|
||||
t0 = Time.new
|
||||
document = p.send(opts.parsemethod, *args)
|
||||
t1 = Time.new
|
||||
printOutput(p, document, opts)
|
||||
t2 = Time.new
|
||||
puts "\n\nRun took: %fs (plus %fs to print the output)"%[t1-t0, t2-t1]
|
||||
else
|
||||
document = p.send(opts.parsemethod, *args)
|
||||
printOutput(p, document, opts)
|
||||
end
|
||||
end
|
||||
|
||||
def printOutput(parser, document, opts)
|
||||
puts "Encoding: #{parser.tokenizer.stream.char_encoding}" if opts.encoding
|
||||
|
||||
case opts.output
|
||||
when :xml
|
||||
print document
|
||||
when :html
|
||||
require 'html5lib/treewalkers'
|
||||
tokens = HTML5lib::TreeWalkers[opts.treebuilder].new(document)
|
||||
require 'html5lib/serializer'
|
||||
puts HTML5lib::HTMLSerializer.serialize(tokens, opts.serializer)
|
||||
when :hilite
|
||||
print document.hilite
|
||||
when :tree
|
||||
document = [document] unless document.respond_to?(:each)
|
||||
document.each {|fragment| puts parser.tree.testSerializer(fragment)}
|
||||
end
|
||||
|
||||
if opts.error
|
||||
errList=[]
|
||||
for pos, message in parser.errors
|
||||
errList << ("Line %i Col %i"%pos + " " + message)
|
||||
end
|
||||
$stdout.write("\nParse errors:\n" + errList.join("\n")+"\n")
|
||||
end
|
||||
end
|
||||
|
||||
require 'ostruct'
|
||||
options = OpenStruct.new
|
||||
options.profile = false
|
||||
options.time = false
|
||||
options.output = :html
|
||||
options.treebuilder = 'simpletree'
|
||||
options.error = false
|
||||
options.encoding = false
|
||||
options.parsemethod = :parse
|
||||
options.serializer = {
|
||||
:encoding => 'utf-8',
|
||||
:omit_optional_tags => false,
|
||||
:inject_meta_charset => false
|
||||
}
|
||||
|
||||
require 'optparse'
|
||||
opts = OptionParser.new do |opts|
|
||||
opts.separator ""
|
||||
opts.separator "Parse Options:"
|
||||
|
||||
opts.on("-b", "--treebuilder NAME") do |treebuilder|
|
||||
options.treebuilder = treebuilder
|
||||
end
|
||||
|
||||
opts.on("-f", "--fragment", "Parse as a fragment") do |parse|
|
||||
options.parsemethod = :parseFragment
|
||||
end
|
||||
|
||||
opts.separator ""
|
||||
opts.separator "Filter Options:"
|
||||
|
||||
opts.on("--[no-]inject-meta-charset", "inject <meta charset>") do |inject|
|
||||
options.serializer[:inject_meta_charset] = inject
|
||||
end
|
||||
|
||||
opts.on("--[no-]strip-whitespace", "strip unnecessary whitespace") do |strip|
|
||||
options.serializer[:strip_whitespace] = strip
|
||||
end
|
||||
|
||||
opts.on("--[no-]sanitize", "escape unsafe tags") do |sanitize|
|
||||
options.serializer[:sanitize] = sanitize
|
||||
end
|
||||
|
||||
opts.separator ""
|
||||
opts.separator "Output Options:"
|
||||
|
||||
opts.on("--tree", "output as debug tree") do |tree|
|
||||
options.output = :tree
|
||||
end
|
||||
|
||||
opts.on("-x", "--xml", "output as xml") do |xml|
|
||||
options.output = :xml
|
||||
options.treebuilder = "rexml"
|
||||
end
|
||||
|
||||
opts.on("--[no-]html", "Output as html") do |html|
|
||||
options.output = (html ? :html : nil)
|
||||
end
|
||||
|
||||
opts.on("--hilite", "Output as formatted highlighted code.") do |hilite|
|
||||
options.output = :hilite
|
||||
end
|
||||
|
||||
opts.on("-e", "--error", "Print a list of parse errors") do |error|
|
||||
options.error = error
|
||||
end
|
||||
|
||||
opts.separator ""
|
||||
opts.separator "Serialization Options:"
|
||||
|
||||
opts.on("--[no-]omit-optional-tags", "Omit optional tags") do |omit|
|
||||
options.serializer[:omit_optional_tags] = omit
|
||||
end
|
||||
|
||||
opts.on("--[no-]quote-attr-values", "Quote attribute values") do |quote|
|
||||
options.serializer[:quote_attr_values] = quote
|
||||
end
|
||||
|
||||
opts.on("--[no-]use-best-quote-char", "Use best quote character") do |best|
|
||||
options.serializer[:use_best_quote_char] = best
|
||||
end
|
||||
|
||||
opts.on("--quote-char C", "Use specified quote character") do |c|
|
||||
options.serializer[:quote_char] = c
|
||||
end
|
||||
|
||||
opts.on("--[no-]minimize-boolean-attributes", "Minimize boolean attributes") do |min|
|
||||
options.serializer[:minimize_boolean_attributes] = min
|
||||
end
|
||||
|
||||
opts.on("--[no-]use-trailing-solidus", "Use trailing solidus") do |slash|
|
||||
options.serializer[:use_trailing_solidus] = slash
|
||||
end
|
||||
|
||||
opts.on("--[no-]escape-lt-in-attrs", "Escape less than signs in attribute values") do |lt|
|
||||
options.serializer[:escape_lt_in_attrs] = lt
|
||||
end
|
||||
|
||||
opts.separator ""
|
||||
opts.separator "Other Options:"
|
||||
|
||||
opts.on("-p", "--[no-]profile", "Profile the run") do |profile|
|
||||
options.profile = profile
|
||||
end
|
||||
|
||||
opts.on("-t", "--[no-]time", "Time the run") do |time|
|
||||
options.time = time
|
||||
end
|
||||
|
||||
opts.on("-c", "--[no-]encoding", "Print character encoding used") do |encoding|
|
||||
options.encoding = encoding
|
||||
end
|
||||
|
||||
opts.on_tail("-h", "--help", "Show this message") do
|
||||
puts opts
|
||||
exit
|
||||
end
|
||||
end
|
||||
|
||||
opts.parse!(ARGV)
|
||||
parse options, ARGV
|
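For orientation, a few invocations the option parser above accepts (file names are placeholders):

    ruby parse.rb --tree page.html                 # dump the debug tree
    ruby parse.rb -x http://example.com/           # XHTML parser, REXML tree, XML output
    ruby parse.rb --sanitize --error - < in.html   # sanitize stdin, list parse errors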
51 vendor/plugins/HTML5lib/testdata/encoding/chardet/test_big5.txt vendored Normal file
|
@ -0,0 +1,51 @@
|
|||
老子《道德經》 第一~四十章
|
||||
|
||||
老子道經
|
||||
|
||||
第一章
|
||||
|
||||
道可道,非常道。名可名,非常名。無,名天地之始﹔有,名萬物之母。
|
||||
故常無,欲以觀其妙;常有,欲以觀其徼。此兩者,同出而異名,同謂之
|
||||
玄。玄之又玄,眾妙之門。
|
||||
|
||||
第二章
|
||||
|
||||
天下皆知美之為美,斯惡矣﹔皆知善之為善,斯不善矣。故有無相生,難
|
||||
易相成,長短相形,高下相傾,音聲相和,前後相隨。是以聖人處「無為
|
||||
」之事,行「不言」之教。萬物作焉而不辭,生而不有,為而不恃,功成
|
||||
而弗居。夫唯弗居,是以不去。
|
||||
|
||||
第三章
|
||||
|
||||
不尚賢,使民不爭﹔不貴難得之貨,使民不為盜﹔不見可欲,使民心不亂
|
||||
。是以「聖人」之治,虛其心,實其腹,弱其志,強其骨。常使民無知無
|
||||
欲。使夫智者不敢為也。為「無為」,則無不治。
|
||||
|
||||
第四章
|
||||
|
||||
「道」沖,而用之或不盈。淵兮,似萬物之宗﹔挫其銳,解其紛,和其光
|
||||
,同其塵﹔湛兮似或存。吾不知誰之子?象帝之先。
|
||||
|
||||
第五章
|
||||
|
||||
天地不仁,以萬物為芻狗﹔聖人不仁,以百姓為芻狗。天地之間,其猶橐
|
||||
蘥乎?虛而不屈,動而愈出。多言數窮,不如守中。
|
||||
|
||||
第六章
|
||||
|
||||
谷神不死,是謂玄牝。玄牝之門,是謂天地根。綿綿若存,用之不勤。
|
||||
|
||||
第七章
|
||||
|
||||
天長地久。天地所以能長且久者,以其不自生,故能長久。是以聖人後其
|
||||
身而身先,外其身而身存。非以其無私邪?故能成其私。
|
||||
|
||||
第八章
|
||||
|
||||
上善若水。水善利萬物而不爭。處眾人之所惡,故幾於道。居善地,心善
|
||||
淵,與善仁,言善信,政善治,事善能,動善時。夫唯不爭,故無尤。
|
||||
|
||||
第九章
|
||||
|
||||
持而盈之,不如其已﹔揣而銳之,不可長保。金玉滿堂,莫之能守﹔富貴
|
||||
而驕,自遺其咎。功遂身退,天之道。
|
10 vendor/plugins/HTML5lib/testdata/encoding/test-yahoo-jp.dat vendored Normal file
|
@ -0,0 +1,10 @@
|
|||
#data
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=euc-jp">
|
||||
<!--京-->
|
||||
<title>Yahoo! JAPAN</title>
|
||||
<meta name="description" content="日本最大級のポータルサイト。検索、オークション、ニュース、メール、コミュニティ、ショッピング、など80以上のサービスを展開。あなたの生活をより豊かにする「ライフ・エンジン」を目指していきます。">
|
||||
<style type="text/css" media="all">
|
||||
#encoding
|
||||
euc-jp
|
394 vendor/plugins/HTML5lib/testdata/encoding/tests1.dat vendored Normal file
File diff suppressed because one or more lines are too long
82 vendor/plugins/HTML5lib/testdata/encoding/tests2.dat vendored Normal file
|
@ -0,0 +1,82 @@
|
|||
#data
|
||||
<meta
|
||||
#encoding
|
||||
windows-1252
|
||||
|
||||
#data
|
||||
<
|
||||
#encoding
|
||||
windows-1252
|
||||
|
||||
#data
|
||||
<!
|
||||
#encoding
|
||||
windows-1252
|
||||
|
||||
#data
|
||||
<meta charset = "
|
||||
#encoding
|
||||
windows-1252
|
||||
|
||||
#data
|
||||
<meta charset=EUC-jp
|
||||
#encoding
|
||||
windows-1252
|
||||
|
||||
#data
|
||||
<meta <meta charset='EUC-jp'>
|
||||
#encoding
|
||||
EUC-jp
|
||||
|
||||
#data
|
||||
<meta charset = 'EUC-jp'>
|
||||
#encoding
|
||||
EUC-jp
|
||||
|
||||
|
||||
#data
|
||||
<!-- -->
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
|
||||
#encoding
|
||||
utf-8
|
||||
|
||||
#data
|
||||
<!-- -->
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf
|
||||
#encoding
|
||||
windows-1252
|
||||
|
||||
#data
|
||||
<meta http-equiv="Content-Type<meta charset="utf-8">
|
||||
#encoding
|
||||
windows-1252
|
||||
|
||||
#data
|
||||
<meta http-equiv="Content-Type" content="text/html; charset='utf-8'">
|
||||
#encoding
|
||||
utf-8
|
||||
|
||||
#data
|
||||
<meta http-equiv="Content-Type" content="text/html; charset='utf-8">
|
||||
#encoding
|
||||
windows-1252
|
||||
|
||||
#data
|
||||
<meta
|
||||
#encoding
|
||||
windows-1252
|
||||
|
||||
#data
|
||||
<meta charset =
|
||||
#encoding
|
||||
windows-1252
|
||||
|
||||
#data
|
||||
<meta charset= utf-8
|
||||
#encoding
|
||||
windows-1252
|
||||
|
||||
#data
|
||||
<meta content = "text/html;
|
||||
#encoding
|
||||
windows-1252
|
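The encoding fixtures above use a simple '#data' / '#encoding' block format. The test harness itself is not part of this diff, so the following is only a sketch of one way such files can be read:

    # Split a .dat file into [input, expected_encoding] pairs.
    def encoding_cases(path)
      File.read(path).split(/^#data\n/)[1..-1].to_a.map do |chunk|
        data, encoding = chunk.split(/^#encoding\n/)
        [data.to_s.chomp, encoding.to_s.strip]
      end
    end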
409 vendor/plugins/HTML5lib/testdata/sanitizer/tests1.dat vendored Normal file
|
@ -0,0 +1,409 @@
|
|||
[
|
||||
{
|
||||
"name": "IE_Comments",
|
||||
"input": "<!--[if gte IE 4]><script>alert('XSS');</script><![endif]-->",
|
||||
"output": ""
|
||||
},
|
||||
|
||||
{
|
||||
"name": "IE_Comments_2",
|
||||
"input": "<![if !IE 5]><script>alert('XSS');</script><![endif]>",
|
||||
"output": "<script>alert('XSS');</script>",
|
||||
"rexml": "Ill-formed XHTML!"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "allow_colons_in_path_component",
|
||||
"input": "<a href=\"./this:that\">foo</a>",
|
||||
"output": "<a href='./this:that'>foo</a>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "background_attribute",
|
||||
"input": "<div background=\"javascript:alert('XSS')\"></div>",
|
||||
"output": "<div/>",
|
||||
"xhtml": "<div></div>",
|
||||
"rexml": "<div></div>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "bgsound",
|
||||
"input": "<bgsound src=\"javascript:alert('XSS');\" />",
|
||||
"output": "<bgsound src=\"javascript:alert('XSS');\"/>",
|
||||
"rexml": "<bgsound src=\"javascript:alert('XSS');\"></bgsound>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "div_background_image_unicode_encoded",
|
||||
"input": "<div style=\"background-image:\u00a5\u00a2\u006C\u0028'\u006a\u0061\u00a6\u0061\u00a3\u0063\u00a2\u0069\u00a0\u00a4\u003a\u0061\u006c\u0065\u00a2\u00a4\u0028.1027\u0058.1053\u0053\u0027\u0029'\u0029\">foo</div>",
|
||||
"output": "<div style=''>foo</div>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "div_expression",
|
||||
"input": "<div style=\"width: expression(alert('XSS'));\">foo</div>",
|
||||
"output": "<div style=''>foo</div>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "double_open_angle_brackets",
|
||||
"input": "<img src=http://ha.ckers.org/scriptlet.html <",
|
||||
"output": "<img src='http://ha.ckers.org/scriptlet.html'/>",
|
||||
"rexml": "Ill-formed XHTML!"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "double_open_angle_brackets_2",
|
||||
"input": "<script src=http://ha.ckers.org/scriptlet.html <",
|
||||
"output": "<script src=\"http://ha.ckers.org/scriptlet.html\" <=\"\">",
|
||||
"rexml": "Ill-formed XHTML!"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "grave_accents",
|
||||
"input": "<img src=`javascript:alert('XSS')` />",
|
||||
"output": "<img/>",
|
||||
"rexml": "Ill-formed XHTML!"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "img_dynsrc_lowsrc",
|
||||
"input": "<img dynsrc=\"javascript:alert('XSS')\" />",
|
||||
"output": "<img/>",
|
||||
"rexml": "<img />"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "img_vbscript",
|
||||
"input": "<img src='vbscript:msgbox(\"XSS\")' />",
|
||||
"output": "<img/>",
|
||||
"rexml": "<img />"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "input_image",
|
||||
"input": "<input type=\"image\" src=\"javascript:alert('XSS');\" />",
|
||||
"output": "<input type='image'/>",
|
||||
"rexml": "<input type='image' />"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "link_stylesheets",
|
||||
"input": "<link rel=\"stylesheet\" href=\"javascript:alert('XSS');\" />",
|
||||
"output": "<link rel=\"stylesheet\" href=\"javascript:alert('XSS');\"/>",
|
||||
"rexml": "<link href=\"javascript:alert('XSS');\" rel=\"stylesheet\"/>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "link_stylesheets_2",
|
||||
"input": "<link rel=\"stylesheet\" href=\"http://ha.ckers.org/xss.css\" />",
|
||||
"output": "<link rel=\"stylesheet\" href=\"http://ha.ckers.org/xss.css\"/>",
|
||||
"rexml": "<link href=\"http://ha.ckers.org/xss.css\" rel=\"stylesheet\"/>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "list_style_image",
|
||||
"input": "<li style=\"list-style-image: url(javascript:alert('XSS'))\">foo</li>",
|
||||
"output": "<li style=''>foo</li>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "no_closing_script_tags",
|
||||
"input": "<script src=http://ha.ckers.org/xss.js?<b>",
|
||||
"output": "<script src=\"http://ha.ckers.org/xss.js?&lt;b\">",
|
||||
"rexml": "Ill-formed XHTML!"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "non_alpha_non_digit",
|
||||
"input": "<script/XSS src=\"http://ha.ckers.org/xss.js\"></script>",
|
||||
"output": "<script XSS=\"\" src=\"http://ha.ckers.org/xss.js\"></script>",
|
||||
"rexml": "Ill-formed XHTML!"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "non_alpha_non_digit_2",
|
||||
"input": "<a onclick!\\#$%&()*~+-_.,:;?@[/|\\]^`=alert(\"XSS\")>foo</a>",
|
||||
"output": "<a>foo</a>",
|
||||
"rexml": "Ill-formed XHTML!"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "non_alpha_non_digit_3",
|
||||
"input": "<img/src=\"http://ha.ckers.org/xss.js\"/>",
|
||||
"output": "<img src='http://ha.ckers.org/xss.js'/>",
|
||||
"rexml": "Ill-formed XHTML!"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "non_alpha_non_digit_II",
|
||||
"input": "<a href!\\#$%&()*~+-_.,:;?@[/|]^`=alert('XSS')>foo</a>",
|
||||
"output": "<a>foo</a>",
|
||||
"rexml": "Ill-formed XHTML!"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "non_alpha_non_digit_III",
|
||||
"input": "<a/href=\"javascript:alert('XSS');\">foo</a>",
|
||||
"output": "<a>foo</a>",
|
||||
"rexml": "Ill-formed XHTML!"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "platypus",
|
||||
"input": "<a href=\"http://www.ragingplatypus.com/\" style=\"display:block; position:absolute; left:0; top:0; width:100%; height:100%; z-index:1; background-color:black; background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg); background-x:center; background-y:center; background-repeat:repeat;\">never trust your upstream platypus</a>",
|
||||
"output": "<a href='http://www.ragingplatypus.com/' style='display: block; width: 100%; height: 100%; background-color: black; background-x: center; background-y: center;'>never trust your upstream platypus</a>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "protocol_resolution_in_script_tag",
|
||||
"input": "<script src=//ha.ckers.org/.j></script>",
|
||||
"output": "<script src=\"//ha.ckers.org/.j\"></script>",
|
||||
"rexml": "Ill-formed XHTML!"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_allow_anchors",
|
||||
"input": "<a href='foo' onclick='bar'><script>baz</script></a>",
|
||||
"output": "<a href='foo'><script>baz</script></a>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_allow_image_alt_attribute",
|
||||
"input": "<img alt='foo' onclick='bar' />",
|
||||
"output": "<img alt='foo'/>",
|
||||
"rexml": "<img alt='foo' />"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_allow_image_height_attribute",
|
||||
"input": "<img height='foo' onclick='bar' />",
|
||||
"output": "<img height='foo'/>",
|
||||
"rexml": "<img height='foo' />"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_allow_image_src_attribute",
|
||||
"input": "<img src='foo' onclick='bar' />",
|
||||
"output": "<img src='foo'/>",
|
||||
"rexml": "<img src='foo' />"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_allow_image_width_attribute",
|
||||
"input": "<img width='foo' onclick='bar' />",
|
||||
"output": "<img width='foo'/>",
|
||||
"rexml": "<img width='foo' />"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_handle_blank_text",
|
||||
"input": "",
|
||||
"output": ""
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_handle_malformed_image_tags",
|
||||
"input": "<img \"\"\"><script>alert(\"XSS\")</script>\">",
|
||||
"output": "<img/><script>alert(\"XSS\")</script>\">",
|
||||
"rexml": "Ill-formed XHTML!"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_handle_non_html",
|
||||
"input": "abc",
|
||||
"output": "abc"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_not_fall_for_ridiculous_hack",
|
||||
"input": "<img\nsrc\n=\n\"\nj\na\nv\na\ns\nc\nr\ni\np\nt\n:\na\nl\ne\nr\nt\n(\n'\nX\nS\nS\n'\n)\n\"\n />",
|
||||
"output": "<img/>",
|
||||
"rexml": "<img />"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_not_fall_for_xss_image_hack_0",
|
||||
"input": "<img src=\"javascript:alert('XSS');\" />",
|
||||
"output": "<img/>",
|
||||
"rexml": "<img />"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_not_fall_for_xss_image_hack_1",
|
||||
"input": "<img src=javascript:alert('XSS') />",
|
||||
"output": "<img/>",
|
||||
"rexml": "Ill-formed XHTML!"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_not_fall_for_xss_image_hack_10",
|
||||
"input": "<img src=\"jav
ascript:alert('XSS');\" />",
|
||||
"output": "<img/>",
|
||||
"rexml": "<img />"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_not_fall_for_xss_image_hack_11",
|
||||
"input": "<img src=\"jav
ascript:alert('XSS');\" />",
|
||||
"output": "<img/>",
|
||||
"rexml": "<img />"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_not_fall_for_xss_image_hack_12",
|
||||
"input": "<img src=\"  javascript:alert('XSS');\" />",
|
||||
"output": "<img/>",
|
||||
"rexml": "<img />"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_not_fall_for_xss_image_hack_13",
|
||||
"input": "<img src=\" javascript:alert('XSS');\" />",
|
||||
"output": "<img/>",
|
||||
"rexml": "<img />"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_not_fall_for_xss_image_hack_14",
|
||||
"input": "<img src=\" javascript:alert('XSS');\" />",
|
||||
"output": "<img/>",
|
||||
"rexml": "<img />"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_not_fall_for_xss_image_hack_2",
|
||||
"input": "<img src=\"JaVaScRiPt:alert('XSS')\" />",
|
||||
"output": "<img/>",
|
||||
"rexml": "<img />"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_not_fall_for_xss_image_hack_3",
|
||||
"input": "<img src='javascript:alert("XSS")' />",
|
||||
"output": "<img/>",
|
||||
"rexml": "<img />"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_not_fall_for_xss_image_hack_4",
|
||||
"input": "<img src='javascript:alert(String.fromCharCode(88,83,83))' />",
|
||||
"output": "<img/>",
|
||||
"rexml": "<img />"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_not_fall_for_xss_image_hack_5",
|
||||
"input": "<img src='javascript:alert('XSS')' />",
|
||||
"output": "<img/>",
|
||||
"rexml": "<img />"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_not_fall_for_xss_image_hack_6",
|
||||
"input": "<img src='javascript:alert('XSS')' />",
|
||||
"output": "<img/>",
|
||||
"rexml": "<img />"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_not_fall_for_xss_image_hack_7",
|
||||
"input": "<img src='javascript:alert('XSS')' />",
|
||||
"output": "<img/>",
|
||||
"rexml": "<img />"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_not_fall_for_xss_image_hack_8",
|
||||
"input": "<img src=\"jav\tascript:alert('XSS');\" />",
|
||||
"output": "<img/>",
|
||||
"rexml": "<img />"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_not_fall_for_xss_image_hack_9",
|
||||
"input": "<img src=\"jav	ascript:alert('XSS');\" />",
|
||||
"output": "<img/>",
|
||||
"rexml": "<img />"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_sanitize_half_open_scripts",
|
||||
"input": "<img src=\"javascript:alert('XSS')\"",
|
||||
"output": "<img/>",
|
||||
"rexml": "Ill-formed XHTML!"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_sanitize_invalid_script_tag",
|
||||
"input": "<script/XSS SRC=\"http://ha.ckers.org/xss.js\"></script>",
|
||||
"output": "<script XSS=\"\" SRC=\"http://ha.ckers.org/xss.js\"></script>",
|
||||
"rexml": "Ill-formed XHTML!"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_sanitize_script_tag_with_multiple_open_brackets",
|
||||
"input": "<<script>alert(\"XSS\");//<</script>",
|
||||
"output": "<<script>alert(\"XSS\");//<</script>",
|
||||
"rexml": "Ill-formed XHTML!"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_sanitize_script_tag_with_multiple_open_brackets_2",
|
||||
"input": "<iframe src=http://ha.ckers.org/scriptlet.html\n<",
|
||||
"output": "<iframe src=\"http://ha.ckers.org/scriptlet.html\" <=\"\">",
|
||||
"rexml": "Ill-formed XHTML!"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_sanitize_tag_broken_up_by_null",
|
||||
"input": "<scr\u0000ipt>alert(\"XSS\")</scr\u0000ipt>",
|
||||
"output": "<scr\ufffdipt>alert(\"XSS\")</scr\ufffdipt>",
|
||||
"rexml": "Ill-formed XHTML!"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_sanitize_unclosed_script",
|
||||
"input": "<script src=http://ha.ckers.org/xss.js?<b>",
|
||||
"output": "<script src=\"http://ha.ckers.org/xss.js?&lt;b\">",
|
||||
"rexml": "Ill-formed XHTML!"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_strip_href_attribute_in_a_with_bad_protocols",
|
||||
"input": "<a href=\"javascript:XSS\" title=\"1\">boo</a>",
|
||||
"output": "<a title='1'>boo</a>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_strip_href_attribute_in_a_with_bad_protocols_and_whitespace",
|
||||
"input": "<a href=\" javascript:XSS\" title=\"1\">boo</a>",
|
||||
"output": "<a title='1'>boo</a>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_strip_src_attribute_in_img_with_bad_protocols",
|
||||
"input": "<img src=\"javascript:XSS\" title=\"1\">boo</img>",
|
||||
"output": "<img title='1'/>boo",
|
||||
"rexml": "<img title='1' />"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_strip_src_attribute_in_img_with_bad_protocols_and_whitespace",
|
||||
"input": "<img src=\" javascript:XSS\" title=\"1\">boo</img>",
|
||||
"output": "<img title='1'/>boo",
|
||||
"rexml": "<img title='1' />"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "xml_base",
|
||||
"input": "<div xml:base=\"javascript:alert('XSS');//\">foo</div>",
|
||||
"output": "<div>foo</div>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "xul",
|
||||
"input": "<p style=\"-moz-binding:url('http://ha.ckers.org/xssmoz.xml#xss')\">fubar</p>",
|
||||
"output": "<p style=''>fubar</p>"
|
||||
}
|
||||
]
|
103 vendor/plugins/HTML5lib/testdata/serializer/core.test vendored Normal file
@@ -0,0 +1,103 @@
{"tests": [
|
||||
|
||||
{"description": "proper attribute value escaping",
|
||||
"input": [["StartTag", "span", {"title": "test \"with\" ""}]],
|
||||
"expected": ["<span title='test \"with\" &quot;'>"]
|
||||
},
|
||||
|
||||
{"description": "proper attribute value non-quoting",
|
||||
"input": [["StartTag", "span", {"title": "foo"}]],
|
||||
"expected": ["<span title=foo>"],
|
||||
"xhtml": ["<span title=\"foo\">"]
|
||||
},
|
||||
|
||||
{"description": "proper attribute value quoting (with >)",
|
||||
"input": [["StartTag", "span", {"title": "foo>bar"}]],
|
||||
"expected": ["<span title=\"foo>bar\">"]
|
||||
},
|
||||
|
||||
{"description": "proper attribute value quoting (with <)",
|
||||
"input": [["StartTag", "span", {"title": "foo<bar"}]],
|
||||
"expected": ["<span title=\"foo<bar\">"],
|
||||
"xhtml": ["<span title=\"foo<bar\">"]
|
||||
},
|
||||
|
||||
{"description": "proper attribute value quoting (with \")",
|
||||
"input": [["StartTag", "span", {"title": "foo\"bar"}]],
|
||||
"expected": ["<span title='foo\"bar'>"]
|
||||
},
|
||||
|
||||
{"description": "proper attribute value quoting (with ')",
|
||||
"input": [["StartTag", "span", {"title": "foo'bar"}]],
|
||||
"expected": ["<span title=\"foo'bar\">"]
|
||||
},
|
||||
|
||||
{"description": "proper attribute value quoting (with both \" and ')",
|
||||
"input": [["StartTag", "span", {"title": "foo'bar\"baz"}]],
|
||||
"expected": ["<span title=\"foo'bar"baz\">"]
|
||||
},
|
||||
|
||||
{"description": "proper attribute value quoting (with space)",
|
||||
"input": [["StartTag", "span", {"title": "foo bar"}]],
|
||||
"expected": ["<span title=\"foo bar\">"]
|
||||
},
|
||||
|
||||
{"description": "proper attribute value quoting (with tab)",
|
||||
"input": [["StartTag", "span", {"title": "foo\tbar"}]],
|
||||
"expected": ["<span title=\"foo\tbar\">"]
|
||||
},
|
||||
|
||||
{"description": "proper attribute value quoting (with LF)",
|
||||
"input": [["StartTag", "span", {"title": "foo\nbar"}]],
|
||||
"expected": ["<span title=\"foo\nbar\">"]
|
||||
},
|
||||
|
||||
{"description": "proper attribute value quoting (with CR)",
|
||||
"input": [["StartTag", "span", {"title": "foo\rbar"}]],
|
||||
"expected": ["<span title=\"foo\rbar\">"]
|
||||
},
|
||||
|
||||
{"description": "proper attribute value quoting (with linetab)",
|
||||
"input": [["StartTag", "span", {"title": "foo\u000Bbar"}]],
|
||||
"expected": ["<span title=\"foo\u000Bbar\">"]
|
||||
},
|
||||
|
||||
{"description": "proper attribute value quoting (with form feed)",
|
||||
"input": [["StartTag", "span", {"title": "foo\u000Cbar"}]],
|
||||
"expected": ["<span title=\"foo\u000Cbar\">"]
|
||||
},
|
||||
|
||||
{"description": "void element (as EmptyTag token)",
|
||||
"input": [["EmptyTag", "img", {}]],
|
||||
"expected": ["<img>"],
|
||||
"xhtml": ["<img />"]
|
||||
},
|
||||
|
||||
{"description": "void element (as StartTag token)",
|
||||
"input": [["StartTag", "img", {}]],
|
||||
"expected": ["<img>"],
|
||||
"xhtml": ["<img />"]
|
||||
},
|
||||
|
||||
{"description": "doctype in error",
|
||||
"input": [["Doctype", "foo"]],
|
||||
"expected": ["<!DOCTYPE foo>"]
|
||||
},
|
||||
|
||||
{"description": "character data",
|
||||
"options": {"encoding":"utf-8"},
|
||||
"input": [["Characters", "a<b>c&d"]],
|
||||
"expected": ["a<b>c&d"]
|
||||
},
|
||||
|
||||
{"description": "rcdata",
|
||||
"input": [["StartTag", "script", {}], ["Characters", "a<b>c&d"]],
|
||||
"expected": ["<script>a<b>c&d"]
|
||||
},
|
||||
|
||||
{"description": "doctype",
|
||||
"input": [["Doctype", "HTML"]],
|
||||
"expected": ["<!DOCTYPE HTML>"]
|
||||
}
|
||||
|
||||
]}
|
65 vendor/plugins/HTML5lib/testdata/serializer/injectmeta.test vendored Normal file
@@ -0,0 +1,65 @@
{"tests": [
|
||||
|
||||
{"description": "no encoding",
|
||||
"options": {"inject_meta_charset": true},
|
||||
"input": [["EmptyTag", "head", {}]],
|
||||
"expected": ["<head>"]
|
||||
},
|
||||
|
||||
{"description": "empytag head",
|
||||
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
||||
"input": [["EmptyTag", "head", {}]],
|
||||
"expected": ["<head><meta charset=utf-8>"],
|
||||
"xhtml": ["<head><meta charset=\"utf-8\" /></head>"]
|
||||
},
|
||||
|
||||
{"description": "head w/title",
|
||||
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
||||
"input": [["StartTag", "head", {}], ["StartTag","title",{}], ["Characters", "foo"],["EndTag", "title"], ["EndTag", "head"]],
|
||||
"expected": ["<head><meta charset=utf-8><title>foo</title>"],
|
||||
"xhtml": ["<head><meta charset=\"utf-8\" /><title>foo</title></head>"]
|
||||
},
|
||||
|
||||
{"description": "head w/meta-charset",
|
||||
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
||||
"input": [["StartTag", "head", {}], ["EmptyTag","meta",{"charset":"ascii"}], ["EndTag", "head"]],
|
||||
"expected": ["<head><meta charset=utf-8>"],
|
||||
"xhtml": ["<head><meta charset=\"utf-8\" /></head>"]
|
||||
},
|
||||
|
||||
{"description": "head w/ two meta-charset",
|
||||
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
||||
"input": [["StartTag", "head", {}], ["EmptyTag","meta",{"charset":"ascii"}], ["EmptyTag","meta",{"charset":"ascii"}], ["EndTag", "head"]],
|
||||
"expected": ["<head><meta charset=utf-8><meta charset=utf-8>", "<head><meta charset=utf-8><meta charset=ascii>"],
|
||||
"xhtml": ["<head><meta charset=\"utf-8\" /><meta charset=\"utf-8\" /></head>", "<head><meta charset=\"utf-8\" /><meta charset=\"ascii\" /></head>"]
|
||||
},
|
||||
|
||||
{"description": "head w/robots",
|
||||
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
||||
"input": [["StartTag", "head", {}], ["EmptyTag","meta",{"name":"robots","content":"noindex"}], ["EndTag", "head"]],
|
||||
"expected": ["<head><meta charset=utf-8><meta content=noindex name=robots>"],
|
||||
"xhtml": ["<head><meta charset=\"utf-8\" /><meta content=\"noindex\" name=\"robots\" /></head>"]
|
||||
},
|
||||
|
||||
{"description": "head w/robots & charset",
|
||||
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
||||
"input": [["StartTag", "head", {}], ["EmptyTag","meta",{"name":"robots","content":"noindex"}], ["EmptyTag","meta",{"charset":"ascii"}], ["EndTag", "head"]],
|
||||
"expected": ["<head><meta content=noindex name=robots><meta charset=utf-8>"],
|
||||
"xhtml": ["<head><meta content=\"noindex\" name=\"robots\" /><meta charset=\"utf-8\" /></head>"]
|
||||
},
|
||||
|
||||
{"description": "head w/ charset in http-equiv content-type",
|
||||
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
||||
"input": [["StartTag", "head", {}], ["EmptyTag","meta",{"http-equiv":"content-type", "content":"text/html; charset=ascii"}], ["EndTag", "head"]],
|
||||
"expected": ["<head><meta content=\"text/html; charset=utf-8\" http-equiv=content-type>"],
|
||||
"xhtml": ["<head><meta content=\"text/html; charset=utf-8\" http-equiv=\"content-type\" /></head>"]
|
||||
},
|
||||
|
||||
{"description": "head w/robots & charset in http-equiv content-type",
|
||||
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
||||
"input": [["StartTag", "head", {}], ["EmptyTag","meta",{"name":"robots","content":"noindex"}], ["EmptyTag","meta",{"http-equiv":"content-type", "content":"text/html; charset=ascii"}], ["EndTag", "head"]],
|
||||
"expected": ["<head><meta content=noindex name=robots><meta content=\"text/html; charset=utf-8\" http-equiv=content-type>"],
|
||||
"xhtml": ["<head><meta content=\"noindex\" name=\"robots\" /><meta content=\"text/html; charset=utf-8\" http-equiv=\"content-type\" /></head>"]
|
||||
}
|
||||
|
||||
]}
|
900 vendor/plugins/HTML5lib/testdata/serializer/optionaltags.test vendored Normal file
@@ -0,0 +1,900 @@
{"tests": [
|
||||
|
||||
{"description": "html start-tag followed by text, with attributes",
|
||||
"input": [["StartTag", "html", {"lang": "en"}], ["Characters", "foo"]],
|
||||
"expected": ["<html lang=en>foo"]
|
||||
},
|
||||
|
||||
|
||||
|
||||
{"description": "html start-tag followed by comment",
|
||||
"input": [["StartTag", "html", {}], ["Comment", "foo"]],
|
||||
"expected": ["<html><!--foo-->"]
|
||||
},
|
||||
|
||||
{"description": "html start-tag followed by space character",
|
||||
"input": [["StartTag", "html", {}], ["Characters", " foo"]],
|
||||
"expected": ["<html> foo"]
|
||||
},
|
||||
|
||||
{"description": "html start-tag followed by text",
|
||||
"input": [["StartTag", "html", {}], ["Characters", "foo"]],
|
||||
"expected": ["foo"]
|
||||
},
|
||||
|
||||
{"description": "html start-tag followed by start-tag",
|
||||
"input": [["StartTag", "html", {}], ["StartTag", "foo", {}]],
|
||||
"expected": ["<foo>"]
|
||||
},
|
||||
|
||||
{"description": "html start-tag followed by end-tag",
|
||||
"input": [["StartTag", "html", {}], ["EndTag", "foo", {}]],
|
||||
"expected": ["</foo>"]
|
||||
},
|
||||
|
||||
{"description": "html start-tag at EOF (shouldn't ever happen?!)",
|
||||
"input": [["StartTag", "html", {}]],
|
||||
"expected": [""]
|
||||
},
|
||||
|
||||
|
||||
|
||||
{"description": "html end-tag followed by comment",
|
||||
"input": [["EndTag", "html"], ["Comment", "foo"]],
|
||||
"expected": ["</html><!--foo-->"]
|
||||
},
|
||||
|
||||
{"description": "html end-tag followed by space character",
|
||||
"input": [["EndTag", "html"], ["Characters", " foo"]],
|
||||
"expected": ["</html> foo"]
|
||||
},
|
||||
|
||||
{"description": "html end-tag followed by text",
|
||||
"input": [["EndTag", "html"], ["Characters", "foo"]],
|
||||
"expected": ["foo"]
|
||||
},
|
||||
|
||||
{"description": "html end-tag followed by start-tag",
|
||||
"input": [["EndTag", "html"], ["StartTag", "foo", {}]],
|
||||
"expected": ["<foo>"]
|
||||
},
|
||||
|
||||
{"description": "html end-tag followed by end-tag",
|
||||
"input": [["EndTag", "html"], ["EndTag", "foo", {}]],
|
||||
"expected": ["</foo>"]
|
||||
},
|
||||
|
||||
{"description": "html end-tag at EOF",
|
||||
"input": [["EndTag", "html"]],
|
||||
"expected": [""]
|
||||
},
|
||||
|
||||
|
||||
|
||||
|
||||
{"description": "head start-tag followed by comment",
|
||||
"input": [["StartTag", "head", {}], ["Comment", "foo"]],
|
||||
"expected": ["<head><!--foo-->"]
|
||||
},
|
||||
|
||||
{"description": "head start-tag followed by space character",
|
||||
"input": [["StartTag", "head", {}], ["Characters", " foo"]],
|
||||
"expected": ["<head> foo"]
|
||||
},
|
||||
|
||||
{"description": "head start-tag followed by text",
|
||||
"input": [["StartTag", "head", {}], ["Characters", "foo"]],
|
||||
"expected": ["<head>foo"]
|
||||
},
|
||||
|
||||
{"description": "head start-tag followed by start-tag",
|
||||
"input": [["StartTag", "head", {}], ["StartTag", "foo", {}]],
|
||||
"expected": ["<foo>"]
|
||||
},
|
||||
|
||||
{"description": "head start-tag followed by end-tag",
|
||||
"input": [["StartTag", "head", {}], ["EndTag", "foo", {}]],
|
||||
"expected": ["<head></foo>"]
|
||||
},
|
||||
|
||||
{"description": "head start-tag at EOF (shouldn't ever happen?!)",
|
||||
"input": [["StartTag", "head", {}]],
|
||||
"expected": ["<head>"]
|
||||
},
|
||||
|
||||
|
||||
|
||||
{"description": "head end-tag followed by comment",
|
||||
"input": [["EndTag", "head"], ["Comment", "foo"]],
|
||||
"expected": ["</head><!--foo-->"]
|
||||
},
|
||||
|
||||
{"description": "head end-tag followed by space character",
|
||||
"input": [["EndTag", "head"], ["Characters", " foo"]],
|
||||
"expected": ["</head> foo"]
|
||||
},
|
||||
|
||||
{"description": "head end-tag followed by text",
|
||||
"input": [["EndTag", "head"], ["Characters", "foo"]],
|
||||
"expected": ["foo"]
|
||||
},
|
||||
|
||||
{"description": "head end-tag followed by start-tag",
|
||||
"input": [["EndTag", "head"], ["StartTag", "foo", {}]],
|
||||
"expected": ["<foo>"]
|
||||
},
|
||||
|
||||
{"description": "head end-tag followed by end-tag",
|
||||
"input": [["EndTag", "head"], ["EndTag", "foo", {}]],
|
||||
"expected": ["</foo>"]
|
||||
},
|
||||
|
||||
{"description": "head end-tag at EOF",
|
||||
"input": [["EndTag", "head"]],
|
||||
"expected": [""]
|
||||
},
|
||||
|
||||
|
||||
|
||||
|
||||
{"description": "body start-tag followed by comment",
|
||||
"input": [["StartTag", "body", {}], ["Comment", "foo"]],
|
||||
"expected": ["<body><!--foo-->"]
|
||||
},
|
||||
|
||||
{"description": "body start-tag followed by space character",
|
||||
"input": [["StartTag", "body", {}], ["Characters", " foo"]],
|
||||
"expected": ["<body> foo"]
|
||||
},
|
||||
|
||||
{"description": "body start-tag followed by text",
|
||||
"input": [["StartTag", "body", {}], ["Characters", "foo"]],
|
||||
"expected": ["foo"]
|
||||
},
|
||||
|
||||
{"description": "body start-tag followed by start-tag",
|
||||
"input": [["StartTag", "body", {}], ["StartTag", "foo", {}]],
|
||||
"expected": ["<foo>"]
|
||||
},
|
||||
|
||||
{"description": "body start-tag followed by end-tag",
|
||||
"input": [["StartTag", "body", {}], ["EndTag", "foo", {}]],
|
||||
"expected": ["</foo>"]
|
||||
},
|
||||
|
||||
{"description": "body start-tag at EOF (shouldn't ever happen?!)",
|
||||
"input": [["StartTag", "body", {}]],
|
||||
"expected": [""]
|
||||
},
|
||||
|
||||
|
||||
|
||||
{"description": "body end-tag followed by comment",
|
||||
"input": [["EndTag", "body"], ["Comment", "foo"]],
|
||||
"expected": ["</body><!--foo-->"]
|
||||
},
|
||||
|
||||
{"description": "body end-tag followed by space character",
|
||||
"input": [["EndTag", "body"], ["Characters", " foo"]],
|
||||
"expected": ["</body> foo"]
|
||||
},
|
||||
|
||||
{"description": "body end-tag followed by text",
|
||||
"input": [["EndTag", "body"], ["Characters", "foo"]],
|
||||
"expected": ["foo"]
|
||||
},
|
||||
|
||||
{"description": "body end-tag followed by start-tag",
|
||||
"input": [["EndTag", "body"], ["StartTag", "foo", {}]],
|
||||
"expected": ["<foo>"]
|
||||
},
|
||||
|
||||
{"description": "body end-tag followed by end-tag",
|
||||
"input": [["EndTag", "body"], ["EndTag", "foo", {}]],
|
||||
"expected": ["</foo>"]
|
||||
},
|
||||
|
||||
{"description": "body end-tag at EOF",
|
||||
"input": [["EndTag", "body"]],
|
||||
"expected": [""]
|
||||
},
|
||||
|
||||
|
||||
|
||||
|
||||
{"description": "li end-tag followed by comment",
|
||||
"input": [["EndTag", "li"], ["Comment", "foo"]],
|
||||
"expected": ["</li><!--foo-->"]
|
||||
},
|
||||
|
||||
{"description": "li end-tag followed by space character",
|
||||
"input": [["EndTag", "li"], ["Characters", " foo"]],
|
||||
"expected": ["</li> foo"]
|
||||
},
|
||||
|
||||
{"description": "li end-tag followed by text",
|
||||
"input": [["EndTag", "li"], ["Characters", "foo"]],
|
||||
"expected": ["</li>foo"]
|
||||
},
|
||||
|
||||
{"description": "li end-tag followed by start-tag",
|
||||
"input": [["EndTag", "li"], ["StartTag", "foo", {}]],
|
||||
"expected": ["</li><foo>"]
|
||||
},
|
||||
|
||||
{"description": "li end-tag followed by li start-tag",
|
||||
"input": [["EndTag", "li"], ["StartTag", "li", {}]],
|
||||
"expected": ["<li>"]
|
||||
},
|
||||
|
||||
{"description": "li end-tag followed by end-tag",
|
||||
"input": [["EndTag", "li"], ["EndTag", "foo", {}]],
|
||||
"expected": ["</foo>"]
|
||||
},
|
||||
|
||||
{"description": "li end-tag at EOF",
|
||||
"input": [["EndTag", "li"]],
|
||||
"expected": [""]
|
||||
},
|
||||
|
||||
|
||||
|
||||
|
||||
{"description": "dt end-tag followed by comment",
|
||||
"input": [["EndTag", "dt"], ["Comment", "foo"]],
|
||||
"expected": ["</dt><!--foo-->"]
|
||||
},
|
||||
|
||||
{"description": "dt end-tag followed by space character",
|
||||
"input": [["EndTag", "dt"], ["Characters", " foo"]],
|
||||
"expected": ["</dt> foo"]
|
||||
},
|
||||
|
||||
{"description": "dt end-tag followed by text",
|
||||
"input": [["EndTag", "dt"], ["Characters", "foo"]],
|
||||
"expected": ["</dt>foo"]
|
||||
},
|
||||
|
||||
{"description": "dt end-tag followed by start-tag",
|
||||
"input": [["EndTag", "dt"], ["StartTag", "foo", {}]],
|
||||
"expected": ["</dt><foo>"]
|
||||
},
|
||||
|
||||
{"description": "dt end-tag followed by dt start-tag",
|
||||
"input": [["EndTag", "dt"], ["StartTag", "dt", {}]],
|
||||
"expected": ["<dt>"]
|
||||
},
|
||||
|
||||
{"description": "dt end-tag followed by dd start-tag",
|
||||
"input": [["EndTag", "dt"], ["StartTag", "dd", {}]],
|
||||
"expected": ["<dd>"]
|
||||
},
|
||||
|
||||
{"description": "dt end-tag followed by end-tag",
|
||||
"input": [["EndTag", "dt"], ["EndTag", "foo", {}]],
|
||||
"expected": ["</dt></foo>"]
|
||||
},
|
||||
|
||||
{"description": "dt end-tag at EOF",
|
||||
"input": [["EndTag", "dt"]],
|
||||
"expected": ["</dt>"]
|
||||
},
|
||||
|
||||
|
||||
|
||||
|
||||
{"description": "dd end-tag followed by comment",
|
||||
"input": [["EndTag", "dd"], ["Comment", "foo"]],
|
||||
"expected": ["</dd><!--foo-->"]
|
||||
},
|
||||
|
||||
{"description": "dd end-tag followed by space character",
|
||||
"input": [["EndTag", "dd"], ["Characters", " foo"]],
|
||||
"expected": ["</dd> foo"]
|
||||
},
|
||||
|
||||
{"description": "dd end-tag followed by text",
|
||||
"input": [["EndTag", "dd"], ["Characters", "foo"]],
|
||||
"expected": ["</dd>foo"]
|
||||
},
|
||||
|
||||
{"description": "dd end-tag followed by start-tag",
|
||||
"input": [["EndTag", "dd"], ["StartTag", "foo", {}]],
|
||||
"expected": ["</dd><foo>"]
|
||||
},
|
||||
|
||||
{"description": "dd end-tag followed by dd start-tag",
|
||||
"input": [["EndTag", "dd"], ["StartTag", "dd", {}]],
|
||||
"expected": ["<dd>"]
|
||||
},
|
||||
|
||||
{"description": "dd end-tag followed by dt start-tag",
|
||||
"input": [["EndTag", "dd"], ["StartTag", "dt", {}]],
|
||||
"expected": ["<dt>"]
|
||||
},
|
||||
|
||||
{"description": "dd end-tag followed by end-tag",
|
||||
"input": [["EndTag", "dd"], ["EndTag", "foo", {}]],
|
||||
"expected": ["</foo>"]
|
||||
},
|
||||
|
||||
{"description": "dd end-tag at EOF",
|
||||
"input": [["EndTag", "dd"]],
|
||||
"expected": [""]
|
||||
},
|
||||
|
||||
|
||||
|
||||
|
||||
{"description": "p end-tag followed by comment",
|
||||
"input": [["EndTag", "p"], ["Comment", "foo"]],
|
||||
"expected": ["</p><!--foo-->"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by space character",
|
||||
"input": [["EndTag", "p"], ["Characters", " foo"]],
|
||||
"expected": ["</p> foo"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by text",
|
||||
"input": [["EndTag", "p"], ["Characters", "foo"]],
|
||||
"expected": ["</p>foo"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by start-tag",
|
||||
"input": [["EndTag", "p"], ["StartTag", "foo", {}]],
|
||||
"expected": ["</p><foo>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by address start-tag",
|
||||
"input": [["EndTag", "p"], ["StartTag", "address", {}]],
|
||||
"expected": ["<address>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by blockquote start-tag",
|
||||
"input": [["EndTag", "p"], ["StartTag", "blockquote", {}]],
|
||||
"expected": ["<blockquote>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by dl start-tag",
|
||||
"input": [["EndTag", "p"], ["StartTag", "dl", {}]],
|
||||
"expected": ["<dl>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by fieldset start-tag",
|
||||
"input": [["EndTag", "p"], ["StartTag", "fieldset", {}]],
|
||||
"expected": ["<fieldset>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by form start-tag",
|
||||
"input": [["EndTag", "p"], ["StartTag", "form", {}]],
|
||||
"expected": ["<form>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by h1 start-tag",
|
||||
"input": [["EndTag", "p"], ["StartTag", "h1", {}]],
|
||||
"expected": ["<h1>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by h2 start-tag",
|
||||
"input": [["EndTag", "p"], ["StartTag", "h2", {}]],
|
||||
"expected": ["<h2>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by h3 start-tag",
|
||||
"input": [["EndTag", "p"], ["StartTag", "h3", {}]],
|
||||
"expected": ["<h3>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by h4 start-tag",
|
||||
"input": [["EndTag", "p"], ["StartTag", "h4", {}]],
|
||||
"expected": ["<h4>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by h5 start-tag",
|
||||
"input": [["EndTag", "p"], ["StartTag", "h5", {}]],
|
||||
"expected": ["<h5>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by h6 start-tag",
|
||||
"input": [["EndTag", "p"], ["StartTag", "h6", {}]],
|
||||
"expected": ["<h6>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by hr start-tag",
|
||||
"input": [["EndTag", "p"], ["StartTag", "hr", {}]],
|
||||
"expected": ["<hr>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by menu start-tag",
|
||||
"input": [["EndTag", "p"], ["StartTag", "menu", {}]],
|
||||
"expected": ["<menu>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by ol start-tag",
|
||||
"input": [["EndTag", "p"], ["StartTag", "ol", {}]],
|
||||
"expected": ["<ol>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by p start-tag",
|
||||
"input": [["EndTag", "p"], ["StartTag", "p", {}]],
|
||||
"expected": ["<p>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by pre start-tag",
|
||||
"input": [["EndTag", "p"], ["StartTag", "pre", {}]],
|
||||
"expected": ["<pre>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by table start-tag",
|
||||
"input": [["EndTag", "p"], ["StartTag", "table", {}]],
|
||||
"expected": ["<table>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by ul start-tag",
|
||||
"input": [["EndTag", "p"], ["StartTag", "ul", {}]],
|
||||
"expected": ["<ul>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by end-tag",
|
||||
"input": [["EndTag", "p"], ["EndTag", "foo", {}]],
|
||||
"expected": ["</foo>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag at EOF",
|
||||
"input": [["EndTag", "p"]],
|
||||
"expected": [""]
|
||||
},
|
||||
|
||||
|
||||
|
||||
|
||||
{"description": "optgroup end-tag followed by comment",
|
||||
"input": [["EndTag", "optgroup"], ["Comment", "foo"]],
|
||||
"expected": ["</optgroup><!--foo-->"]
|
||||
},
|
||||
|
||||
{"description": "optgroup end-tag followed by space character",
|
||||
"input": [["EndTag", "optgroup"], ["Characters", " foo"]],
|
||||
"expected": ["</optgroup> foo"]
|
||||
},
|
||||
|
||||
{"description": "optgroup end-tag followed by text",
|
||||
"input": [["EndTag", "optgroup"], ["Characters", "foo"]],
|
||||
"expected": ["</optgroup>foo"]
|
||||
},
|
||||
|
||||
{"description": "optgroup end-tag followed by start-tag",
|
||||
"input": [["EndTag", "optgroup"], ["StartTag", "foo", {}]],
|
||||
"expected": ["</optgroup><foo>"]
|
||||
},
|
||||
|
||||
{"description": "optgroup end-tag followed by optgroup start-tag",
|
||||
"input": [["EndTag", "optgroup"], ["StartTag", "optgroup", {}]],
|
||||
"expected": ["<optgroup>"]
|
||||
},
|
||||
|
||||
{"description": "optgroup end-tag followed by end-tag",
|
||||
"input": [["EndTag", "optgroup"], ["EndTag", "foo", {}]],
|
||||
"expected": ["</foo>"]
|
||||
},
|
||||
|
||||
{"description": "optgroup end-tag at EOF",
|
||||
"input": [["EndTag", "optgroup"]],
|
||||
"expected": [""]
|
||||
},
|
||||
|
||||
|
||||
|
||||
|
||||
{"description": "option end-tag followed by comment",
|
||||
"input": [["EndTag", "option"], ["Comment", "foo"]],
|
||||
"expected": ["</option><!--foo-->"]
|
||||
},
|
||||
|
||||
{"description": "option end-tag followed by space character",
|
||||
"input": [["EndTag", "option"], ["Characters", " foo"]],
|
||||
"expected": ["</option> foo"]
|
||||
},
|
||||
|
||||
{"description": "option end-tag followed by text",
|
||||
"input": [["EndTag", "option"], ["Characters", "foo"]],
|
||||
"expected": ["</option>foo"]
|
||||
},
|
||||
|
||||
{"description": "option end-tag followed by start-tag",
|
||||
"input": [["EndTag", "option"], ["StartTag", "foo", {}]],
|
||||
"expected": ["</option><foo>"]
|
||||
},
|
||||
|
||||
{"description": "option end-tag followed by option start-tag",
|
||||
"input": [["EndTag", "option"], ["StartTag", "option", {}]],
|
||||
"expected": ["<option>"]
|
||||
},
|
||||
|
||||
{"description": "option end-tag followed by end-tag",
|
||||
"input": [["EndTag", "option"], ["EndTag", "foo", {}]],
|
||||
"expected": ["</foo>"]
|
||||
},
|
||||
|
||||
{"description": "option end-tag at EOF",
|
||||
"input": [["EndTag", "option"]],
|
||||
"expected": [""]
|
||||
},
|
||||
|
||||
|
||||
|
||||
|
||||
{"description": "colgroup start-tag followed by comment",
|
||||
"input": [["StartTag", "colgroup", {}], ["Comment", "foo"]],
|
||||
"expected": ["<colgroup><!--foo-->"]
|
||||
},
|
||||
|
||||
{"description": "colgroup start-tag followed by space character",
|
||||
"input": [["StartTag", "colgroup", {}], ["Characters", " foo"]],
|
||||
"expected": ["<colgroup> foo"]
|
||||
},
|
||||
|
||||
{"description": "colgroup start-tag followed by text",
|
||||
"input": [["StartTag", "colgroup", {}], ["Characters", "foo"]],
|
||||
"expected": ["<colgroup>foo"]
|
||||
},
|
||||
|
||||
{"description": "colgroup start-tag followed by start-tag",
|
||||
"input": [["StartTag", "colgroup", {}], ["StartTag", "foo", {}]],
|
||||
"expected": ["<colgroup><foo>"]
|
||||
},
|
||||
|
||||
{"description": "first colgroup in a table with a col child",
|
||||
"input": [["StartTag", "table", {}], ["StartTag", "colgroup", {}], ["StartTag", "col", {}]],
|
||||
"expected": ["<table><col>"]
|
||||
},
|
||||
|
||||
{"description": "colgroup with a col child, following another colgroup",
|
||||
"input": [["EndTag", "colgroup", {}], ["StartTag", "colgroup", {}], ["StartTag", "col", {}]],
|
||||
"expected": ["</colgroup><col>", "<colgroup><col>"]
|
||||
},
|
||||
|
||||
{"description": "colgroup start-tag followed by end-tag",
|
||||
"input": [["StartTag", "colgroup", {}], ["EndTag", "foo", {}]],
|
||||
"expected": ["<colgroup></foo>"]
|
||||
},
|
||||
|
||||
{"description": "colgroup start-tag at EOF",
|
||||
"input": [["StartTag", "colgroup", {}]],
|
||||
"expected": ["<colgroup>"]
|
||||
},
|
||||
|
||||
|
||||
|
||||
{"description": "colgroup end-tag followed by comment",
|
||||
"input": [["EndTag", "colgroup"], ["Comment", "foo"]],
|
||||
"expected": ["</colgroup><!--foo-->"]
|
||||
},
|
||||
|
||||
{"description": "colgroup end-tag followed by space character",
|
||||
"input": [["EndTag", "colgroup"], ["Characters", " foo"]],
|
||||
"expected": ["</colgroup> foo"]
|
||||
},
|
||||
|
||||
{"description": "colgroup end-tag followed by text",
|
||||
"input": [["EndTag", "colgroup"], ["Characters", "foo"]],
|
||||
"expected": ["foo"]
|
||||
},
|
||||
|
||||
{"description": "colgroup end-tag followed by start-tag",
|
||||
"input": [["EndTag", "colgroup"], ["StartTag", "foo", {}]],
|
||||
"expected": ["<foo>"]
|
||||
},
|
||||
|
||||
{"description": "colgroup end-tag followed by end-tag",
|
||||
"input": [["EndTag", "colgroup"], ["EndTag", "foo", {}]],
|
||||
"expected": ["</foo>"]
|
||||
},
|
||||
|
||||
{"description": "colgroup end-tag at EOF",
|
||||
"input": [["EndTag", "colgroup"]],
|
||||
"expected": [""]
|
||||
},
|
||||
|
||||
|
||||
|
||||
|
||||
{"description": "thead end-tag followed by comment",
|
||||
"input": [["EndTag", "thead"], ["Comment", "foo"]],
|
||||
"expected": ["</thead><!--foo-->"]
|
||||
},
|
||||
|
||||
{"description": "thead end-tag followed by space character",
|
||||
"input": [["EndTag", "thead"], ["Characters", " foo"]],
|
||||
"expected": ["</thead> foo"]
|
||||
},
|
||||
|
||||
{"description": "thead end-tag followed by text",
|
||||
"input": [["EndTag", "thead"], ["Characters", "foo"]],
|
||||
"expected": ["</thead>foo"]
|
||||
},
|
||||
|
||||
{"description": "thead end-tag followed by start-tag",
|
||||
"input": [["EndTag", "thead"], ["StartTag", "foo", {}]],
|
||||
"expected": ["</thead><foo>"]
|
||||
},
|
||||
|
||||
{"description": "thead end-tag followed by tbody start-tag",
|
||||
"input": [["EndTag", "thead"], ["StartTag", "tbody", {}]],
|
||||
"expected": ["<tbody>"]
|
||||
},
|
||||
|
||||
{"description": "thead end-tag followed by tfoot start-tag",
|
||||
"input": [["EndTag", "thead"], ["StartTag", "tfoot", {}]],
|
||||
"expected": ["<tfoot>"]
|
||||
},
|
||||
|
||||
{"description": "thead end-tag followed by end-tag",
|
||||
"input": [["EndTag", "thead"], ["EndTag", "foo", {}]],
|
||||
"expected": ["</thead></foo>"]
|
||||
},
|
||||
|
||||
{"description": "thead end-tag at EOF",
|
||||
"input": [["EndTag", "thead"]],
|
||||
"expected": ["</thead>"]
|
||||
},
|
||||
|
||||
|
||||
|
||||
|
||||
{"description": "tbody start-tag followed by comment",
|
||||
"input": [["StartTag", "tbody", {}], ["Comment", "foo"]],
|
||||
"expected": ["<tbody><!--foo-->"]
|
||||
},
|
||||
|
||||
{"description": "tbody start-tag followed by space character",
|
||||
"input": [["StartTag", "tbody", {}], ["Characters", " foo"]],
|
||||
"expected": ["<tbody> foo"]
|
||||
},
|
||||
|
||||
{"description": "tbody start-tag followed by text",
|
||||
"input": [["StartTag", "tbody", {}], ["Characters", "foo"]],
|
||||
"expected": ["<tbody>foo"]
|
||||
},
|
||||
|
||||
{"description": "tbody start-tag followed by start-tag",
|
||||
"input": [["StartTag", "tbody", {}], ["StartTag", "foo", {}]],
|
||||
"expected": ["<tbody><foo>"]
|
||||
},
|
||||
|
||||
{"description": "first tbody in a table with a tr child",
|
||||
"input": [["StartTag", "table", {}], ["StartTag", "tbody", {}], ["StartTag", "tr", {}]],
|
||||
"expected": ["<table><tr>"]
|
||||
},
|
||||
|
||||
{"description": "tbody with a tr child, following another tbody",
|
||||
"input": [["EndTag", "tbody", {}], ["StartTag", "tbody", {}], ["StartTag", "tr", {}]],
|
||||
"expected": ["<tbody><tr>", "</tbody><tr>"]
|
||||
},
|
||||
|
||||
{"description": "tbody with a tr child, following a thead",
|
||||
"input": [["EndTag", "thead", {}], ["StartTag", "tbody", {}], ["StartTag", "tr", {}]],
|
||||
"expected": ["<tbody><tr>", "</thead><tr>"]
|
||||
},
|
||||
|
||||
{"description": "tbody with a tr child, following a tfoot",
|
||||
"input": [["EndTag", "tfoot", {}], ["StartTag", "tbody", {}], ["StartTag", "tr", {}]],
|
||||
"expected": ["<tbody><tr>", "</tfoot><tr>"]
|
||||
},
|
||||
|
||||
{"description": "tbody start-tag followed by end-tag",
|
||||
"input": [["StartTag", "tbody", {}], ["EndTag", "foo", {}]],
|
||||
"expected": ["<tbody></foo>"]
|
||||
},
|
||||
|
||||
{"description": "tbody start-tag at EOF",
|
||||
"input": [["StartTag", "tbody", {}]],
|
||||
"expected": ["<tbody>"]
|
||||
},
|
||||
|
||||
|
||||
|
||||
{"description": "tbody end-tag followed by comment",
|
||||
"input": [["EndTag", "tbody"], ["Comment", "foo"]],
|
||||
"expected": ["</tbody><!--foo-->"]
|
||||
},
|
||||
|
||||
{"description": "tbody end-tag followed by space character",
|
||||
"input": [["EndTag", "tbody"], ["Characters", " foo"]],
|
||||
"expected": ["</tbody> foo"]
|
||||
},
|
||||
|
||||
{"description": "tbody end-tag followed by text",
|
||||
"input": [["EndTag", "tbody"], ["Characters", "foo"]],
|
||||
"expected": ["</tbody>foo"]
|
||||
},
|
||||
|
||||
{"description": "tbody end-tag followed by start-tag",
|
||||
"input": [["EndTag", "tbody"], ["StartTag", "foo", {}]],
|
||||
"expected": ["</tbody><foo>"]
|
||||
},
|
||||
|
||||
{"description": "tbody end-tag followed by tbody start-tag",
|
||||
"input": [["EndTag", "tbody"], ["StartTag", "tbody", {}]],
|
||||
"expected": ["<tbody>", "</tbody>"]
|
||||
},
|
||||
|
||||
{"description": "tbody end-tag followed by tfoot start-tag",
|
||||
"input": [["EndTag", "tbody"], ["StartTag", "tfoot", {}]],
|
||||
"expected": ["<tfoot>"]
|
||||
},
|
||||
|
||||
{"description": "tbody end-tag followed by end-tag",
|
||||
"input": [["EndTag", "tbody"], ["EndTag", "foo", {}]],
|
||||
"expected": ["</foo>"]
|
||||
},
|
||||
|
||||
{"description": "tbody end-tag at EOF",
|
||||
"input": [["EndTag", "tbody"]],
|
||||
"expected": [""]
|
||||
},
|
||||
|
||||
|
||||
|
||||
|
||||
{"description": "tfoot end-tag followed by comment",
|
||||
"input": [["EndTag", "tfoot"], ["Comment", "foo"]],
|
||||
"expected": ["</tfoot><!--foo-->"]
|
||||
},
|
||||
|
||||
{"description": "tfoot end-tag followed by space character",
|
||||
"input": [["EndTag", "tfoot"], ["Characters", " foo"]],
|
||||
"expected": ["</tfoot> foo"]
|
||||
},
|
||||
|
||||
{"description": "tfoot end-tag followed by text",
|
||||
"input": [["EndTag", "tfoot"], ["Characters", "foo"]],
|
||||
"expected": ["</tfoot>foo"]
|
||||
},
|
||||
|
||||
{"description": "tfoot end-tag followed by start-tag",
|
||||
"input": [["EndTag", "tfoot"], ["StartTag", "foo", {}]],
|
||||
"expected": ["</tfoot><foo>"]
|
||||
},
|
||||
|
||||
{"description": "tfoot end-tag followed by tbody start-tag",
|
||||
"input": [["EndTag", "tfoot"], ["StartTag", "tbody", {}]],
|
||||
"expected": ["<tbody>", "</tfoot>"]
|
||||
},
|
||||
|
||||
{"description": "tfoot end-tag followed by end-tag",
|
||||
"input": [["EndTag", "tfoot"], ["EndTag", "foo", {}]],
|
||||
"expected": ["</foo>"]
|
||||
},
|
||||
|
||||
{"description": "tfoot end-tag at EOF",
|
||||
"input": [["EndTag", "tfoot"]],
|
||||
"expected": [""]
|
||||
},
|
||||
|
||||
|
||||
|
||||
|
||||
{"description": "tr end-tag followed by comment",
|
||||
"input": [["EndTag", "tr"], ["Comment", "foo"]],
|
||||
"expected": ["</tr><!--foo-->"]
|
||||
},
|
||||
|
||||
{"description": "tr end-tag followed by space character",
|
||||
"input": [["EndTag", "tr"], ["Characters", " foo"]],
|
||||
"expected": ["</tr> foo"]
|
||||
},
|
||||
|
||||
{"description": "tr end-tag followed by text",
|
||||
"input": [["EndTag", "tr"], ["Characters", "foo"]],
|
||||
"expected": ["</tr>foo"]
|
||||
},
|
||||
|
||||
{"description": "tr end-tag followed by start-tag",
|
||||
"input": [["EndTag", "tr"], ["StartTag", "foo", {}]],
|
||||
"expected": ["</tr><foo>"]
|
||||
},
|
||||
|
||||
{"description": "tr end-tag followed by tr start-tag",
|
||||
"input": [["EndTag", "tr"], ["StartTag", "tr", {}]],
|
||||
"expected": ["<tr>", "</tr>"]
|
||||
},
|
||||
|
||||
{"description": "tr end-tag followed by end-tag",
|
||||
"input": [["EndTag", "tr"], ["EndTag", "foo", {}]],
|
||||
"expected": ["</foo>"]
|
||||
},
|
||||
|
||||
{"description": "tr end-tag at EOF",
|
||||
"input": [["EndTag", "tr"]],
|
||||
"expected": [""]
|
||||
},
|
||||
|
||||
|
||||
|
||||
|
||||
{"description": "td end-tag followed by comment",
|
||||
"input": [["EndTag", "td"], ["Comment", "foo"]],
|
||||
"expected": ["</td><!--foo-->"]
|
||||
},
|
||||
|
||||
{"description": "td end-tag followed by space character",
|
||||
"input": [["EndTag", "td"], ["Characters", " foo"]],
|
||||
"expected": ["</td> foo"]
|
||||
},
|
||||
|
||||
{"description": "td end-tag followed by text",
|
||||
"input": [["EndTag", "td"], ["Characters", "foo"]],
|
||||
"expected": ["</td>foo"]
|
||||
},
|
||||
|
||||
{"description": "td end-tag followed by start-tag",
|
||||
"input": [["EndTag", "td"], ["StartTag", "foo", {}]],
|
||||
"expected": ["</td><foo>"]
|
||||
},
|
||||
|
||||
{"description": "td end-tag followed by td start-tag",
|
||||
"input": [["EndTag", "td"], ["StartTag", "td", {}]],
|
||||
"expected": ["<td>", "</td>"]
|
||||
},
|
||||
|
||||
{"description": "td end-tag followed by th start-tag",
|
||||
"input": [["EndTag", "td"], ["StartTag", "th", {}]],
|
||||
"expected": ["<th>", "</td>"]
|
||||
},
|
||||
|
||||
{"description": "td end-tag followed by end-tag",
|
||||
"input": [["EndTag", "td"], ["EndTag", "foo", {}]],
|
||||
"expected": ["</foo>"]
|
||||
},
|
||||
|
||||
{"description": "td end-tag at EOF",
|
||||
"input": [["EndTag", "td"]],
|
||||
"expected": [""]
|
||||
},
|
||||
|
||||
|
||||
|
||||
|
||||
{"description": "th end-tag followed by comment",
|
||||
"input": [["EndTag", "th"], ["Comment", "foo"]],
|
||||
"expected": ["</th><!--foo-->"]
|
||||
},
|
||||
|
||||
{"description": "th end-tag followed by space character",
|
||||
"input": [["EndTag", "th"], ["Characters", " foo"]],
|
||||
"expected": ["</th> foo"]
|
||||
},
|
||||
|
||||
{"description": "th end-tag followed by text",
|
||||
"input": [["EndTag", "th"], ["Characters", "foo"]],
|
||||
"expected": ["</th>foo"]
|
||||
},
|
||||
|
||||
{"description": "th end-tag followed by start-tag",
|
||||
"input": [["EndTag", "th"], ["StartTag", "foo", {}]],
|
||||
"expected": ["</th><foo>"]
|
||||
},
|
||||
|
||||
{"description": "th end-tag followed by th start-tag",
|
||||
"input": [["EndTag", "th"], ["StartTag", "th", {}]],
|
||||
"expected": ["<th>", "</th>"]
|
||||
},
|
||||
|
||||
{"description": "th end-tag followed by td start-tag",
|
||||
"input": [["EndTag", "th"], ["StartTag", "td", {}]],
|
||||
"expected": ["<td>", "</th>"]
|
||||
},
|
||||
|
||||
{"description": "th end-tag followed by end-tag",
|
||||
"input": [["EndTag", "th"], ["EndTag", "foo", {}]],
|
||||
"expected": ["</foo>"]
|
||||
},
|
||||
|
||||
{"description": "th end-tag at EOF",
|
||||
"input": [["EndTag", "th"]],
|
||||
"expected": [""]
|
||||
}
|
||||
|
||||
]}
|
54 vendor/plugins/HTML5lib/testdata/serializer/options.test vendored Normal file
@@ -0,0 +1,54 @@
{"tests":[
|
||||
|
||||
{"description": "quote_char=\"'\"",
|
||||
"options": {"quote_char": "'"},
|
||||
"input": [["StartTag", "span", {"title": "test 'with' quote_char"}]],
|
||||
"expected": ["<span title='test 'with' quote_char'>"]
|
||||
},
|
||||
|
||||
{"description": "quote_attr_values=true",
|
||||
"options": {"quote_attr_values": true},
|
||||
"input": [["StartTag", "button", {"disabled": "disabled"}]],
|
||||
"expected": ["<button disabled>"],
|
||||
"xhtml": ["<button disabled=\"disabled\">"]
|
||||
},
|
||||
|
||||
{"description": "quote_attr_values=true with irrelevant",
|
||||
"options": {"quote_attr_values": true},
|
||||
"input": [["StartTag", "div", {"irrelevant": "irrelevant"}]],
|
||||
"expected": ["<div irrelevant>"],
|
||||
"xhtml": ["<div irrelevant=\"irrelevant\">"]
|
||||
},
|
||||
|
||||
{"description": "use_trailing_solidus=true with void element",
|
||||
"options": {"use_trailing_solidus": true},
|
||||
"input": [["EmptyTag", "img", {}]],
|
||||
"expected": ["<img />"]
|
||||
},
|
||||
|
||||
{"description": "use_trailing_solidus=true with non-void element",
|
||||
"options": {"use_trailing_solidus": true},
|
||||
"input": [["StartTag", "div", {}]],
|
||||
"expected": ["<div>"]
|
||||
},
|
||||
|
||||
{"description": "minimize_boolean_attributes=false",
|
||||
"options": {"minimize_boolean_attributes": false},
|
||||
"input": [["StartTag", "div", {"irrelevant": "irrelevant"}]],
|
||||
"expected": ["<div irrelevant=irrelevant>"],
|
||||
"xhtml": ["<div irrelevant=\"irrelevant\">"]
|
||||
},
|
||||
|
||||
{"description": "minimize_boolean_attributes=false with empty value",
|
||||
"options": {"minimize_boolean_attributes": false},
|
||||
"input": [["StartTag", "div", {"irrelevant": ""}]],
|
||||
"expected": ["<div irrelevant=\"\">"]
|
||||
},
|
||||
|
||||
{"description": "escape less than signs in attribute values",
|
||||
"options": {"escape_lt_in_attrs": true},
|
||||
"input": [["StartTag", "a", {"title": "a<b>c&d"}]],
|
||||
"expected": ["<a title=\"a<b>c&d\">"]
|
||||
}
|
||||
|
||||
]}
|
51 vendor/plugins/HTML5lib/testdata/serializer/whitespace.test vendored Normal file
@@ -0,0 +1,51 @@
{"tests": [
|
||||
|
||||
{"description": "bare text with leading spaces",
|
||||
"options": {"strip_whitespace": true},
|
||||
"input": [["Characters", "\t\r\n\u000B\u000C foo"]],
|
||||
"expected": ["foo"]
|
||||
},
|
||||
|
||||
{"description": "bare text with trailing spaces",
|
||||
"options": {"strip_whitespace": true},
|
||||
"input": [["Characters", "foo \t\r\n\u000B\u000C"]],
|
||||
"expected": ["foo"]
|
||||
},
|
||||
|
||||
{"description": "bare text with inner spaces",
|
||||
"options": {"strip_whitespace": true},
|
||||
"input": [["Characters", "foo \t\r\n\u000B\u000C bar"]],
|
||||
"expected": ["foo bar"]
|
||||
},
|
||||
|
||||
{"description": "text within <pre>",
|
||||
"options": {"strip_whitespace": true},
|
||||
"input": [["StartTag", "pre", {}], ["Characters", "\t\r\n\u000B\u000C foo \t\r\n\u000B\u000C bar \t\r\n\u000B\u000C"], ["EndTag", "pre"]],
|
||||
"expected": ["<pre>\t\r\n\u000B\u000C foo \t\r\n\u000B\u000C bar \t\r\n\u000B\u000C</pre>"]
|
||||
},
|
||||
|
||||
{"description": "text within <pre>, with inner markup",
|
||||
"options": {"strip_whitespace": true},
|
||||
"input": [["StartTag", "pre", {}], ["Characters", "\t\r\n\u000B\u000C fo"], ["StartTag", "span", {}], ["Characters", "o \t\r\n\u000B\u000C b"], ["EndTag", "span"], ["Characters", "ar \t\r\n\u000B\u000C"], ["EndTag", "pre"]],
|
||||
"expected": ["<pre>\t\r\n\u000B\u000C fo<span>o \t\r\n\u000B\u000C b</span>ar \t\r\n\u000B\u000C</pre>"]
|
||||
},
|
||||
|
||||
{"description": "text within <textarea>",
|
||||
"options": {"strip_whitespace": true},
|
||||
"input": [["StartTag", "textarea", {}], ["Characters", "\t\r\n\u000B\u000C foo \t\r\n\u000B\u000C bar \t\r\n\u000B\u000C"], ["EndTag", "textarea"]],
|
||||
"expected": ["<textarea>\t\r\n\u000B\u000C foo \t\r\n\u000B\u000C bar \t\r\n\u000B\u000C</textarea>"]
|
||||
},
|
||||
|
||||
{"description": "text within <script>",
|
||||
"options": {"strip_whitespace": true},
|
||||
"input": [["StartTag", "script", {}], ["Characters", "\t\r\n\u000B\u000C foo \t\r\n\u000B\u000C bar \t\r\n\u000B\u000C"], ["EndTag", "script"]],
|
||||
"expected": ["<script>\t\r\n\u000B\u000C foo \t\r\n\u000B\u000C bar \t\r\n\u000B\u000C</script>"]
|
||||
},
|
||||
|
||||
{"description": "text within <style>",
|
||||
"options": {"strip_whitespace": true},
|
||||
"input": [["StartTag", "style", {}], ["Characters", "\t\r\n\u000B\u000C foo \t\r\n\u000B\u000C bar \t\r\n\u000B\u000C"], ["EndTag", "style"]],
|
||||
"expected": ["<style>\t\r\n\u000B\u000C foo \t\r\n\u000B\u000C bar \t\r\n\u000B\u000C</style>"]
|
||||
}
|
||||
|
||||
]}
|
1 vendor/plugins/HTML5lib/testdata/sites/google-results.htm vendored Executable file
File diff suppressed because one or more lines are too long
1 vendor/plugins/HTML5lib/testdata/sites/python-ref-import.htm vendored Executable file
File diff suppressed because one or more lines are too long
1 vendor/plugins/HTML5lib/testdata/sites/web-apps-old.htm vendored Executable file
File diff suppressed because one or more lines are too long
34275 vendor/plugins/HTML5lib/testdata/sites/web-apps.htm vendored Executable file
File diff suppressed because it is too large
36 vendor/plugins/HTML5lib/testdata/tokenizer/contentModelFlags.test vendored Normal file
@@ -0,0 +1,36 @@
{"tests": [
|
||||
|
||||
{"description":"PLAINTEXT content model flag",
|
||||
"contentModelFlags":["PLAINTEXT"],
|
||||
"input":"<head>&body;",
|
||||
"output":[["Character", "<head>&body;"]]},
|
||||
|
||||
{"description":"End tag closing RCDATA or CDATA",
|
||||
"contentModelFlags":["RCDATA", "CDATA"],
|
||||
"lastStartTag":"bar",
|
||||
"input":"foo</bar>",
|
||||
"output":[["Character", "foo"], ["EndTag", "bar"]]},
|
||||
|
||||
{"description":"End tag with incorrect name in RCDATA or CDATA",
|
||||
"contentModelFlags":["RCDATA", "CDATA"],
|
||||
"lastStartTag":"baz",
|
||||
"input":"</foo>bar</baz>",
|
||||
"output":[["Character", "</foo>bar"], ["EndTag", "baz"]]},
|
||||
|
||||
{"description":"End tag closing RCDATA or CDATA, switching back to PCDATA",
|
||||
"contentModelFlags":["RCDATA", "CDATA"],
|
||||
"lastStartTag":"bar",
|
||||
"input":"foo</bar></baz>",
|
||||
"output":[["Character", "foo"], ["EndTag", "bar"], ["EndTag", "baz"]]},
|
||||
|
||||
{"description":"CDATA w/ something looking like an entity",
|
||||
"contentModelFlags":["CDATA"],
|
||||
"input":"&foo;",
|
||||
"output":[["Character", "&foo;"]]},
|
||||
|
||||
{"description":"RCDATA w/ an entity",
|
||||
"contentModelFlags":["RCDATA"],
|
||||
"input":"<",
|
||||
"output":[["Character", "<"]]}
|
||||
|
||||
]}
|
21 vendor/plugins/HTML5lib/testdata/tokenizer/escapeFlag.test vendored Normal file
@@ -0,0 +1,21 @@
{"tests": [
|
||||
|
||||
{"description":"Commented close tag in [R]CDATA",
|
||||
"contentModelFlags":["RCDATA", "CDATA"],
|
||||
"lastStartTag":"bar",
|
||||
"input":"foo<!--</bar>--></bar>",
|
||||
"output":[["Character", "foo<!--</bar>-->"], ["EndTag", "bar"]]},
|
||||
|
||||
{"description":"Bogus comment in [R]CDATA",
|
||||
"contentModelFlags":["RCDATA", "CDATA"],
|
||||
"lastStartTag":"bar",
|
||||
"input":"foo<!-->baz</bar>",
|
||||
"output":[["Character", "foo<!-->baz"], ["EndTag", "bar"]]},
|
||||
|
||||
{"description":"End tag surrounded by bogus comment in [R]CDATA",
|
||||
"contentModelFlags":["RCDATA", "CDATA"],
|
||||
"lastStartTag":"bar",
|
||||
"input":"foo<!--></bar><!-->baz</bar>",
|
||||
"output":[["Character", "foo<!-->"], ["EndTag", "bar"], "ParseError", ["Comment", ""], ["Character", "baz"], ["EndTag", "bar"]]}
|
||||
|
||||
]}
|
156 vendor/plugins/HTML5lib/testdata/tokenizer/test1.test vendored Normal file
@@ -0,0 +1,156 @@
{"tests": [
|
||||
|
||||
{"description":"Correct Doctype lowercase",
|
||||
"input":"<!DOCTYPE html>",
|
||||
"output":[["DOCTYPE", "html", null, null, true]]},
|
||||
|
||||
{"description":"Correct Doctype uppercase",
|
||||
"input":"<!DOCTYPE HTML>",
|
||||
"output":[["DOCTYPE", "HTML", null, null, true]]},
|
||||
|
||||
{"description":"Correct Doctype mixed case",
|
||||
"input":"<!DOCTYPE HtMl>",
|
||||
"output":[["DOCTYPE", "HtMl", null, null, true]]},
|
||||
|
||||
{"description":"Truncated doctype start",
|
||||
"input":"<!DOC>",
|
||||
"output":["ParseError", ["Comment", "DOC"]]},
|
||||
|
||||
{"description":"Doctype in error",
|
||||
"input":"<!DOCTYPE foo>",
|
||||
"output":[["DOCTYPE", "foo", null, null, true]]},
|
||||
|
||||
{"description":"Single Start Tag",
|
||||
"input":"<h>",
|
||||
"output":[["StartTag", "h", {}]]},
|
||||
|
||||
{"description":"Empty end tag",
|
||||
"input":"</>",
|
||||
"output":["ParseError"]},
|
||||
|
||||
{"description":"Empty start tag",
|
||||
"input":"<>",
|
||||
"output":["ParseError", ["Character", "<>"]]},
|
||||
|
||||
{"description":"Start Tag w/attribute",
|
||||
"input":"<h a='b'>",
|
||||
"output":[["StartTag", "h", {"a":"b"}]]},
|
||||
|
||||
{"description":"Start Tag w/attribute no quotes",
|
||||
"input":"<h a=b>",
|
||||
"output":[["StartTag", "h", {"a":"b"}]]},
|
||||
|
||||
{"description":"Start/End Tag",
|
||||
"input":"<h></h>",
|
||||
"output":[["StartTag", "h", {}], ["EndTag", "h"]]},
|
||||
|
||||
{"description":"Two unclosed start tags",
|
||||
"input":"<p>One<p>Two",
|
||||
"output":[["StartTag", "p", {}], ["Character", "One"], ["StartTag", "p", {}], ["Character", "Two"]]},
|
||||
|
||||
{"description":"End Tag w/attribute",
|
||||
"input":"<h></h a='b'>",
|
||||
"output":[["StartTag", "h", {}], "ParseError", ["EndTag", "h"]]},
|
||||
|
||||
{"description":"Multiple atts",
|
||||
"input":"<h a='b' c='d'>",
|
||||
"output":[["StartTag", "h", {"a":"b", "c":"d"}]]},
|
||||
|
||||
{"description":"Multiple atts no space",
|
||||
"input":"<h a='b'c='d'>",
|
||||
"output":[["StartTag", "h", {"a":"b", "c":"d"}]]},
|
||||
|
||||
{"description":"Repeated attr",
|
||||
"input":"<h a='b' a='d'>",
|
||||
"output":["ParseError", ["StartTag", "h", {"a":"b"}]]},
|
||||
|
||||
{"description":"Simple comment",
|
||||
"input":"<!--comment-->",
|
||||
"output":[["Comment", "comment"]]},
|
||||
|
||||
{"description":"Comment, Central dash no space",
|
||||
"input":"<!----->",
|
||||
"output":["ParseError", ["Comment", "-"]]},
|
||||
|
||||
{"description":"Comment, two central dashes",
|
||||
"input":"<!-- --comment -->",
|
||||
"output":["ParseError", ["Comment", " --comment "]]},
|
||||
|
||||
{"description":"Unfinished comment",
|
||||
"input":"<!--comment",
|
||||
"output":["ParseError", ["Comment", "comment"]]},
|
||||
|
||||
{"description":"Start of a comment",
|
||||
"input":"<!-",
|
||||
"output":["ParseError", ["Comment", "-"]]},
|
||||
|
||||
{"description":"Short comment",
|
||||
"input":"<!-->",
|
||||
"output":["ParseError", ["Comment", ""]]},
|
||||
|
||||
{"description":"Short comment two",
|
||||
"input":"<!--->",
|
||||
"output":["ParseError", ["Comment", ""]]},
|
||||
|
||||
{"description":"Short comment three",
|
||||
"input":"<!---->",
|
||||
"output":[["Comment", ""]]},
|
||||
|
||||
|
||||
{"description":"Ampersand EOF",
|
||||
"input":"&",
|
||||
"output":[["Character", "&"]]},
|
||||
|
||||
{"description":"Ampersand ampersand EOF",
|
||||
"input":"&&",
|
||||
"output":[["Character", "&&"]]},
|
||||
|
||||
{"description":"Ampersand space EOF",
|
||||
"input":"& ",
|
||||
"output":[["Character", "& "]]},
|
||||
|
||||
{"description":"Unfinished entity",
|
||||
"input":"&f",
|
||||
"output":["ParseError", ["Character", "&f"]]},
|
||||
|
||||
{"description":"Ampersand, number sign",
|
||||
"input":"&#",
|
||||
"output":["ParseError", ["Character", "&#"]]},
|
||||
|
||||
{"description":"Unfinished numeric entity",
|
||||
"input":"&#x",
|
||||
"output":["ParseError", ["Character", "&#x"]]},
|
||||
|
||||
{"description":"Entity with trailing semicolon (1)",
|
||||
"input":"I'm ¬it",
|
||||
"output":[["Character","I'm ¬it"]]},
|
||||
|
||||
{"description":"Entity with trailing semicolon (2)",
|
||||
"input":"I'm ∉",
|
||||
"output":[["Character","I'm ∉"]]},
|
||||
|
||||
{"description":"Entity without trailing semicolon (1)",
|
||||
"input":"I'm ¬it",
|
||||
"output":[["Character","I'm "], "ParseError", ["Character", "¬it"]]},
|
||||
|
||||
{"description":"Entity without trailing semicolon (2)",
|
||||
"input":"I'm ¬in",
|
||||
"output":[["Character","I'm "], "ParseError", ["Character", "∉"]]},
|
||||
|
||||
{"description":"Partial entity match at end of file",
|
||||
"input":"I'm &no",
|
||||
"output":[["Character","I'm "], "ParseError", ["Character", "&no"]]},
|
||||
|
||||
{"description":"ASCII decimal entity",
|
||||
"input":"$",
|
||||
"output":[["Character","$"]]},
|
||||
|
||||
{"description":"ASCII hexadecimal entity",
|
||||
"input":"?",
|
||||
"output":[["Character","?"]]},
|
||||
|
||||
{"description":"Hexadecimal entity in attribute",
|
||||
"input":"<h a='?'></h>",
|
||||
"output":[["StartTag", "h", {"a":"?"}], ["EndTag", "h"]]}
|
||||
|
||||
]}
125  vendor/plugins/HTML5lib/testdata/tokenizer/test2.test  (vendored, Normal file)
@@ -0,0 +1,125 @@
{"tests": [
|
||||
|
||||
{"description":"DOCTYPE without name",
|
||||
"input":"<!DOCTYPE>",
|
||||
"output":["ParseError", "ParseError", ["DOCTYPE", "", null, null, false]]},
|
||||
|
||||
{"description":"DOCTYPE without space before name",
|
||||
"input":"<!DOCTYPEhtml>",
|
||||
"output":["ParseError", ["DOCTYPE", "html", null, null, true]]},
|
||||
|
||||
{"description":"Incorrect DOCTYPE without a space before name",
|
||||
"input":"<!DOCTYPEfoo>",
|
||||
"output":["ParseError", ["DOCTYPE", "foo", null, null, true]]},
|
||||
|
||||
{"description":"DOCTYPE with publicId",
|
||||
"input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\">",
|
||||
"output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", null, true]]},
|
||||
|
||||
{"description":"DOCTYPE with EOF after PUBLIC",
|
||||
"input":"<!DOCTYPE html PUBLIC",
|
||||
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
|
||||
|
||||
{"description":"DOCTYPE with EOF after PUBLIC '",
|
||||
"input":"<!DOCTYPE html PUBLIC '",
|
||||
"output":["ParseError", ["DOCTYPE", "html", "", null, false]]},
|
||||
|
||||
{"description":"DOCTYPE with EOF after PUBLIC 'x",
|
||||
"input":"<!DOCTYPE html PUBLIC 'x",
|
||||
"output":["ParseError", ["DOCTYPE", "html", "x", null, false]]},
|
||||
|
||||
{"description":"DOCTYPE with systemId",
|
||||
"input":"<!DOCTYPE html SYSTEM \"-//W3C//DTD HTML Transitional 4.01//EN\">",
|
||||
"output":[["DOCTYPE", "html", null, "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
|
||||
|
||||
{"description":"DOCTYPE with publicId and systemId",
|
||||
"input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\" \"-//W3C//DTD HTML Transitional 4.01//EN\">",
|
||||
"output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
|
||||
|
||||
{"description":"Incomplete doctype",
|
||||
"input":"<!DOCTYPE html ",
|
||||
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
|
||||
|
||||
{"description":"Numeric entity representing the NUL character",
|
||||
"input":"�",
|
||||
"output":[["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"Hexadecimal entity representing the NUL character",
|
||||
"input":"�",
|
||||
"output":[["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"Numeric entity representing a codepoint after 1114111 (U+10FFFF)",
|
||||
"input":"�",
|
||||
"output":[["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"Hexadecimal entity representing a codepoint after 1114111 (U+10FFFF)",
|
||||
"input":"�",
|
||||
"output":[["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"Numeric entity representing a Windows-1252 'codepoint'",
|
||||
"input":"‰",
|
||||
"output":["ParseError", ["Character", "\u2030"]]},
|
||||
|
||||
{"description":"Hexadecimal entity representing a Windows-1252 'codepoint'",
|
||||
"input":"‰",
|
||||
"output":["ParseError", ["Character", "\u2030"]]},
|
||||
|
||||
{"description":"Hexadecimal entity with mixed uppercase and lowercase",
|
||||
"input":"ꯍ",
|
||||
"output":[["Character", "\uABCD"]]},
|
||||
|
||||
{"description":"Entity without a name",
|
||||
"input":"&;",
|
||||
"output":["ParseError", ["Character", "&;"]]},
|
||||
|
||||
{"description":"Unescaped ampersand in attribute value",
|
||||
"input":"<h a='&'>",
|
||||
"output":["ParseError", ["StartTag", "h", { "a":"&" }]]},
|
||||
|
||||
{"description":"StartTag containing <",
|
||||
"input":"<a<b>",
|
||||
"output":[["StartTag", "a<b", { }]]},
|
||||
|
||||
{"description":"Non-void element containing trailing /",
|
||||
"input":"<h/>",
|
||||
"output":["ParseError", ["StartTag", "h", { }]]},
|
||||
|
||||
{"description":"Void element with permitted slash",
|
||||
"input":"<br/>",
|
||||
"output":[["StartTag", "br", { }]]},
|
||||
|
||||
{"description":"StartTag containing /",
|
||||
"input":"<h/a='b'>",
|
||||
"output":["ParseError", ["StartTag", "h", { "a":"b" }]]},
|
||||
|
||||
{"description":"Double-quoted attribute value",
|
||||
"input":"<h a=\"b\">",
|
||||
"output":[["StartTag", "h", { "a":"b" }]]},
|
||||
|
||||
{"description":"Unescaped </",
|
||||
"input":"</",
|
||||
"output":["ParseError", ["Character", "</"]]},
|
||||
|
||||
{"description":"Illegal end tag name",
|
||||
"input":"</1>",
|
||||
"output":["ParseError", ["Comment", "1"]]},
|
||||
|
||||
{"description":"Simili processing instruction",
|
||||
"input":"<?namespace>",
|
||||
"output":["ParseError", ["Comment", "?namespace"]]},
|
||||
|
||||
{"description":"A bogus comment stops at >, even if preceeded by two dashes",
|
||||
"input":"<?foo-->",
|
||||
"output":["ParseError", ["Comment", "?foo--"]]},
|
||||
|
||||
{"description":"Unescaped <",
|
||||
"input":"foo < bar",
|
||||
"output":[["Character", "foo "], "ParseError", ["Character", "< bar"]]},
|
||||
|
||||
{"description":"Null Byte Replacement",
|
||||
"input":"\u0000",
|
||||
"output":[["Character", "\ufffd"]]}
|
||||
|
||||
]}
1930  vendor/plugins/HTML5lib/testdata/tree-construction/tests1.dat  (vendored, Normal file)
File diff suppressed because it is too large.
779  vendor/plugins/HTML5lib/testdata/tree-construction/tests2.dat  (vendored, Executable file)
@@ -0,0 +1,779 @@
#data
|
||||
<!DOCTYPE HTML>Test
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Test"
|
||||
|
||||
#data
|
||||
<textarea>test</div>test
|
||||
#errors
|
||||
10: missing document type declaration.
|
||||
17: unescaped '</' in CDATA or RCDATA block.
|
||||
25: unexpected end of file while parsing CDATA section for element textarea.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <textarea>
|
||||
| "test</div>test"
|
||||
|
||||
#data
|
||||
<table><td>
|
||||
#errors
|
||||
7: missing document type declaration.
|
||||
11: required tr element start tag implied by unexpected td element start tag.
|
||||
12: unexpected end of file implied table element end tag.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
|
||||
#data
|
||||
<table><td>test</tbody></table>
|
||||
#errors
|
||||
missing document type declarattion
|
||||
Unexpected and of file
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
| "test"
|
||||
|
||||
#data
|
||||
<frame>test
|
||||
#errors
|
||||
missing document type declaration
|
||||
frame element can't occur here
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "test"
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML><frameset>test
|
||||
#errors
|
||||
frameset can't contain text
|
||||
Unexpected end of file
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <frameset>
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML><frameset><!DOCTYPE HTML>
|
||||
#errors
|
||||
document type declaration can only occur at the start of a document
|
||||
Expected end tag </frameset>
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <frameset>
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML><font><p><b>test</font>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <font>
|
||||
| <p>
|
||||
| <font>
|
||||
| <b>
|
||||
| "test"
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML><dt><div><dd>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <dt>
|
||||
| <div>
|
||||
| <dd>
|
||||
|
||||
#data
|
||||
<script></x
|
||||
#errors
|
||||
no document type
|
||||
</ in script
|
||||
Unexpected end of file. Expected </script> end tag.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <script>
|
||||
| "</x"
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<table><plaintext><td>
|
||||
#errors
|
||||
no document type
|
||||
<plaintext> directly inside table
|
||||
Characters inside table.
|
||||
Unexpected end of file.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <plaintext>
|
||||
| "<td>"
|
||||
| <table>
|
||||
|
||||
#data
|
||||
<plaintext></plaintext>
|
||||
#errors
|
||||
No DOCTYPE seen.
|
||||
Unexpected end of file.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <plaintext>
|
||||
| "</plaintext>"
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML><table><tr>TEST
|
||||
#errors
|
||||
TEST can't occur in <tr>
|
||||
Unexpected end of file.
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "TEST"
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML><body t1=1><body t2=2><body t3=3 t4=4>
|
||||
#errors
|
||||
Unexpected start tag "body"
|
||||
Unexpected start tag "body"
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| t4="4"
|
||||
| t2="2"
|
||||
| t3="3"
|
||||
| t1="1"
|
||||
|
||||
#data
|
||||
</b test
|
||||
#errors
|
||||
Unexpected EOF in attribute
|
||||
Unexpected attribute in end tag.
|
||||
No doctype.
|
||||
Unexpected end tag.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML></b test<b &=&>X
|
||||
#errors
|
||||
Unexpected < in attribute
|
||||
End tag contains attributes.
|
||||
Unexpected end tag.
|
||||
Named entity didn't end with ;
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "X"
|
||||
|
||||
#data
|
||||
<!doctypehtml><scrIPt type=text/x-foobar;baz>X</SCRipt
|
||||
#errors
|
||||
No space after literal DOCTYPE.
|
||||
Unexpected EOF in (end) tag name
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <script>
|
||||
| type="text/x-foobar;baz"
|
||||
| "X"
|
||||
| <body>
|
||||
|
||||
#data
|
||||
&
|
||||
#errors
|
||||
No doctype.
|
||||
Unfinished entity.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "&"
|
||||
|
||||
#data
|
||||
&#
|
||||
#errors
|
||||
No doctype.
|
||||
Unfinished numeric entity.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "&#"
|
||||
|
||||
#data
|
||||
&#X
|
||||
#errors
|
||||
No doctype.
|
||||
Unfinished hexadecimal entity.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "&#X"
|
||||
|
||||
#data
|
||||
&#x
|
||||
#errors
|
||||
No doctype.
|
||||
Unfinished hexadecimal entity.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "&#x"
|
||||
|
||||
#data
|
||||
-
|
||||
#errors
|
||||
No doctype.
|
||||
Numeric entity didn't end with ;
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "-"
|
||||
|
||||
#data
|
||||
&x-test
|
||||
#errors
|
||||
No doctype.
|
||||
Unfinished named entity.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "&x-test"
|
||||
|
||||
#data
|
||||
<!doctypehtml><p><li>
|
||||
#errors
|
||||
No space after literal DOCTYPE.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <li>
|
||||
|
||||
#data
|
||||
<!doctypeHTML><p><dt>
|
||||
#errors
|
||||
No space after literal DOCTYPE.
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <dt>
|
||||
|
||||
#data
|
||||
<!doctypehtmL><p><dd>
|
||||
#errors
|
||||
No space after literal DOCTYPE.
|
||||
#document
|
||||
| <!DOCTYPE htmL>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <dd>
|
||||
|
||||
#data
|
||||
<!doctypehtml><p><form>
|
||||
#errors
|
||||
No space after literal DOCTYPE.
|
||||
Unexpected EOF.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <form>
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML><p><b><i><u></p> <p>X
|
||||
#errors
|
||||
Unexpected end tag </p>.
|
||||
Unexpected end EOF. Missing closing tags.
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <b>
|
||||
| <i>
|
||||
| <u>
|
||||
| " "
|
||||
| <p>
|
||||
| <b>
|
||||
| <i>
|
||||
| <u>
|
||||
| "X"
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML><p></P>X
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| "X"
|
||||
|
||||
#data
|
||||
&
|
||||
#errors
|
||||
No doctype.
|
||||
No closing ; for the entity.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "&"
|
||||
|
||||
#data
|
||||
&AMp;
|
||||
#errors
|
||||
No doctype.
|
||||
Invalid entity.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "&AMp;"
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML><html><head></head><body><thisISasillyTESTelementNameToMakeSureCrazyTagNamesArePARSEDcorrectLY>
|
||||
#errors
|
||||
Unexpected end of file.
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <thisisasillytestelementnametomakesurecrazytagnamesareparsedcorrectly>
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML>X</body>X
|
||||
#errors
|
||||
Unexpected non-space characters in the after body phase.
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "XX"
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML><!-- X
|
||||
#errors
|
||||
Unexpected end of file in comment.
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <!-- X -->
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML><table><caption>test TEST</caption><td>test
|
||||
#errors
|
||||
Unexpected <td> in table body phase.
|
||||
Unexpected end of file.
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <caption>
|
||||
| "test TEST"
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
| "test"
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML><select><option><optgroup>
|
||||
#errors
|
||||
Unexpected end of file. Missing closing tags.
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <select>
|
||||
| <option>
|
||||
| <optgroup>
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML><select><optgroup><option></optgroup><option><select><option>
|
||||
#errors
|
||||
Unexpected start tag <select> in <select>.
|
||||
Unexpected start tag <option>.
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <select>
|
||||
| <optgroup>
|
||||
| <option>
|
||||
| <option>
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML><select><optgroup><option><optgroup>
|
||||
#errors
|
||||
Unexpected end of file. Missing closing tags.
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <select>
|
||||
| <optgroup>
|
||||
| <option>
|
||||
| <optgroup>
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML><font><input><input></font>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <font>
|
||||
| <input>
|
||||
| <input>
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML><!-- XXX - XXX -->
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <!-- XXX - XXX -->
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML><!-- XXX - XXX
|
||||
#errors
|
||||
Unexpected EOF in comment.
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <!-- XXX - XXX -->
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML><!-- XXX - XXX - XXX -->
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <!-- XXX - XXX - XXX -->
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<isindex test=x name=x>
|
||||
#errors
|
||||
No doctype
|
||||
<isindex> is not ok!
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <form>
|
||||
| <hr>
|
||||
| <p>
|
||||
| <label>
|
||||
| "This is a searchable index. Insert your search keywords here:"
|
||||
| <input>
|
||||
| test="x"
|
||||
| name="isindex"
|
||||
| <hr>
|
||||
|
||||
#data
|
||||
test
|
||||
test
|
||||
#errors
|
||||
No doctype
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "test
|
||||
test"
|
||||
|
||||
#data
|
||||
<p><b><i><u></p>
|
||||
<p>X
|
||||
#errors
|
||||
No doctype
|
||||
Unexpected end tag p.
|
||||
Unexpected EOF.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <b>
|
||||
| <i>
|
||||
| <u>
|
||||
| "
|
||||
"
|
||||
| <p>
|
||||
| <b>
|
||||
| <i>
|
||||
| <u>
|
||||
| "X"
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML><body><title>test</body></title>
|
||||
#errors
|
||||
Unexpected start tag that belongs in the head.
|
||||
Expected closing tag after </.
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <title>
|
||||
| "test</body>"
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML><body><title>X</title><meta name=z><link rel=foo><style>
|
||||
x { content:"</style" } </style>
|
||||
#errors
|
||||
Unexpected start tag that belongs in head.
|
||||
Unexpected start tag that belongs in head.
|
||||
Unexpected start tag that belongs in head.
|
||||
Expected closing tag after </.
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <title>
|
||||
| "X"
|
||||
| <body>
|
||||
| <meta>
|
||||
| name="z"
|
||||
| <link>
|
||||
| rel="foo"
|
||||
| <style>
|
||||
| "
|
||||
x { content:"</style" } "
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML><select><optgroup></optgroup></select>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <select>
|
||||
| <optgroup>
|
||||
|
||||
#data
|
||||
|
||||
|
||||
#errors
|
||||
No doctype.
|
||||
#document
|
||||
| "
|
||||
"
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML> <html>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| " "
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML><script>
|
||||
</script> <title>x</title> </head>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <script>
|
||||
| "
|
||||
"
|
||||
| " "
|
||||
| <title>
|
||||
| "x"
|
||||
| " "
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML><html><body><html id=x>
|
||||
#errors
|
||||
duplicate html start tag
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <html>
|
||||
| id="x"
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML>X</body><html id="x">
|
||||
#errors
|
||||
Unexpected html start tag in the after body phase.
|
||||
html needs to be the first start tag.
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <html>
|
||||
| id="x"
|
||||
| <head>
|
||||
| <body>
|
||||
| "X"
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML><head><html id=x>
|
||||
#errors
|
||||
html start tag too late
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <html>
|
||||
| id="x"
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML>X</html>X
|
||||
#errors
|
||||
Unexpected non-space characters. Expected end of file.
|
||||
Unexpected non-space characters in after body phase. Expected end of file.
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "XX"
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML>X</html>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "X "
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML>X</html><p>X
|
||||
#errors
|
||||
Unexpected start tag <p> in trailing end phase.
|
||||
Unexpected start tag <p> in after body phase.
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "X"
|
||||
| <p>
|
||||
| "X"
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML>X<p/x/y/z>
|
||||
#errors
|
||||
Solidus (/) incorrectly placed.
|
||||
Solidus (/) incorrectly placed.
|
||||
Solidus (/) incorrectly placed.
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "X"
|
||||
| <p>
|
||||
| y=""
|
||||
| x=""
|
||||
| z=""
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML><!--x--
|
||||
#errors
|
||||
Unexpected end of file in comment.
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <!-- x -->
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML><table><tr><td></p></table>
|
||||
#errors
|
||||
Unexpected </p> end tag.
|
||||
#document
|
||||
| <!DOCTYPE HTML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
236  vendor/plugins/HTML5lib/testdata/tree-construction/tests3.dat  (vendored, Normal file)
@@ -0,0 +1,236 @@
#data
|
||||
<head></head><style></style>
|
||||
#errors
|
||||
No DOCTYPE
|
||||
<style> in after-head mode
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <style>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<head></head><script></script>
|
||||
#errors
|
||||
No DOCTYPE
|
||||
<script> in after-head mode
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <script>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<head></head><!-- --><style></style><!-- --><script></script>
|
||||
#errors
|
||||
No DOCTYPE
|
||||
<style> in after-head mode
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <style>
|
||||
| <script>
|
||||
| <!-- -->
|
||||
| <!-- -->
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<head></head><!-- -->x<style></style><!-- --><script></script>
|
||||
#errors
|
||||
No DOCTYPE
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <!-- -->
|
||||
| <body>
|
||||
| "x"
|
||||
| <style>
|
||||
| <!-- -->
|
||||
| <script>
|
||||
|
||||
#data
|
||||
<!DOCTYPE htML><html><head></head><body><pre>
|
||||
</pre></body></html>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE htML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <pre>
|
||||
|
||||
#data
|
||||
<!DOCTYPE htML><html><head></head><body><pre>
|
||||
|
||||
foo</pre></body></html>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE htML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <pre>
|
||||
| "foo"
|
||||
|
||||
|
||||
#data
|
||||
<!DOCTYPE htML><html><head></head><body><pre>
|
||||
|
||||
foo
|
||||
</pre></body></html>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE htML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <pre>
|
||||
| "foo
|
||||
"
|
||||
|
||||
#data
|
||||
<!DOCTYPE htML><html><head></head><body><pre>x</pre><span>
|
||||
</span></body></html>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE htML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <pre>
|
||||
| "x"
|
||||
| <span>
|
||||
| "
|
||||
"
|
||||
|
||||
#data
|
||||
<!DOCTYPE htML><html><head></head><body><pre>x
|
||||
y</pre></body></html>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE htML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <pre>
|
||||
| "x
|
||||
y"
|
||||
|
||||
#data
|
||||
<!DOCTYPE htML><html><head></head><body><pre>x<div>
|
||||
y</pre></body></html>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE htML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <pre>
|
||||
| "x"
|
||||
| <div>
|
||||
| "
|
||||
| y"
|
||||
|
||||
#data
|
||||
<!DOCTYPE htML><HTML><META><HEAD></HEAD></HTML>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE htML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <meta>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!DOCTYPE htML><HTML><HEAD><head></HEAD></HTML>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE htML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<textarea>foo<span>bar</span><i>baz
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <textarea>
|
||||
| "foo<span>bar</span><i>baz"
|
||||
|
||||
#data
|
||||
<title>foo<span>bar</em><i>baz
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <title>
|
||||
| "foo<span>bar</em><i>baz"
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!DOCTYPE htML><textarea>
|
||||
</textarea>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE htML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <textarea>
|
||||
|
||||
#data
|
||||
<!DOCTYPE htML><textarea>
|
||||
|
||||
foo</textarea>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE htML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <textarea>
|
||||
| "foo"
|
||||
|
||||
#data
|
||||
<!DOCTYPE htML><html><head></head><body><ul><li><div><p><li></ul></body></html>
|
||||
#errors
|
||||
Missing end tag (div)
|
||||
#document
|
||||
| <!DOCTYPE htML>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <ul>
|
||||
| <li>
|
||||
| <div>
|
||||
| <p>
|
||||
| <li>
|
||||
|
||||
#data
|
||||
<!doctype html><nobr><nobr><nobr>
|
||||
#errors
|
||||
Unexpected end of file.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <nobr>
|
||||
| <nobr>
|
||||
| <nobr>
|
||||
|
||||
#data
|
||||
<!doctype html><nobr><nobr></nobr><nobr>
|
||||
#errors
|
||||
Unexpected end of file.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <nobr>
|
||||
| <nobr>
|
||||
| <nobr>
|
44  vendor/plugins/HTML5lib/testdata/tree-construction/tests4.dat  (vendored, Normal file)
@@ -0,0 +1,44 @@
#data
direct div content
#errors
#document-fragment div
| "direct div content"

#data
direct textarea content
#errors
#document-fragment textarea
| "direct textarea content"

#data
textarea content with <em>pseudo</em> <foo>markup
#errors
#document-fragment textarea
| "textarea content with <em>pseudo</em> <foo>markup"

#data
this is CDATA inside a <style> element
#errors
#document-fragment style
| "this is CDATA inside a <style> element"

#data
</plaintext>
#errors
#document-fragment plaintext
| "</plaintext>"

#data
setting html's innerHTML
#errors
#document-fragment html
| <head>
| <body>
| "setting html's innerHTML"

#data
<title>setting head's innerHTML</title>
#errors
#document-fragment head
| <title>
| "setting head's innerHTML"
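The `#document-fragment <context>` lines above mark fragment cases: the harness changes later in this diff (tests/preamble.rb and tests/test_parser.rb) read the context element name from that line and call parseFragment instead of parsing a whole document. A rough sketch of that dispatch follows; the require paths, the 'simpletree' tree-builder name, and the plain parse call are assumptions, while HTMLParser.new(:tree => TreeBuilders[...]), parseFragment, and testSerializer are copied from test_parser.rb below:

# Sketch of driving one fragment case from tests4.dat (assumed require paths).
require 'html5lib/html5parser'
require 'html5lib/treebuilders'
include HTML5lib

input      = 'direct div content'
inner_html = 'div'   # context element taken from the "#document-fragment div" line

parser = HTMLParser.new(:tree => TreeBuilders['simpletree'])
if inner_html
  parser.parseFragment(input, inner_html)   # parse as the children of a <div>
else
  parser.parse(input)                       # whole-document case
end

puts parser.tree.testSerializer(parser.tree.document)   # tree dump in the "| ..." format above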
120  vendor/plugins/HTML5lib/testdata/tree-construction/tests5.dat  (vendored, Normal file)
@@ -0,0 +1,120 @@
#data
|
||||
<style> <!-- </style>x
|
||||
#errors
|
||||
No DOCTYPE
|
||||
Unexpected end of file
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <style>
|
||||
| " <!-- </style>x"
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<style> <!-- </style> --> </style>x
|
||||
#errors
|
||||
No DOCTYPE
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <style>
|
||||
| " <!-- </style> --> "
|
||||
| <body>
|
||||
| "x"
|
||||
|
||||
#data
|
||||
<style> <!--> </style>x
|
||||
#errors
|
||||
No DOCTYPE
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <style>
|
||||
| " <!--> "
|
||||
| <body>
|
||||
| "x"
|
||||
|
||||
#data
|
||||
<style> <!---> </style>x
|
||||
#errors
|
||||
No DOCTYPE
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <style>
|
||||
| " <!---> "
|
||||
| <body>
|
||||
| "x"
|
||||
|
||||
#data
|
||||
<iframe> <!---> </iframe>x
|
||||
#errors
|
||||
No DOCTYPE
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <iframe>
|
||||
| " <!---> "
|
||||
| "x"
|
||||
|
||||
#data
|
||||
<iframe> <!--- </iframe>->x</iframe> --> </iframe>x
|
||||
#errors
|
||||
No DOCTYPE
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <iframe>
|
||||
| " <!--- </iframe>->x</iframe> --> "
|
||||
| "x"
|
||||
|
||||
#data
|
||||
<script> <!-- </script> --> </script>x
|
||||
#errors
|
||||
No DOCTYPE
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <script>
|
||||
| " <!-- </script> --> "
|
||||
| <body>
|
||||
| "x"
|
||||
|
||||
#data
|
||||
<title> <!-- </title> --> </title>x
|
||||
#errors
|
||||
No DOCTYPE
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <title>
|
||||
| " <!-- </title> --> "
|
||||
| <body>
|
||||
| "x"
|
||||
|
||||
#data
|
||||
<textarea> <!--- </textarea>->x</textarea> --> </textarea>x
|
||||
#errors
|
||||
No DOCTYPE
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <textarea>
|
||||
| " <!--- </textarea>->x</textarea> --> "
|
||||
| "x"
|
||||
|
||||
#data
|
||||
<style> <!</-- </style>x
|
||||
#errors
|
||||
No DOCTYPE
|
||||
Unexpected end of file
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <style>
|
||||
| " <!</-- "
|
||||
| <body>
|
||||
| "x"
|
29  vendor/plugins/HTML5lib/testdata/tree-construction/tests6.dat  (vendored, Normal file)
@@ -0,0 +1,29 @@
#data
<!doctype html></head> <head>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| " "
| <body>

#data
<!doctype html></html> <head>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| " "

#data
<!doctype html></body><meta>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <meta>
66  vendor/plugins/HTML5lib/tests/preamble.rb  (vendored)
@@ -1,23 +1,81 @@
require 'test/unit'

HTML5LIB_BASE = File.dirname(File.dirname(File.dirname(File.expand_path(__FILE__))))
HTML5LIB_BASE = File.dirname(File.dirname(File.dirname(File.expand_path(__FILE__))))

if File.exists?(File.join(HTML5LIB_BASE, 'testdata'))
TESTDATA_DIR = File.join(HTML5LIB_BASE, 'testdata')
else
TESTDATA_DIR = File.join(File.dirname(File.dirname(File.expand_path(__FILE__))), 'testdata')
end

$:.unshift File.join(File.dirname(File.dirname(__FILE__)),'lib')

$:.unshift File.dirname(__FILE__)

def html5lib_test_files(subdirectory)
Dir[File.join(HTML5LIB_BASE, 'tests', subdirectory, '*.*')]
Dir[File.join(TESTDATA_DIR, subdirectory, '*.*')]
end

begin
require 'jsonx'
require 'rubygems'
require 'json'
rescue LoadError
class JSON
def self.parse json
json.gsub! /"\s*:/, '"=>'
json.gsub!(/"\s*:/, '"=>')
json.gsub!(/\\u[0-9a-fA-F]{4}/) {|x| [x[2..-1].to_i(16)].pack('U')}
null = nil
eval json
end
end
end

module HTML5lib
module TestSupport
def self.startswith?(a, b)
b[0... a.length] == a
end

def self.parseTestcase(data)
innerHTML = nil
input = []
output = []
errors = []
currentList = input
data.split(/\n/).each do |line|
if !line.empty? and !startswith?("#errors", line) and
!startswith?("#document", line) and
!startswith?("#data", line) and
!startswith?("#document-fragment", line)

if currentList == output and startswith?("|", line)
currentList.push(line[2..-1])
else
currentList.push(line)
end
elsif line == "#errors"
currentList = errors
elsif line == "#document" or startswith?("#document-fragment", line)
if startswith?("#document-fragment", line)
innerHTML = line[19..-1]
raise AssertionError unless innerHTML
end
currentList = output
end
end
return innerHTML, input.join("\n"), output.join("\n"), errors
end

# convert the output of str(document) to the format used in the testcases
def convertTreeDump(treedump)
treedump.split(/\n/)[1..-1].map { |line| (line.length > 2 and line[0] == ?|) ? line[3..-1] : line }.join("\n")
end

def sortattrs(output)
output.gsub(/^(\s+)\w+=.*(\n\1\w+=.*)+/) do |match|
match.split("\n").sort.join("\n")
end
end

end
end
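As a worked example (not part of the changeset) of what the new TestSupport.parseTestcase returns, take the first fragment record from tests4.dat above; the require line assumes the snippet lives alongside preamble.rb:

require File.join(File.dirname(__FILE__), 'preamble')

# One record, i.e. what is left after the harness splits a .dat file on "#data\n".
record = <<EOT
direct div content
#errors
#document-fragment div
| "direct div content"
EOT

inner_html, input, output, errors = HTML5lib::TestSupport.parseTestcase(record)

inner_html   # => "div"                      (context from the #document-fragment line)
input        # => "direct div content"       (everything before #errors)
output       # => "\"direct div content\""   (tree lines with the leading "| " stripped)
errors       # => []                         (nothing was listed under #errors)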
37  vendor/plugins/HTML5lib/tests/test_encoding.rb  (vendored)
@@ -4,33 +4,32 @@ require 'html5lib/inputstream'

class Html5EncodingTestCase < Test::Unit::TestCase

begin
begin
require 'rubygems'
require 'UniversalDetector'

def test_chardet
File.open(File.join(HTML5LIB_BASE, 'tests', 'encoding', 'chardet', 'test_big5.txt')) do |file|
stream = HTML5lib::HTMLInputStream.new(file, :chardet => true)
assert_equal 'big5', stream.char_encoding.downcase
end
file = File.open(File.join(TESTDATA_DIR, 'encoding', 'chardet', 'test_big5.txt'), 'r')
stream = HTML5lib::HTMLInputStream.new(file, :chardet => true)
assert_equal 'big5', stream.char_encoding.downcase
rescue LoadError
puts "chardet not found, skipping chardet tests"
end
rescue LoadError
puts "chardet not found, skipping chardet tests"
end
end

html5lib_test_files('encoding').each do |test_file|
test_name = File.basename(test_file).sub('.dat', '').tr('-', '')
html5lib_test_files('encoding').each do |test_file|
test_name = File.basename(test_file).sub('.dat', '').tr('-', '')

File.read(test_file).split("#data\n").each_with_index do |data, index|
next if data.empty?
input, encoding = data.split(/\n#encoding\s+/, 2)
encoding = encoding.split[0]
File.read(test_file).split("#data\n").each_with_index do |data, index|
next if data.empty?
input, encoding = data.split(/\n#encoding\s+/, 2)
encoding = encoding.split[0]

define_method 'test_%s_%d' % [ test_name, index + 1 ] do
stream = HTML5lib::HTMLInputStream.new(input, :chardet => false)
assert_equal encoding.downcase, stream.char_encoding.downcase, input
end
end
define_method 'test_%s_%d' % [ test_name, index + 1 ] do
stream = HTML5lib::HTMLInputStream.new(input, :chardet => false)
assert_equal encoding.downcase, stream.char_encoding.downcase, input
end
end
end

end
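For context, each record that the loop above iterates over is a chunk of markup followed by a `#encoding` line naming the expected character set; the slicing below reproduces exactly what the loop does (the sample markup and charset are invented for illustration):

record = %(<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-2">\n) +
         %(#encoding\niso-8859-2\n)

input, encoding = record.split(/\n#encoding\s+/, 2)
encoding = encoding.split[0]

input      # => the markup handed to HTML5lib::HTMLInputStream.new(input, :chardet => false)
encoding   # => "iso-8859-2", compared case-insensitively against stream.char_encoding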
4  vendor/plugins/HTML5lib/tests/test_lxp.rb  (vendored)
@@ -191,13 +191,13 @@ EOX
end

def test_br
assert_xhtml_equal <<EOX
assert_xhtml_equal <<EOX1
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>XLINK</title></head>
<body>
<br/>
</body></html>
EOX
EOX1
end

def xtest_strong
63  vendor/plugins/HTML5lib/tests/test_parser.rb  (vendored)
@@ -12,55 +12,14 @@ begin
rescue LoadError
end

$CHECK_PARSER_ERRORS = false
$CHECK_PARSER_ERRORS = ARGV.delete('-p') # TODO

puts 'Testing: ' + $tree_types_to_test * ', '
puts 'Testing tree builders: ' + $tree_types_to_test * ', '


class Html5ParserTestCase < Test::Unit::TestCase

def self.startswith?(a, b)
b[0... a.length] == a
end

def self.parseTestcase(data)
innerHTML = nil
input = []
output = []
errors = []
currentList = input
data.split(/\n/).each do |line|
if !line.empty? and !startswith?("#errors", line) and
!startswith?("#document", line) and
!startswith?("#data", line) and
!startswith?("#document-fragment", line)

if currentList == output and startswith?("|", line)
currentList.push(line[2..-1])
else
currentList.push(line)
end
elsif line == "#errors"
currentList = errors
elsif line == "#document" or startswith?("#document-fragment", line)
if startswith?("#document-fragment", line)
innerHTML = line[19..-1]
raise AssertionError unless innerHTML
end
currentList = output
end
end
return innerHTML, input.join("\n"), output.join("\n"), errors
end

# convert the output of str(document) to the format used in the testcases
def convertTreeDump(treedump)
treedump.split(/\n/)[1..-1].map { |line| (line.length > 2 and line[0] == ?|) ? line[3..-1] : line }.join("\n")
end

def sortattrs(output)
output.gsub(/^(\s+)\w+=.*(\n\1\w+=.*)+/) { |match| match.split("\n").sort.join("\n") }
end
include HTML5lib
include TestSupport

html5lib_test_files('tree-construction').each do |test_file|

@@ -69,12 +28,13 @@ class Html5ParserTestCase < Test::Unit::TestCase
File.read(test_file).split("#data\n").each_with_index do |data, index|
next if data.empty?

innerHTML, input, expected_output, expected_errors = parseTestcase(data)
innerHTML, input, expected_output, expected_errors =
TestSupport.parseTestcase(data)

$tree_types_to_test.each do |tree_name|
define_method 'test_%s_%d_%s' % [ test_name, index + 1, tree_name ] do

parser = HTML5lib::HTMLParser.new(:tree => HTML5lib::TreeBuilders.getTreeBuilder(tree_name))
parser = HTMLParser.new(:tree => TreeBuilders[tree_name])

if innerHTML
parser.parseFragment(input, innerHTML)

@@ -85,16 +45,17 @@ class Html5ParserTestCase < Test::Unit::TestCase
actual_output = convertTreeDump(parser.tree.testSerializer(parser.tree.document))

assert_equal sortattrs(expected_output), sortattrs(actual_output), [
'Input:', input,
'Expected:', expected_output,
'Recieved:', actual_output
'', 'Input:', input,
'', 'Expected:', expected_output,
'', 'Recieved:', actual_output
].join("\n")

if $CHECK_PARSER_ERRORS
actual_errors = parser.errors.map do |(line, col), message|
'Line: %i Col: %i %s' % [line, col, message]
end
assert_equal parser.errors.length, expected_errors.length, [
assert_equal expected_errors.length, parser.errors.length, [
'Input', input + "\n",
'Expected errors:', expected_errors.join("\n"),
'Actual errors:', actual_errors.join("\n")
].join("\n")
252  vendor/plugins/HTML5lib/tests/test_sanitizer.rb  (vendored)
@@ -2,209 +2,145 @@
|
|||
|
||||
require File.join(File.dirname(__FILE__), 'preamble')
|
||||
|
||||
require 'html5lib/sanitizer'
|
||||
require 'html5lib/html5parser'
|
||||
require 'html5lib/liberalxmlparser'
|
||||
require 'html5lib/treewalkers'
|
||||
require 'html5lib/serializer'
|
||||
require 'html5lib/sanitizer'
|
||||
|
||||
class SanitizeTest < Test::Unit::TestCase
|
||||
include HTML5lib
|
||||
|
||||
def sanitize_xhtml stream
|
||||
XHTMLParser.parseFragment(stream, :tokenizer => HTMLSanitizer).join('').gsub(/'/,'"')
|
||||
XHTMLParser.parseFragment(stream, {:tokenizer => HTMLSanitizer, :encoding => 'utf-8'}).to_s
|
||||
end
|
||||
|
||||
def sanitize_html stream
|
||||
HTMLParser.parseFragment(stream, :tokenizer => HTMLSanitizer).join('').gsub(/'/,'"')
|
||||
HTMLParser.parseFragment(stream, {:tokenizer => HTMLSanitizer, :encoding => 'utf-8'}).to_s
|
||||
end
|
||||
|
||||
def sanitize_rexml stream
|
||||
require 'rexml/document'
|
||||
doc = REXML::Document.new("<div xmlns='http://www.w3.org/1999/xhtml'>#{stream}</div>")
|
||||
tokens = TreeWalkers.getTreeWalker('rexml').new(doc)
|
||||
HTMLSerializer.serialize(tokens, {:encoding=>'utf-8',
|
||||
:quote_attr_values => true,
|
||||
:quote_char => "'",
|
||||
:minimize_boolean_attributes => false,
|
||||
:use_trailing_solidus => true,
|
||||
:omit_optional_tags => false,
|
||||
:inject_meta_charset => false,
|
||||
:sanitize => true}).gsub(/\A<div xmlns='http:\/\/www.w3.org\/1999\/xhtml'>(.*)<\/div>\Z/m, '\1')
|
||||
rescue REXML::ParseException
|
||||
return "Ill-formed XHTML!"
|
||||
end
|
||||
|
||||
def check_sanitization(input, htmloutput, xhtmloutput, rexmloutput)
|
||||
assert_equal htmloutput, sanitize_html(input)
|
||||
assert_equal xhtmloutput, sanitize_xhtml(input)
|
||||
assert_equal rexmloutput, sanitize_rexml(input)
|
||||
end
|
||||
|
||||
HTMLSanitizer::ALLOWED_ELEMENTS.each do |tag_name|
|
||||
next if %w[caption col colgroup optgroup option table tbody td tfoot th thead tr].include?(tag_name) ### TODO
|
||||
define_method "test_should_allow_#{tag_name}_tag" do
|
||||
if tag_name == 'image'
|
||||
assert_equal "<img title=\"1\"/>foo <bad>bar</bad> baz",
|
||||
sanitize_html("<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>")
|
||||
input = "<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>"
|
||||
htmloutput = "<#{tag_name.downcase} title='1'>foo <bad>bar</bad> baz</#{tag_name.downcase}>"
|
||||
xhtmloutput = "<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>"
|
||||
rexmloutput = xhtmloutput
|
||||
|
||||
if %w[caption colgroup optgroup option tbody td tfoot th thead tr].include?(tag_name)
|
||||
htmloutput = "foo <bad>bar</bad> baz"
|
||||
xhtmloutput = htmloutput
|
||||
elsif tag_name == 'col'
|
||||
htmloutput = "foo <bad>bar</bad> baz"
|
||||
xhtmloutput = htmloutput
|
||||
rexmloutput = "<col title='1' />"
|
||||
elsif tag_name == 'table'
|
||||
htmloutput = "foo <bad>bar</bad>baz<table title='1'> </table>"
|
||||
xhtmloutput = htmloutput
|
||||
elsif tag_name == 'image'
|
||||
htmloutput = "<img title='1'/>foo <bad>bar</bad> baz"
|
||||
xhtmloutput = htmloutput
|
||||
rexmloutput = "<image title='1'>foo <bad>bar</bad> baz</image>"
|
||||
elsif VOID_ELEMENTS.include?(tag_name)
|
||||
assert_equal "<#{tag_name} title=\"1\"/>foo <bad>bar</bad> baz",
|
||||
sanitize_html("<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>")
|
||||
else
|
||||
assert_equal "<#{tag_name.downcase} title=\"1\">foo <bad>bar</bad> baz</#{tag_name.downcase}>",
|
||||
sanitize_html("<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>")
|
||||
assert_equal "<#{tag_name} title=\"1\">foo <bad>bar</bad> baz</#{tag_name}>",
|
||||
sanitize_xhtml("<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>")
|
||||
htmloutput = "<#{tag_name} title='1'/>foo <bad>bar</bad> baz"
|
||||
xhtmloutput = htmloutput
|
||||
htmloutput += '<br/>' if tag_name == 'br'
|
||||
rexmloutput = "<#{tag_name} title='1' />"
|
||||
end
|
||||
check_sanitization(input, htmloutput, xhtmloutput, rexmloutput)
|
||||
end
|
||||
end
|
||||
|
||||
HTMLSanitizer::ALLOWED_ELEMENTS.each do |tag_name|
|
||||
define_method "test_should_forbid_#{tag_name.upcase}_tag" do
|
||||
assert_equal "<#{tag_name.upcase} title=\"1\">foo <bad>bar</bad> baz</#{tag_name.upcase}>",
|
||||
sanitize_html("<#{tag_name.upcase} title='1'>foo <bad>bar</bad> baz</#{tag_name.upcase}>")
|
||||
input = "<#{tag_name.upcase} title='1'>foo <bad>bar</bad> baz</#{tag_name.upcase}>"
|
||||
output = "<#{tag_name.upcase} title=\"1\">foo <bad>bar</bad> baz</#{tag_name.upcase}>"
|
||||
check_sanitization(input, output, output, output)
|
||||
end
|
||||
end
|
||||
|
||||
HTMLSanitizer::ALLOWED_ATTRIBUTES.each do |attribute_name|
|
||||
next if attribute_name == 'style'
|
||||
define_method "test_should_allow_#{attribute_name}_attribute" do
|
||||
assert_equal "<p #{attribute_name.downcase}=\"foo\">foo <bad>bar</bad> baz</p>",
|
||||
sanitize_html("<p #{attribute_name}='foo'>foo <bad>bar</bad> baz</p>")
|
||||
assert_equal "<p #{attribute_name}=\"foo\">foo <bad>bar</bad> baz</p>",
|
||||
sanitize_xhtml("<p #{attribute_name}='foo'>foo <bad>bar</bad> baz</p>")
|
||||
input = "<p #{attribute_name}='foo'>foo <bad>bar</bad> baz</p>"
|
||||
output = "<p #{attribute_name}='foo'>foo <bad>bar</bad> baz</p>"
|
||||
htmloutput = "<p #{attribute_name.downcase}='foo'>foo <bad>bar</bad> baz</p>"
|
||||
check_sanitization(input, htmloutput, output, output)
|
||||
end
|
||||
end
|
||||
|
||||
HTMLSanitizer::ALLOWED_ATTRIBUTES.each do |attribute_name|
|
||||
define_method "test_should_forbid_#{attribute_name.upcase}_attribute" do
|
||||
assert_equal "<p>foo <bad>bar</bad> baz</p>",
|
||||
sanitize_html("<p #{attribute_name.upcase}='display: none;'>foo <bad>bar</bad> baz</p>")
|
||||
input = "<p #{attribute_name.upcase}='display: none;'>foo <bad>bar</bad> baz</p>"
|
||||
output = "<p>foo <bad>bar</bad> baz</p>"
|
||||
check_sanitization(input, output, output, output)
|
||||
end
|
||||
end
|
||||
|
||||
HTMLSanitizer::ALLOWED_PROTOCOLS.each do |protocol|
|
||||
define_method "test_should_allow_#{protocol}_uris" do
|
||||
assert_equal "<a href=\"#{protocol}\">foo</a>",
|
||||
sanitize_html(%(<a href="#{protocol}">foo</a>))
|
||||
input = %(<a href="#{protocol}">foo</a>)
|
||||
output = "<a href='#{protocol}'>foo</a>"
|
||||
check_sanitization(input, output, output, output)
|
||||
end
|
||||
end
|
||||
|
||||
HTMLSanitizer::ALLOWED_PROTOCOLS.each do |protocol|
|
||||
define_method "test_should_allow_uppercase_#{protocol}_uris" do
|
||||
assert_equal "<a href=\"#{protocol.upcase}\">foo</a>",
|
||||
sanitize_html(%(<a href="#{protocol.upcase}">foo</a>))
|
||||
input = %(<a href="#{protocol.upcase}">foo</a>)
|
||||
output = "<a href='#{protocol.upcase}'>foo</a>"
|
||||
check_sanitization(input, output, output, output)
|
||||
end
|
||||
end
|
||||
|
||||
def test_should_allow_anchors
|
||||
assert_equal "<a href=\"foo\"><script>baz</script></a>",
|
||||
sanitize_html("<a href='foo' onclick='bar'><script>baz</script></a>")
|
||||
end
|
||||
|
||||
# RFC 3986, sec 4.2
|
||||
def test_allow_colons_in_path_component
|
||||
assert_equal "<a href=\"./this:that\">foo</a>",
|
||||
sanitize_html("<a href=\"./this:that\">foo</a>")
|
||||
end
|
||||
|
||||
%w(src width height alt).each do |img_attr|
|
||||
define_method "test_should_allow_image_#{img_attr}_attribute" do
|
||||
assert_equal "<img #{img_attr}=\"foo\"/>",
|
||||
sanitize_html("<img #{img_attr}='foo' onclick='bar' />")
|
||||
end
|
||||
end
|
||||
|
||||
def test_should_handle_non_html
|
||||
assert_equal 'abc', sanitize_html("abc")
|
||||
end
|
||||
|
||||
def test_should_handle_blank_text
|
||||
assert_equal '', sanitize_html('')
|
||||
end
|
||||
|
||||
[%w(img src), %w(a href)].each do |(tag, attr)|
|
||||
close = VOID_ELEMENTS.include?(tag) ? "/>boo" : ">boo</#{tag}>"
|
||||
|
||||
define_method "test_should_strip_#{attr}_attribute_in_#{tag}_with_bad_protocols" do
|
||||
assert_equal %(<#{tag} title="1"#{close}), sanitize_html(%(<#{tag} #{attr}="javascript:XSS" title="1">boo</#{tag}>))
|
||||
end
|
||||
|
||||
define_method "test_should_strip_#{attr}_attribute_in_#{tag}_with_bad_protocols_and_whitespace" do
|
||||
assert_equal %(<#{tag} title="1"#{close}), sanitize_html(%(<#{tag} #{attr}=" javascript:XSS" title="1">boo</#{tag}>))
|
||||
end
|
||||
end
|
||||
|
||||
[%(<img src="javascript:alert('XSS');" />),
|
||||
%(<img src=javascript:alert('XSS') />),
|
||||
%(<img src="JaVaScRiPt:alert('XSS')" />),
|
||||
%(<img src='javascript:alert("XSS")' />),
|
||||
%(<img src='javascript:alert(String.fromCharCode(88,83,83))' />),
|
||||
%(<img src='javascript:alert('XSS')' />),
|
||||
%(<img src='javascript:alert('XSS')' />),
|
||||
%(<img src='javascript:alert('XSS')' />),
|
||||
%(<img src="jav\tascript:alert('XSS');" />),
|
||||
%(<img src="jav	ascript:alert('XSS');" />),
|
||||
%(<img src="jav
ascript:alert('XSS');" />),
|
||||
%(<img src="jav
ascript:alert('XSS');" />),
|
||||
%(<img src="  javascript:alert('XSS');" />),
|
||||
%(<img src=" javascript:alert('XSS');" />),
|
||||
%(<img src=" javascript:alert('XSS');" />)].each_with_index do |img_hack, i|
|
||||
define_method "test_should_not_fall_for_xss_image_hack_#{i}" do
|
||||
assert_equal "<img/>", sanitize_html(img_hack)
|
||||
end
|
||||
end
|
||||
|
||||
def test_should_sanitize_tag_broken_up_by_null
|
||||
assert_equal "<scr\357\277\275ipt>alert(\"XSS\")</scr\357\277\275ipt>", sanitize_html(%(<scr\0ipt>alert(\"XSS\")</scr\0ipt>))
|
||||
end
|
||||
|
||||
def test_should_sanitize_invalid_script_tag
|
||||
assert_equal "<script XSS=\"\" SRC=\"http://ha.ckers.org/xss.js\"></script>", sanitize_html(%(<script/XSS SRC="http://ha.ckers.org/xss.js"></script>))
|
||||
end
|
||||
|
||||
def test_should_sanitize_script_tag_with_multiple_open_brackets
|
||||
assert_equal "<<script>alert(\"XSS\");//<</script>", sanitize_html(%(<<script>alert("XSS");//<</script>))
|
||||
assert_equal %(<iframe src=\"http://ha.ckers.org/scriptlet.html\"><), sanitize_html(%(<iframe src=http://ha.ckers.org/scriptlet.html\n<))
|
||||
end
|
||||
|
||||
def test_should_sanitize_unclosed_script
|
||||
assert_equal "<script src=\"http://ha.ckers.org/xss.js?\"><b/>", sanitize_html(%(<script src=http://ha.ckers.org/xss.js?<b>))
|
||||
end
|
||||
|
||||
def test_should_sanitize_half_open_scripts
|
||||
assert_equal "<img/>", sanitize_html(%(<img src="javascript:alert('XSS')"))
|
  end

  def test_should_not_fall_for_ridiculous_hack
    img_hack = %(<img\nsrc\n=\n"\nj\na\nv\na\ns\nc\nr\ni\np\nt\n:\na\nl\ne\nr\nt\n(\n'\nX\nS\nS\n'\n)\n"\n />)
    assert_equal "<img/>", sanitize_html(img_hack)
  end

  def test_platypus
    assert_equal %(<a href=\"http://www.ragingplatypus.com/\" style=\"display: block; width: 100%; height: 100%; background-color: black; background-x: center; background-y: center;\">never trust your upstream platypus</a>),
      sanitize_html(%(<a href="http://www.ragingplatypus.com/" style="display:block; position:absolute; left:0; top:0; width:100%; height:100%; z-index:1; background-color:black; background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg); background-x:center; background-y:center; background-repeat:repeat;">never trust your upstream platypus</a>))
  end

  def test_xul
    assert_equal %(<p style="">fubar</p>),
      sanitize_html(%(<p style="-moz-binding:url('http://ha.ckers.org/xssmoz.xml#xss')">fubar</p>))
  end

  def test_input_image
    assert_equal %(<input type="image"/>),
      sanitize_html(%(<input type="image" src="javascript:alert('XSS');" />))
  end

  def test_non_alpha_non_digit
    assert_equal "<script XSS=\"\" src=\"http://ha.ckers.org/xss.js\"></script>",
      sanitize_html(%(<script/XSS src="http://ha.ckers.org/xss.js"></script>))
    assert_equal "<a>foo</a>",
      sanitize_html('<a onclick!#$%&()*~+-_.,:;?@[/|\]^`=alert("XSS")>foo</a>')
    assert_equal "<img src=\"http://ha.ckers.org/xss.js\"/>",
      sanitize_html('<img/src="http://ha.ckers.org/xss.js"/>')
  end

  def test_img_dynsrc_lowsrc
    assert_equal "<img/>",
      sanitize_html(%(<img dynsrc="javascript:alert('XSS')" />))
    assert_equal "<img/>",
      sanitize_html(%(<img lowsrc="javascript:alert('XSS')" />))
  end

  def test_div_background_image_unicode_encoded
    assert_equal '<div style="">foo</div>',
      sanitize_html(%(<div style="background-image:\0075\0072\006C\0028'\006a\0061\0076\0061\0073\0063\0072\0069\0070\0074\003a\0061\006c\0065\0072\0074\0028.1027\0058.1053\0053\0027\0029'\0029">foo</div>))
  end

  def test_div_expression
    assert_equal '<div style="">foo</div>',
      sanitize_html(%(<div style="width: expression(alert('XSS'));">foo</div>))
  end

  def test_img_vbscript
    assert_equal '<img/>',
      sanitize_html(%(<img src='vbscript:msgbox("XSS")' />))
  end

  def test_should_handle_astral_plane_characters
    assert_equal "<p>\360\235\222\265 \360\235\224\270</p>",
      sanitize_html("<p>𝒵 𝔸</p>")
    input = "<p>𝒵 𝔸</p>"
    output = "<p>\360\235\222\265 \360\235\224\270</p>"
    check_sanitization(input, output, output, output)

    input = "<p><tspan>\360\235\224\270</tspan> a</p>"
    output = "<p><tspan>\360\235\224\270</tspan> a</p>"
    check_sanitization(input, output, output, output)
  end

  # This affects only NS4. Is it worth fixing?
  # def test_javascript_includes
  #   input = %(<div size="&{alert('XSS')}">foo</div>)
  #   output = "<div>foo</div>"
  #   check_sanitization(input, output, output, output)
  # end

  html5lib_test_files('sanitizer').each do |filename|
    JSON::parse(open(filename).read).each do |test|
      define_method "test_#{test['name']}" do
        check_sanitization(
          test['input'],
          test['output'],
          test['xhtml'] || test['output'],
          test['rexml'] || test['output']
        )
      end
    end
  end
end

68  vendor/plugins/HTML5lib/tests/test_serializer.rb  (vendored, Normal file)
@@ -0,0 +1,68 @@
require File.join(File.dirname(__FILE__), 'preamble')

require 'html5lib/html5parser'
require 'html5lib/serializer'
require 'html5lib/treewalkers'

#Run the serialize error checks
checkSerializeErrors = false

class JsonWalker < HTML5lib::TreeWalkers::Base
  def each
    @tree.each do |token|
      case token[0]
      when 'StartTag'
        yield startTag(token[1], token[2])
      when 'EndTag'
        yield endTag(token[1])
      when 'EmptyTag'
        yield emptyTag(token[1], token[2])
      when 'Comment'
        yield comment(token[1])
      when 'Characters', 'SpaceCharacters'
        text(token[1]) {|textToken| yield textToken}
      when 'Doctype'
        yield doctype(token[1])
      else
        raise "Unknown token type: " + token[0]
      end
    end
  end
end

class Html5SerializeTestcase < Test::Unit::TestCase
  html5lib_test_files('serializer').each do |filename|
    test_name = File.basename(filename).sub('.test', '')
    tests = JSON::parse(open(filename).read)
    tests['tests'].each_with_index do |test, index|

      define_method "test_#{test_name}_#{index+1}" do
        if test["options"] and test["options"]["encoding"]
          test["options"][:encoding] = test["options"]["encoding"]
        end

        result = HTML5lib::HTMLSerializer.
          serialize(JsonWalker.new(test["input"]), (test["options"] || {}))
        expected = test["expected"]
        if expected.length == 1
          assert_equal(expected[0], result, test["description"])
        elsif !expected.include?(result)
          flunk("Expected: #{expected.inspect}, Received: #{result.inspect}")
        end

        return if test_name == 'optionaltags'

        result = HTML5lib::XHTMLSerializer.
          serialize(JsonWalker.new(test["input"]), (test["options"] || {}))
        expected = test["xhtml"] || test["expected"]
        if expected.length == 1
          assert_equal(expected[0], result, test["description"])
        elsif !expected.include?(result)
          flunk("Expected: #{expected.inspect}, Received: #{result.inspect}")
        end

      end

    end
  end
end

62  vendor/plugins/HTML5lib/tests/test_stream.rb  (vendored, Executable file)
@@ -0,0 +1,62 @@
require File.join(File.dirname(__FILE__), 'preamble')

require 'html5lib/inputstream'

class HTMLInputStreamTest < Test::Unit::TestCase
  include HTML5lib

  def test_char_ascii
    stream = HTMLInputStream.new("'", :encoding=>'ascii')
    assert_equal('ascii', stream.char_encoding)
    assert_equal("'", stream.char)
  end

  def test_char_null
    stream = HTMLInputStream.new("\x00")
    assert_equal("\xef\xbf\xbd", stream.char)
  end

  def test_char_utf8
    stream = HTMLInputStream.new("\xe2\x80\x98", :encoding=>'utf-8')
    assert_equal('utf-8', stream.char_encoding)
    assert_equal("\xe2\x80\x98", stream.char)
  end

  def test_char_win1252
    stream = HTMLInputStream.new("\xa2\xc5\xf1\x92\x86")
    assert_equal('windows-1252', stream.char_encoding)
    assert_equal("\xc2\xa2", stream.char)
    assert_equal("\xc3\x85", stream.char)
    assert_equal("\xc3\xb1", stream.char)
    assert_equal("\xe2\x80\x99", stream.char)
    assert_equal("\xe2\x80\xa0", stream.char)
  end

  def test_bom
    stream = HTMLInputStream.new("\xef\xbb\xbf" + "'")
    assert_equal('utf-8', stream.char_encoding)
    assert_equal("'", stream.char)
  end

  begin
    require 'iconv'

    def test_utf_16
      stream = HTMLInputStream.new("\xff\xfe" + " \x00"*1025)
      assert(stream.char_encoding, 'utf-16-le')
      assert_equal(1025, stream.chars_until(' ',true).length)
    end
  rescue LoadError
    puts "iconv not found, skipping iconv tests"
  end

  def test_newlines
    stream = HTMLInputStream.new("\xef\xbb\xbf" + "a\nbb\r\nccc\rdddd")
    assert_equal([1,0], stream.position)
    assert_equal("a\nbb\n", stream.chars_until('c'))
    assert_equal([3,0], stream.position)
    assert_equal("ccc\ndddd", stream.chars_until('x'))
    assert_equal([4,4], stream.position)
    assert_equal([1,2,3], stream.instance_eval {@line_lengths})
  end
end
@@ -30,9 +30,10 @@ class Html5TokenizerTestCase < Test::Unit::TestCase
  def tokenizer_test(data)
    (data['contentModelFlags'] || [:PCDATA]).each do |content_model_flag|
      message = [
        'Description:', data['description'],
        'Input:', data['input'],
        'Content Model Flag:', content_model_flag ] * "\n"
        '', 'Description:', data['description'],
        '', 'Input:', data['input'],
        '', 'Content Model Flag:', content_model_flag,
        '' ] * "\n"

      assert_nothing_raised message do
        tokenizer = HTML5lib::HTMLTokenizer.new(data['input'])

113  vendor/plugins/HTML5lib/tests/test_treewalkers.rb  (vendored, Normal file)
@@ -0,0 +1,113 @@
require File.join(File.dirname(__FILE__), 'preamble')

require 'html5lib/html5parser'
require 'html5lib/treewalkers'
require 'html5lib/treebuilders'

$tree_types_to_test = {
  'simpletree' =>
    {:builder => HTML5lib::TreeBuilders['simpletree'],
     :walker => HTML5lib::TreeWalkers['simpletree']},
  'rexml' =>
    {:builder => HTML5lib::TreeBuilders['rexml'],
     :walker => HTML5lib::TreeWalkers['rexml']},
  'hpricot' =>
    {:builder => HTML5lib::TreeBuilders['hpricot'],
     :walker => HTML5lib::TreeWalkers['hpricot']},
}

puts 'Testing tree walkers: ' + $tree_types_to_test.keys * ', '

class TestTreeWalkers < Test::Unit::TestCase
  include HTML5lib::TestSupport

  def concatenateCharacterTokens(tokens)
    charactersToken = nil
    for token in tokens
      type = token[:type]
      if [:Characters, :SpaceCharacters].include?(type)
        if charactersToken == nil
          charactersToken = {:type => :Characters, :data => token[:data]}
        else
          charactersToken[:data] += token[:data]
        end
      else
        if charactersToken != nil
          yield charactersToken
          charactersToken = nil
        end
        yield token
      end
    end
    yield charactersToken if charactersToken != nil
  end

  def convertTokens(tokens)
    output = []
    indent = 0
    concatenateCharacterTokens(tokens) do |token|
      case token[:type]
      when :StartTag, :EmptyTag
        output << "#{' '*indent}<#{token[:name]}>"
        indent += 2
        for name, value in token[:data].to_a.sort
          next if name=='xmlns'
          output << "#{' '*indent}#{name}=\"#{value}\""
        end
        indent -= 2 if token[:type] == :EmptyTag
      when :EndTag
        indent -= 2
      when :Comment
        output << "#{' '*indent}<!-- #{token[:data]} -->"
      when :Doctype
        output << "#{' '*indent}<!DOCTYPE #{token[:name]}>"
      when :Characters, :SpaceCharacters
        output << "#{' '*indent}\"#{token[:data]}\""
      else
        # TODO: what to do with errors?
      end
    end
    return output.join("\n")
  end

  html5lib_test_files('tree-construction').each do |test_file|

    test_name = File.basename(test_file).sub('.dat', '')
    next if test_name == 'tests5' # TODO

    File.read(test_file).split("#data\n").each_with_index do |data, index|
      next if data.empty?

      innerHTML, input, expected_output, expected_errors =
        HTML5lib::TestSupport::parseTestcase(data)

      $tree_types_to_test.each do |tree_name, tree_class|

        define_method "test_#{test_name}_#{index}_#{tree_name}" do

          parser = HTML5lib::HTMLParser.new(:tree => tree_class[:builder])

          if innerHTML
            parser.parseFragment(input, innerHTML)
          else
            parser.parse(input)
          end

          document = parser.tree.getDocument

          begin
            output = sortattrs(convertTokens(tree_class[:walker].new(document)))
            expected = sortattrs(expected_output)
            assert_equal expected, output, [
              '', 'Input:', input,
              '', 'Expected:', expected,
              '', 'Recieved:', output
            ].join("\n")
          rescue NotImplementedError
            # Amnesty for those that confess...
          end
        end
      end
    end
  end
end
@@ -11,14 +11,15 @@ class TokenizerTestParser
    debug = nil
    for token in @tokenizer
      debug = token.inspect if token[:type] == :ParseError
      send ('process' + token[:type].to_s), token
      send(('process' + token[:type].to_s), token)
    end

    return @outputTokens
  end

  def processDoctype(token)
    @outputTokens.push(["DOCTYPE", token[:name], token[:data]])
    @outputTokens.push(["DOCTYPE", token[:name], token[:publicId],
                        token[:systemId], token[:correct]])
  end

  def processStartTag(token)

@@ -7,6 +7,7 @@ module ActionController
  end

  module Caching

    module Actions

      # All documentation is keeping DRY in the plugin's README

@@ -17,7 +18,7 @@ module ActionController
      end

      def expire_one_action(options)
        expire_fragment(Regexp.new(".*/" + ActionCachePath.path_for(self, options) + ".*"))
        expire_fragment(Regexp.new(".*/" + Regexp.escape(ActionCachePath.path_for(self, options)) + ".*"))
      end

      def expire_action(options = {})

@@ -134,7 +135,7 @@ module ActionController
        controller.response.headers['Cache-Control'] == 'no-cache'
          controller.response.headers['Cache-Control'] = "max-age=#{controller.response.time_to_live}"
        end
        controller.response.headers['Etag'] = "\"#{MD5.new(controller.response.body).to_s}\""
        controller.response.headers['Etag'] = %{"#{MD5.new(controller.response.body).to_s}"}
        controller.response.headers['Last-Modified'] ||= Time.now.httpdate
      end

@@ -147,7 +148,7 @@ module ActionController

      def send_not_modified(controller)
        controller.logger.info "Send Not Modified"
        controller.response.headers['Etag'] = "\"#{MD5.new(fragment_body(controller)).to_s}\""
        controller.response.headers['Etag'] = %{"#{MD5.new(fragment_body(controller)).to_s}"}
        controller.render(:text => "", :status => 304)
      end

@@ -154,6 +154,22 @@ Example:
 CSS: style.css math.css

=end
  # Render to an HTML fragment (returns a REXML document tree)
  def to_html_tree
    div = Element.new 'div'
    div.attributes['class'] = 'maruku_wrapper_div'
    children_to_html.each do |e|
      div << e
    end

    # render footnotes
    if @doc.footnotes_order.size > 0
      div << render_footnotes
    end

    doc = Document.new(nil,{:respect_whitespace =>:all})
    doc << div
  end

  # Render to a complete HTML document (returns a REXML document tree)
  def to_html_document_tree
@@ -365,7 +365,7 @@ Otherwise, a standard `verbatim` environment is used.
    color = get_setting(:code_background_color)
    colorspec = latex_color(color, 'colorbox')

    "#{colorspec}{\\tt #{s}}"
    "{#{colorspec}{\\tt #{s}}}"
  end

  def to_latex_immediate_link