Upgrade to Rails 2.0.2

Upgraded to Rails 2.0.2, except that we maintain

   vendor/rails/actionpack/lib/action_controller/routing.rb

from Rail 1.2.6 (at least for now), so that Routes don't change. We still
get to enjoy Rails's many new features.

Also fixed a bug in Chunk-handling: disable WikiWord processing in tags (for real this time).
This commit is contained in:
Jacques Distler 2007-12-21 01:48:59 -06:00
parent 0f6889e09f
commit 6873fc8026
1083 changed files with 52810 additions and 41058 deletions

View file

@ -1,9 +1,9 @@
require File.dirname(__FILE__) + '/tokenizer'
require File.dirname(__FILE__) + '/node'
require File.dirname(__FILE__) + '/selector'
require 'html/tokenizer'
require 'html/node'
require 'html/selector'
require 'html/sanitizer'
module HTML #:nodoc:
# A top-level HTMl document. You give it a body of text, and it will parse that
# text into a tree of nodes.
class Document #:nodoc:
@ -23,6 +23,9 @@ module HTML #:nodoc:
if node.tag?
if node_stack.length > 1 && node.closing == :close
if node_stack.last.name == node.name
if node_stack.last.children.empty?
node_stack.last.children << Text.new(node_stack.last, node.line, node.position, "")
end
node_stack.pop
else
open_start = node_stack.last.position - 20

View file

@ -0,0 +1,173 @@
module HTML
class Sanitizer
def sanitize(text, options = {})
return text unless sanitizeable?(text)
tokenize(text, options).join
end
def sanitizeable?(text)
!(text.nil? || text.empty? || !text.index("<"))
end
protected
def tokenize(text, options)
tokenizer = HTML::Tokenizer.new(text)
result = []
while token = tokenizer.next
node = Node.parse(nil, 0, 0, token, false)
process_node node, result, options
end
result
end
def process_node(node, result, options)
result << node.to_s
end
end
class FullSanitizer < Sanitizer
def sanitize(text, options = {})
result = super
# strip any comments, and if they have a newline at the end (ie. line with
# only a comment) strip that too
result.gsub!(/<!--(.*?)-->[\n]?/m, "") if result
# Recurse - handle all dirty nested tags
result == text ? result : sanitize(result, options)
end
def process_node(node, result, options)
result << node.to_s if node.class == HTML::Text
end
end
class LinkSanitizer < FullSanitizer
cattr_accessor :included_tags, :instance_writer => false
self.included_tags = Set.new(%w(a href))
def sanitizeable?(text)
!(text.nil? || text.empty? || !((text.index("<a") || text.index("<href")) && text.index(">")))
end
protected
def process_node(node, result, options)
result << node.to_s unless node.is_a?(HTML::Tag) && included_tags.include?(node.name)
end
end
class WhiteListSanitizer < Sanitizer
[:protocol_separator, :uri_attributes, :allowed_attributes, :allowed_tags, :allowed_protocols, :bad_tags,
:allowed_css_properties, :allowed_css_keywords, :shorthand_css_properties].each do |attr|
class_inheritable_accessor attr, :instance_writer => false
end
# A regular expression of the valid characters used to separate protocols like
# the ':' in 'http://foo.com'
self.protocol_separator = /:|(&#0*58)|(&#x70)|(%|&#37;)3A/
# Specifies a Set of HTML attributes that can have URIs.
self.uri_attributes = Set.new(%w(href src cite action longdesc xlink:href lowsrc))
# Specifies a Set of 'bad' tags that the #sanitize helper will remove completely, as opposed
# to just escaping harmless tags like &lt;font&gt;
self.bad_tags = Set.new(%w(script))
# Specifies the default Set of tags that the #sanitize helper will allow unscathed.
self.allowed_tags = Set.new(%w(strong em b i p code pre tt samp kbd var sub
sup dfn cite big small address hr br div span h1 h2 h3 h4 h5 h6 ul ol li dt dd abbr
acronym a img blockquote del ins))
# Specifies the default Set of html attributes that the #sanitize helper will leave
# in the allowed tag.
self.allowed_attributes = Set.new(%w(href src width height alt cite datetime title class name xml:lang abbr))
# Specifies the default Set of acceptable css properties that #sanitize and #sanitize_css will accept.
self.allowed_protocols = Set.new(%w(ed2k ftp http https irc mailto news gopher nntp telnet webcal xmpp callto
feed svn urn aim rsync tag ssh sftp rtsp afs))
# Specifies the default Set of acceptable css keywords that #sanitize and #sanitize_css will accept.
self.allowed_css_properties = Set.new(%w(azimuth background-color border-bottom-color border-collapse
border-color border-left-color border-right-color border-top-color clear color cursor direction display
elevation float font font-family font-size font-style font-variant font-weight height letter-spacing line-height
overflow pause pause-after pause-before pitch pitch-range richness speak speak-header speak-numeral speak-punctuation
speech-rate stress text-align text-decoration text-indent unicode-bidi vertical-align voice-family volume white-space
width))
# Specifies the default Set of acceptable css keywords that #sanitize and #sanitize_css will accept.
self.allowed_css_keywords = Set.new(%w(auto aqua black block blue bold both bottom brown center
collapse dashed dotted fuchsia gray green !important italic left lime maroon medium none navy normal
nowrap olive pointer purple red right solid silver teal top transparent underline white yellow))
# Specifies the default Set of allowed shorthand css properties for the #sanitize and #sanitize_css helpers.
self.shorthand_css_properties = Set.new(%w(background border margin padding))
# Sanitizes a block of css code. Used by #sanitize when it comes across a style attribute
def sanitize_css(style)
# disallow urls
style = style.to_s.gsub(/url\s*\(\s*[^\s)]+?\s*\)\s*/, ' ')
# gauntlet
if style !~ /^([:,;#%.\sa-zA-Z0-9!]|\w-\w|\'[\s\w]+\'|\"[\s\w]+\"|\([\d,\s]+\))*$/ ||
style !~ /^(\s*[-\w]+\s*:\s*[^:;]*(;|$))*$/
return ''
end
clean = []
style.scan(/([-\w]+)\s*:\s*([^:;]*)/) do |prop,val|
if allowed_css_properties.include?(prop.downcase)
clean << prop + ': ' + val + ';'
elsif shorthand_css_properties.include?(prop.split('-')[0].downcase)
unless val.split().any? do |keyword|
!allowed_css_keywords.include?(keyword) &&
keyword !~ /^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$/
end
clean << prop + ': ' + val + ';'
end
end
end
clean.join(' ')
end
protected
def tokenize(text, options)
options[:parent] = []
options[:attributes] ||= allowed_attributes
options[:tags] ||= allowed_tags
super
end
def process_node(node, result, options)
result << case node
when HTML::Tag
if node.closing == :close
options[:parent].shift
else
options[:parent].unshift node.name
end
process_attributes_for node, options
options[:tags].include?(node.name) ? node : nil
else
bad_tags.include?(options[:parent].first) ? nil : node.to_s.gsub(/</, "&lt;")
end
end
def process_attributes_for(node, options)
return unless node.attributes
node.attributes.keys.each do |attr_name|
value = node.attributes[attr_name].to_s
if !options[:attributes].include?(attr_name) || contains_bad_protocols?(attr_name, value)
node.attributes.delete(attr_name)
else
node.attributes[attr_name] = attr_name == 'style' ? sanitize_css(value) : CGI::escapeHTML(value)
end
end
end
def contains_bad_protocols?(attr_name, value)
uri_attributes.include?(attr_name) &&
(value =~ /(^[^\/:]*):|(&#0*58)|(&#x70)|(%|&#37;)3A/ && !allowed_protocols.include?(value.split(protocol_separator).first))
end
end
end

View file

@ -240,19 +240,24 @@ module HTML
raise ArgumentError, "CSS expression cannot be empty" if selector.empty?
@source = ""
values = values[0] if values.size == 1 && values[0].is_a?(Array)
# We need a copy to determine if we failed to parse, and also
# preserve the original pass by-ref statement.
statement = selector.strip.dup
# Create a simple selector, along with negation.
simple_selector(statement, values).each { |name, value| instance_variable_set("@#{name}", value) }
@alternates = []
@depends = nil
# Alternative selector.
if statement.sub!(/^\s*,\s*/, "")
second = Selector.new(statement, values)
(@alternates ||= []) << second
@alternates << second
# If there are alternate selectors, we group them in the top selector.
if alternates = second.instance_variable_get(:@alternates)
second.instance_variable_set(:@alternates, nil)
second.instance_variable_set(:@alternates, [])
@alternates.concat alternates
end
@source << " , " << second.to_s
@ -412,7 +417,7 @@ module HTML
# If this selector is part of the group, try all the alternative
# selectors (unless first_only).
if @alternates && (!first_only || !matches)
if !first_only || !matches
@alternates.each do |alternate|
break if matches && first_only
if subset = alternate.match(element, first_only)
@ -789,15 +794,15 @@ module HTML
# eventually, and array of substitution values.
#
# This method is called from four places, so it helps to put it here
# for resue. The only logic deals with the need to detect comma
# for reuse. The only logic deals with the need to detect comma
# separators (alternate) and apply them to the selector group of the
# top selector.
def next_selector(statement, values)
second = Selector.new(statement, values)
# If there are alternate selectors, we group them in the top selector.
if alternates = second.instance_variable_get(:@alternates)
second.instance_variable_set(:@alternates, nil)
(@alternates ||= []).concat alternates
second.instance_variable_set(:@alternates, [])
@alternates.concat alternates
end
second
end

View file

@ -1,97 +0,0 @@
require 'rexml/document'
# SimpleXML like xml parser. Written by leon breet from the ruby on rails Mailing list
class XmlNode #:nodoc:
attr :node
def initialize(node, options = {})
@node = node
@children = {}
@raise_errors = options[:raise_errors]
end
def self.from_xml(xml_or_io)
document = REXML::Document.new(xml_or_io)
if document.root
XmlNode.new(document.root)
else
XmlNode.new(document)
end
end
def node_encoding
@node.encoding
end
def node_name
@node.name
end
def node_value
@node.text
end
def node_value=(value)
@node.text = value
end
def xpath(expr)
matches = nil
REXML::XPath.each(@node, expr) do |element|
matches ||= XmlNodeList.new
matches << (@children[element] ||= XmlNode.new(element))
end
matches
end
def method_missing(name, *args)
name = name.to_s
nodes = nil
@node.each_element(name) do |element|
nodes ||= XmlNodeList.new
nodes << (@children[element] ||= XmlNode.new(element))
end
nodes
end
def <<(node)
if node.is_a? REXML::Node
child = node
elsif node.respond_to? :node
child = node.node
end
@node.add_element child
@children[child] ||= XmlNode.new(child)
end
def [](name)
@node.attributes[name.to_s]
end
def []=(name, value)
@node.attributes[name.to_s] = value
end
def to_s
@node.to_s
end
def to_i
to_s.to_i
end
end
class XmlNodeList < Array #:nodoc:
def [](i)
i.is_a?(String) ? super(0)[i] : super(i)
end
def []=(i, value)
i.is_a?(String) ? self[0][i] = value : super(i, value)
end
def method_missing(name, *args)
name = name.to_s
self[0].__send__(name, *args)
end
end

File diff suppressed because it is too large Load diff