Upgrade to Rails 2.0.2

Upgraded to Rails 2.0.2, except that we maintain vendor/rails/actionpack/lib/action_controller/routing.rb from Rails 1.2.6 (at least for now), so that Routes don't change. We still get to enjoy Rails's many new features. Also fixed a bug in Chunk-handling: disable WikiWord processing in tags (for real this time).
2007-12-21 01:48:59 -06:00 · 2007-12-21 01:48:59 -06:00 · 6873fc8026
commit 6873fc8026
parent 0f6889e09f
1083 changed files with 52810 additions and 41058 deletions
--- a/vendor/rails/actionpack/lib/action_controller/vendor/html-scanner/html/document.rb
+++ b/vendor/rails/actionpack/lib/action_controller/vendor/html-scanner/html/document.rb
@ -1,9 +1,9 @@
-require File.dirname(__FILE__) + '/tokenizer'
-require File.dirname(__FILE__) + '/node'
-require File.dirname(__FILE__) + '/selector'
+require 'html/tokenizer'
+require 'html/node'
+require 'html/selector'
+require 'html/sanitizer'

 module HTML #:nodoc:
-  
  # A top-level HTMl document. You give it a body of text, and it will parse that
  # text into a tree of nodes.
  class Document #:nodoc:
@ -23,6 +23,9 @@ module HTML #:nodoc:
        if node.tag?
          if node_stack.length > 1 && node.closing == :close
            if node_stack.last.name == node.name
+              if node_stack.last.children.empty?
+                node_stack.last.children << Text.new(node_stack.last, node.line, node.position, "")
+              end
              node_stack.pop
            else
              open_start = node_stack.last.position - 20
--- a/vendor/rails/actionpack/lib/action_controller/vendor/html-scanner/html/sanitizer.rb
+++ b/vendor/rails/actionpack/lib/action_controller/vendor/html-scanner/html/sanitizer.rb
@ -0,0 +1,173 @@
+module HTML
+  class Sanitizer
+    def sanitize(text, options = {})
+      return text unless sanitizeable?(text)
+      tokenize(text, options).join
+    end
+    
+    def sanitizeable?(text)
+      !(text.nil? || text.empty? || !text.index("<"))
+    end
+    
+  protected
+    def tokenize(text, options)
+      tokenizer = HTML::Tokenizer.new(text)
+      result = []
+      while token = tokenizer.next
+        node = Node.parse(nil, 0, 0, token, false)
+        process_node node, result, options
+      end
+      result
+    end
+    
+    def process_node(node, result, options)
+      result << node.to_s
+    end
+  end
+  
+  class FullSanitizer < Sanitizer
+    def sanitize(text, options = {})
+      result = super
+      # strip any comments, and if they have a newline at the end (ie. line with
+      # only a comment) strip that too
+      result.gsub!(/<!--(.*?)-->[\n]?/m, "") if result
+      # Recurse - handle all dirty nested tags
+      result == text ? result : sanitize(result, options)
+    end
+    
+    def process_node(node, result, options)
+      result << node.to_s if node.class == HTML::Text
+    end
+  end
+  
+  class LinkSanitizer < FullSanitizer
+    cattr_accessor :included_tags, :instance_writer => false
+    self.included_tags = Set.new(%w(a href))
+
+    def sanitizeable?(text)
+      !(text.nil? || text.empty? || !((text.index("<a") || text.index("<href")) && text.index(">")))
+    end
+    
+  protected
+    def process_node(node, result, options)
+      result << node.to_s unless node.is_a?(HTML::Tag) && included_tags.include?(node.name) 
+    end
+  end
+  
+  class WhiteListSanitizer < Sanitizer
+    [:protocol_separator, :uri_attributes, :allowed_attributes, :allowed_tags, :allowed_protocols, :bad_tags,
+     :allowed_css_properties, :allowed_css_keywords, :shorthand_css_properties].each do |attr|
+      class_inheritable_accessor attr, :instance_writer => false
+    end
+
+    # A regular expression of the valid characters used to separate protocols like
+    # the ':' in 'http://foo.com'
+    self.protocol_separator     = /:|(&#0*58)|(&#x70)|(%|&#37;)3A/
+    
+    # Specifies a Set of HTML attributes that can have URIs.
+    self.uri_attributes         = Set.new(%w(href src cite action longdesc xlink:href lowsrc))
+
+    # Specifies a Set of 'bad' tags that the #sanitize helper will remove completely, as opposed
+    # to just escaping harmless tags like &lt;font&gt;
+    self.bad_tags               = Set.new(%w(script))
+    
+    # Specifies the default Set of tags that the #sanitize helper will allow unscathed.
+    self.allowed_tags           = Set.new(%w(strong em b i p code pre tt samp kbd var sub 
+      sup dfn cite big small address hr br div span h1 h2 h3 h4 h5 h6 ul ol li dt dd abbr 
+      acronym a img blockquote del ins))
+
+    # Specifies the default Set of html attributes that the #sanitize helper will leave 
+    # in the allowed tag.
+    self.allowed_attributes     = Set.new(%w(href src width height alt cite datetime title class name xml:lang abbr))
+    
+    # Specifies the default Set of URI protocols that the #sanitize helper will allow in URI attributes.
+    self.allowed_protocols      = Set.new(%w(ed2k ftp http https irc mailto news gopher nntp telnet webcal xmpp callto 
+      feed svn urn aim rsync tag ssh sftp rtsp afs))
+    
+    # Specifies the default Set of acceptable css properties that #sanitize and #sanitize_css will accept.
+    self.allowed_css_properties = Set.new(%w(azimuth background-color border-bottom-color border-collapse 
+      border-color border-left-color border-right-color border-top-color clear color cursor direction display 
+      elevation float font font-family font-size font-style font-variant font-weight height letter-spacing line-height
+      overflow pause pause-after pause-before pitch pitch-range richness speak speak-header speak-numeral speak-punctuation
+      speech-rate stress text-align text-decoration text-indent unicode-bidi vertical-align voice-family volume white-space
+      width))
+  
+    # Specifies the default Set of acceptable css keywords that #sanitize and #sanitize_css will accept.
+    self.allowed_css_keywords   = Set.new(%w(auto aqua black block blue bold both bottom brown center
+      collapse dashed dotted fuchsia gray green !important italic left lime maroon medium none navy normal
+      nowrap olive pointer purple red right solid silver teal top transparent underline white yellow))
+
+    # Specifies the default Set of allowed shorthand css properties for the #sanitize and #sanitize_css helpers.
+    self.shorthand_css_properties = Set.new(%w(background border margin padding))
+
+    # Sanitizes a block of css code.  Used by #sanitize when it comes across a style attribute
+    def sanitize_css(style)
+      # disallow urls
+      style = style.to_s.gsub(/url\s*\(\s*[^\s)]+?\s*\)\s*/, ' ')
+
+      # gauntlet
+      if style !~ /^([:,;#%.\sa-zA-Z0-9!]|\w-\w|\'[\s\w]+\'|\"[\s\w]+\"|\([\d,\s]+\))*$/ ||
+          style !~ /^(\s*[-\w]+\s*:\s*[^:;]*(;|$))*$/
+        return ''
+      end
+
+      clean = []
+      style.scan(/([-\w]+)\s*:\s*([^:;]*)/) do |prop,val|
+        if allowed_css_properties.include?(prop.downcase)
+          clean <<  prop + ': ' + val + ';'
+        elsif shorthand_css_properties.include?(prop.split('-')[0].downcase) 
+          unless val.split().any? do |keyword|
+            !allowed_css_keywords.include?(keyword) && 
+              keyword !~ /^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$/
+          end
+            clean << prop + ': ' + val + ';'
+          end
+        end
+      end
+      clean.join(' ')
+    end
+
+  protected
+    def tokenize(text, options)
+      options[:parent] = []
+      options[:attributes] ||= allowed_attributes
+      options[:tags]       ||= allowed_tags
+      super
+    end
+
+    def process_node(node, result, options)
+      result << case node
+        when HTML::Tag
+          if node.closing == :close
+            options[:parent].shift
+          else
+            options[:parent].unshift node.name
+          end
+          
+          process_attributes_for node, options
+
+          options[:tags].include?(node.name) ? node : nil
+        else
+          bad_tags.include?(options[:parent].first) ? nil : node.to_s.gsub(/</, "&lt;")
+      end
+    end
+    
+    def process_attributes_for(node, options)
+      return unless node.attributes
+      node.attributes.keys.each do |attr_name|
+        value = node.attributes[attr_name].to_s
+
+        if !options[:attributes].include?(attr_name) || contains_bad_protocols?(attr_name, value)
+          node.attributes.delete(attr_name)
+        else
+          node.attributes[attr_name] = attr_name == 'style' ? sanitize_css(value) : CGI::escapeHTML(value)
+        end
+      end
+    end
+
+    def contains_bad_protocols?(attr_name, value)
+      uri_attributes.include?(attr_name) && 
+      (value =~ /(^[^\/:]*):|(&#0*58)|(&#x70)|(%|&#37;)3A/ && !allowed_protocols.include?(value.split(protocol_separator).first))
+    end
+  end
+end
--- a/vendor/rails/actionpack/lib/action_controller/vendor/html-scanner/html/selector.rb
+++ b/vendor/rails/actionpack/lib/action_controller/vendor/html-scanner/html/selector.rb
@ -240,19 +240,24 @@ module HTML
      raise ArgumentError, "CSS expression cannot be empty" if selector.empty?
      @source = ""
      values = values[0] if values.size == 1 && values[0].is_a?(Array)
+
      # We need a copy to determine if we failed to parse, and also
      # preserve the original pass by-ref statement.
      statement = selector.strip.dup
+
      # Create a simple selector, along with negation.
      simple_selector(statement, values).each { |name, value| instance_variable_set("@#{name}", value) }

+      @alternates = []
+      @depends = nil
+
      # Alternative selector.
      if statement.sub!(/^\s*,\s*/, "")
        second = Selector.new(statement, values)
-        (@alternates ||= []) << second
+        @alternates << second
        # If there are alternate selectors, we group them in the top selector.
        if alternates = second.instance_variable_get(:@alternates)
-          second.instance_variable_set(:@alternates, nil)
+          second.instance_variable_set(:@alternates, [])
          @alternates.concat alternates
        end
        @source << " , " << second.to_s
@ -412,7 +417,7 @@ module HTML

      # If this selector is part of the group, try all the alternative
      # selectors (unless first_only).
-      if @alternates && (!first_only || !matches)
+      if !first_only || !matches
        @alternates.each do |alternate|
          break if matches && first_only
          if subset = alternate.match(element, first_only)
@ -789,15 +794,15 @@ module HTML
    # eventually, and array of substitution values.
    #
    # This method is called from four places, so it helps to put it here
-    # for resue. The only logic deals with the need to detect comma
+    # for reuse. The only logic deals with the need to detect comma
    # separators (alternate) and apply them to the selector group of the
    # top selector.
    def next_selector(statement, values)
      second = Selector.new(statement, values)
      # If there are alternate selectors, we group them in the top selector.
      if alternates = second.instance_variable_get(:@alternates)
-        second.instance_variable_set(:@alternates, nil)
-        (@alternates ||= []).concat alternates
+        second.instance_variable_set(:@alternates, [])
+        @alternates.concat alternates
      end
      second
    end
--- a/vendor/rails/actionpack/lib/action_controller/vendor/xml_node.rb
+++ b/vendor/rails/actionpack/lib/action_controller/vendor/xml_node.rb
@ -1,97 +0,0 @@
-require 'rexml/document'
-
-# SimpleXML like xml parser. Written by leon breet from the ruby on rails Mailing list
-class XmlNode #:nodoc:
-  attr :node
-
-  def initialize(node, options = {})
-    @node = node
-    @children = {}
-    @raise_errors = options[:raise_errors]
-  end
-
-  def self.from_xml(xml_or_io)
-    document = REXML::Document.new(xml_or_io)
-    if document.root 
-      XmlNode.new(document.root) 
-    else
-      XmlNode.new(document) 
-    end
-  end
-
-  def node_encoding
-    @node.encoding
-  end
-
-  def node_name
-    @node.name
-  end
-
-  def node_value
-    @node.text
-  end
-
-  def node_value=(value)
-    @node.text = value
-  end
-
-  def xpath(expr)
-    matches = nil
-    REXML::XPath.each(@node, expr) do |element|
-      matches ||= XmlNodeList.new
-      matches << (@children[element] ||= XmlNode.new(element))
-    end
-    matches
-  end
-
-  def method_missing(name, *args)
-    name = name.to_s
-    nodes = nil
-    @node.each_element(name) do |element|
-      nodes ||= XmlNodeList.new
-      nodes << (@children[element] ||= XmlNode.new(element))
-    end
-    nodes
-  end
-
-  def <<(node)
-    if node.is_a? REXML::Node
-      child = node
-    elsif node.respond_to? :node
-      child = node.node
-    end
-    @node.add_element child
-    @children[child] ||= XmlNode.new(child)
-  end
-
-  def [](name)
-    @node.attributes[name.to_s]
-  end
-
-  def []=(name, value)
-    @node.attributes[name.to_s] = value
-  end
-
-  def to_s
-    @node.to_s
-  end
-
-  def to_i
-    to_s.to_i
-  end
-end
-
-class XmlNodeList < Array #:nodoc:
-  def [](i)
-    i.is_a?(String) ? super(0)[i] : super(i)
-  end
-
-  def []=(i, value)
-    i.is_a?(String) ? self[0][i] = value : super(i, value)
-  end
-
-  def method_missing(name, *args)
-    name = name.to_s
-    self[0].__send__(name, *args)
-  end
-end
--- a/vendor/rails/actionpack/lib/action_controller/vendor/xml_simple.rb
+++ b/vendor/rails/actionpack/lib/action_controller/vendor/xml_simple.rb