Merge branch 'bzr/golem' of /Users/distler/Sites/code/instiki

2009-12-05 10:53:19 -06:00 · 2009-12-05 10:53:19 -06:00 · 36bf257d72
commit 36bf257d72
parent 0bc05f11fd 171c12d2c1
1 changed files with 23 additions and 10 deletions
--- a/lib/stringsupport.rb
+++ b/lib/stringsupport.rb
@@ -6,20 +6,30 @@ class String
 #
 # Under Ruby 1.8, this is a no-op. Under Ruby 1.9, it sets the encoding to "ASCII-8BIT".
 #--
 # The implementation is chosen once, at load time, by probing a string
 # literal for String#force_encoding.
 if "".respond_to?(:force_encoding)
  # Encoding-aware strings: retag the receiver as raw bytes and return it.
  def as_bytes
    self.force_encoding("ASCII-8BIT")
  end
 else
  # No encoding support was detected at load time: return the receiver,
  # retagging it first only if this object does respond to force_encoding.
  def as_bytes
    return self unless respond_to?(:force_encoding)
    force_encoding("ASCII-8BIT")
    self
  end
 end
 #++
 # A method to allow string-oriented operations in both Ruby 1.8 and Ruby 1.9
 #
 # Under Ruby 1.8, this is a no-op. Under Ruby 1.9, it sets the encoding to "UTF-8".
 #--
 # The implementation is chosen once, at load time, by probing a string
 # literal for String#force_encoding.
 if "".respond_to?(:force_encoding)
  # Encoding-aware strings: retag the receiver as UTF-8 and return it.
  def as_utf8
    self.force_encoding("UTF-8")
  end
 else
  # No encoding support was detected at load time: return the receiver,
  # retagging it first only if this object does respond to force_encoding.
  def as_utf8
    return self unless respond_to?(:force_encoding)
    force_encoding("UTF-8")
    self
  end
 end
 #++
 # Takes a string and removes any invalid substrings, returning a valid UTF-8 string.
@@ -29,14 +39,17 @@ class String
 #
 # returns a valid utf-8 string, purged of any subsequences of illegal bytes.
 #--
 # purify is defined in one of two forms, selected at load time by probing a
 # string literal for force_encoding.
 # In this hunk the removed line duplicated the receiver (self.dup) before
 # calling check_ncrs; the added line calls check_ncrs on the receiver directly.
 # NOTE(review): as rendered, the block openers and `end` lines in this hunk do
 # not balance (the kept `if text.respond_to?` line loses its `else` and gains
 # no matching `end`) -- confirm against the actual lib/stringsupport.rb.
 if "".respond_to?(:force_encoding)
  def purify
-     text = self.dup.check_ncrs.as_utf8
+    text = check_ncrs.as_utf8
     if text.respond_to?(:force_encoding)
    # Split into characters, view each one as raw bytes, keep those matching
    # UTF8_REGEX (defined outside this hunk), then rejoin and retag as UTF-8.
    text.chars.collect{|c| c.as_bytes}.grep(UTF8_REGEX).join.as_utf8
-     else
+  end
 else
  def purify
    text = check_ncrs
    # Split on the empty /u pattern (presumably one element per UTF-8
    # character -- TODO confirm), keep those matching UTF8_REGEX, and rejoin.
    text.split(//u).grep(UTF8_REGEX).join
  end
-   end
+end
  def check_ncrs
    text = gsub(/&#[xX]([a-fA-F0-9]+);/) { |m| [$1.hex].pack('U*').as_bytes =~ UTF8_REGEX ? m : '' }