Ensure the_content REALLY is utf-8

Our check that the_content was valid utf-8 was rather busted.
This one works right. In particular, we needed to expand numeric character references (NCRs) before checking.
master
Jacques Distler 2008-01-03 15:27:03 -06:00
parent c89aeb6665
commit ebc409e1a0
3 changed files with 55 additions and 11 deletions

View File

@ -1,5 +1,4 @@
require 'fileutils'
#require 'redcloth_for_tex'
require 'maruku'
require 'parsedate'
require 'zip/zip'
@ -147,7 +146,7 @@ class WikiController < ApplicationController
def search
@query = params['query']
render(:text => "Your query string was not valid utf-8", :layout => 'error', :status => 400) and return if !@query.is_utf8?
render(:text => "Your query string was not valid utf-8", :layout => 'error', :status => 400) and return unless @query.is_utf8?
@title_results = @web.select { |page| page.name =~ /#{@query}/i }.sort
@results = @web.select { |page| page.content =~ /#{@query}/i }.sort
all_pages_found = (@results + @title_results).uniq
@ -235,20 +234,32 @@ class WikiController < ApplicationController
end
author_name = params['author']
author_name = 'AnonymousCoward' if author_name =~ /^\s*$/
render(:text => "Your name was not valid utf-8", :layout => 'error', :status => 400) and return if !author_name.is_utf8?
render(:text => "Your name was not valid utf-8", :layout => 'error', :status => 400) and return unless author_name.is_utf8?
cookies['author'] = { :value => author_name, :expires => Time.utc(2030) }
begin
the_content = params['content']
render(:text => "Your content was not valid utf-8", :layout => 'error', :status => 400) and return if !the_content.is_utf8?
filter_spam(the_content)
if @page
wiki.revise_page(@web_name, @page_name, the_content, Time.now,
Author.new(author_name, remote_ip), PageRenderer.new)
@page.unlock
if the_content.is_utf8?
wiki.revise_page(@web_name, @page_name, the_content, Time.now,
Author.new(author_name, remote_ip), PageRenderer.new)
@page.unlock
else
flash[:error] = 'Your content was not valid utf-8.'
@page.unlock
redirect_to :back
return
end
else
wiki.write_page(@web_name, @page_name, the_content, Time.now,
Author.new(author_name, remote_ip), PageRenderer.new)
if the_content.is_utf8?
wiki.write_page(@web_name, @page_name, the_content, Time.now,
Author.new(author_name, remote_ip), PageRenderer.new)
else
flash[:error] = 'Your content was not valid utf-8.'
redirect_to :back
return
end
end
redirect_to_page @page_name
rescue => e

View File

@ -128,8 +128,13 @@ class String
# string.is_utf8? -> boolean
#
# returns true if the sequence of bytes in string is valid utf-8
#--
def is_utf8?
self =~ /^(
#expand NCRs to utf-8
text = self.gsub(/&#x([a-fA-F0-9]+);/) {|m| [$1.hex].pack('U*') }
text.gsub!(/&#(\d+);/) {|m| [$1.to_i].pack('U*') }
#ensure the resulting string of bytes is valid utf-8
text =~ /\A(
[\x09\x0A\x0D\x20-\x7E] # ASCII
| [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
| \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
@ -140,8 +145,9 @@ class String
| \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
| [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
| \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
)*$/x;
)*\Z/x;
end
#++
#:stopdoc:
MATHML_ENTITIES = {

View File

@ -552,6 +552,33 @@ class WikiControllerTest < Test::Unit::TestCase
assert_equal 'AuthorOfNewPage', new_page.author
end
# Posts a 'save' for a new page whose content ends in a NUL byte, then checks
# that the response redirects to the 'new' action for that page and that the
# 'author' cookie carries the submitted name with a 2030 expiry.
def test_save_not_utf8
  params = {
    'web' => 'wiki1',
    'id' => 'NewPage',
    'content' => "Contents of a new page\r\n\000",
    'author' => 'AuthorOfNewPage'
  }
  response = process('save', params)

  assert_redirected_to :web => 'wiki1', :action => 'new', :id => 'NewPage'
  assert_equal ['AuthorOfNewPage'], response.cookies['author'].value
  assert_equal Time.utc(2030), response.cookies['author'].expires
end
# Posts a 'save' for a new page whose content ends in the hexadecimal numeric
# character reference &#xfffe;, then checks that the response redirects to the
# 'new' action for that page and that the 'author' cookie carries the
# submitted name with a 2030 expiry.
def test_save_not_utf8_ncr
  params = {
    'web' => 'wiki1',
    'id' => 'NewPage',
    'content' => "Contents of a new page\r\n&#xfffe;",
    'author' => 'AuthorOfNewPage'
  }
  response = process('save', params)

  assert_redirected_to :web => 'wiki1', :action => 'new', :id => 'NewPage'
  assert_equal ['AuthorOfNewPage'], response.cookies['author'].value
  assert_equal Time.utc(2030), response.cookies['author'].expires
end
# Posts a 'save' for a new page whose content ends in the decimal numeric
# character reference &#65535;, then checks that the response redirects to the
# 'new' action for that page and that the 'author' cookie carries the
# submitted name with a 2030 expiry.
def test_save_not_utf8_dec_ncr
  params = {
    'web' => 'wiki1',
    'id' => 'NewPage',
    'content' => "Contents of a new page\r\n&#65535;",
    'author' => 'AuthorOfNewPage'
  }
  response = process('save', params)

  assert_redirected_to :web => 'wiki1', :action => 'new', :id => 'NewPage'
  assert_equal ['AuthorOfNewPage'], response.cookies['author'].value
  assert_equal Time.utc(2030), response.cookies['author'].expires
end
def test_save_new_revision_of_existing_page
@home.lock(Time.now, 'Batman')
current_revisions = @home.revisions.size