From c79fef9c0164d719615de935c85085028363aacf Mon Sep 17 00:00:00 2001 From: Jacques Distler Date: Mon, 7 Sep 2009 16:02:36 -0500 Subject: [PATCH] Clean, rather than Complain Previously, if the user tried to submit content which was malformed utf-8, Instiki would complain loudly to him. A slightly more user-friendly approach was suggested by the latest Rails 2.3.4, and a conversation with Sam Ruby (who suggested some improvements). Now, instead of complaining, we remove the offending bytes, leaving a well-formed utf-8 string, which we pretend is what the user meant to submit. --- app/controllers/application_controller.rb | 2 +- app/controllers/file_controller.rb | 8 +-- app/controllers/wiki_controller.rb | 31 +++------- app/views/wiki/edit.rhtml | 2 +- app/views/wiki/new.rhtml | 2 +- app/views/wiki/search.rhtml | 6 +- lib/stringsupport.rb | 60 ++++++++++++------- test/functional/wiki_controller_test.rb | 59 ++++++++++++------ vendor/rails/activerecord/examples/.gitignore | 1 - 9 files changed, 96 insertions(+), 75 deletions(-) delete mode 100644 vendor/rails/activerecord/examples/.gitignore diff --git a/app/controllers/application_controller.rb b/app/controllers/application_controller.rb index d8bab69b..4f921674 100644 --- a/app/controllers/application_controller.rb +++ b/app/controllers/application_controller.rb @@ -140,7 +140,7 @@ class ApplicationController < ActionController::Base

Internal Error

An application error occurred while processing your request.

- + EOL end diff --git a/app/controllers/file_controller.rb b/app/controllers/file_controller.rb index 2e293641..8d8dae15 100644 --- a/app/controllers/file_controller.rb +++ b/app/controllers/file_controller.rb @@ -124,14 +124,10 @@ class FileController < ApplicationController zip = Zip::ZipInputStream.open(archive) while (entry = zip.get_next_entry) do ext_length = File.extname(entry.name).length - page_name = entry.name[0..-(ext_length + 1)] - page_content = entry.get_input_stream.read + page_name = entry.name[0..-(ext_length + 1)].purify + page_content = entry.get_input_stream.read.purify logger.info "Processing page '#{page_name}'" begin - if !page_content.is_utf8? - logger.info "Page '#{page_name}' contains non-utf8 character data. Skipping." - next - end existing_page = @wiki.read_page(@web.address, page_name) if existing_page if existing_page.content == page_content diff --git a/app/controllers/wiki_controller.rb b/app/controllers/wiki_controller.rb index 1490c810..61226d94 100644 --- a/app/controllers/wiki_controller.rb +++ b/app/controllers/wiki_controller.rb @@ -162,8 +162,7 @@ class WikiController < ApplicationController end def search - @query = params['query'] - render(:text => "Your query string was not valid utf-8", :layout => 'error', :status => 400) and return unless @query.is_utf8? + @query = params['query'].purify @title_results = @web.select { |page| page.name =~ /#{@query}/i }.sort @results = @web.select { |page| page.content =~ /#{@query}/i }.sort all_pages_found = (@results + @title_results).uniq @@ -180,7 +179,7 @@ class WikiController < ApplicationController end def edit - if @page.nil? or not @page_name.is_utf8? + if @page.nil? redirect_home elsif @page.locked?(Time.now) and not params['break_lock'] redirect_to :web => @web_name, :action => 'locked', :id => @page_name @@ -190,12 +189,10 @@ class WikiController < ApplicationController end def locked - render(:text => 'Page name is not valid utf-8.', :status => 400, :layout => 'error') unless @page_name.is_utf8? # to template end def new - render(:text => 'Page name is not valid utf-8.', :status => 400, :layout => 'error') unless @page_name.is_utf8? # to template end @@ -258,32 +255,22 @@ class WikiController < ApplicationController end def save - render(:status => 404, :text => 'Undefined page name', :layout => 'error') and return if @page_name.nil? or not @page_name.is_utf8? + render(:status => 404, :text => 'Undefined page name', :layout => 'error') and return if @page_name.nil? unless (request.post? || ENV["RAILS_ENV"] == "test") headers['Allow'] = 'POST' render(:status => 405, :text => 'You must use an HTTP POST', :layout => 'error') return end - author_name = params['author'] + author_name = params['author'].purify author_name = 'AnonymousCoward' if author_name =~ /^\s*$/ begin - raise Instiki::ValidationError.new('Your name was not valid utf-8') unless author_name.is_utf8? raise Instiki::ValidationError.new('Your name cannot contain a "."') if author_name.include? '.' cookies['author'] = { :value => author_name, :expires => Time.utc(2030) } - the_content = params['content'] + the_content = params['content'].purify filter_spam(the_content) - unless the_content.is_utf8? - if @page - the_content = @page.content - else - the_content = '' - end - raise Instiki::ValidationError.new('Your content was not valid utf-8.') - end if @page - new_name = params['new_name'] || @page_name - raise Instiki::ValidationError.new('Your new title was not valid utf-8.') unless new_name.is_utf8? + new_name = params['new_name'] ? params['new_name'].purify : @page_name raise Instiki::ValidationError.new('Your new title cannot contain a "."') if new_name.include? '.' raise Instiki::ValidationError.new('A page named "' + new_name.escapeHTML + '" already exists.') if @page_name != new_name && @web.has_page?(new_name) wiki.revise_page(@web_name, @page_name, new_name, the_content, Time.now, @@ -325,7 +312,7 @@ class WikiController < ApplicationController end end else - if not @page_name.nil? and @page_name.is_utf8? and not @page_name.empty? + if not @page_name.nil? and not @page_name.empty? real_page = WikiReference.page_that_redirects_for(@web, @page_name) if real_page flash[:info] = "Redirected from \"#{@page_name}\"." @@ -354,7 +341,7 @@ class WikiController < ApplicationController end render :action => 'history' else - if not @page_name.nil? and @page_name.is_utf8? and not @page_name.empty? + if not @page_name.nil? and not @page_name.empty? redirect_to :web => @web_name, :action => 'new', :id => @page_name else render :text => 'Page name is not specified', :status => 404, :layout => 'error' @@ -397,7 +384,7 @@ class WikiController < ApplicationController end def load_page - @page_name = params['id'] + @page_name = params['id'] ? params['id'].purify : nil @page = @wiki.read_page(@web_name, @page_name) if @page_name end diff --git a/app/views/wiki/edit.rhtml b/app/views/wiki/edit.rhtml index c981e0db..f1934a4b 100644 --- a/app/views/wiki/edit.rhtml +++ b/app/views/wiki/edit.rhtml @@ -14,7 +14,7 @@ 'accept-charset' => 'utf-8' }) do %>
+ (params['content'] ? params['content'] : @page.content).purify) %> <% if @page_name != 'HomePage' -%>

<%= check_box_tag :alter_title, value = "1", checked=false, diff --git a/app/views/wiki/new.rhtml b/app/views/wiki/new.rhtml index ebc74414..f45c9d0b 100644 --- a/app/views/wiki/new.rhtml +++ b/app/views/wiki/new.rhtml @@ -14,7 +14,7 @@ { 'id' => 'editForm', 'method' => 'post', 'onsubmit' => 'cleanAuthorName();', 'accept-charset' => 'utf-8' }) do %> + params['content'] ? params['content'].purify : '' ) %>

as <%= text_field_tag :author, @author, diff --git a/app/views/wiki/search.rhtml b/app/views/wiki/search.rhtml index f962f2d9..e467cfaa 100644 --- a/app/views/wiki/search.rhtml +++ b/app/views/wiki/search.rhtml @@ -1,4 +1,4 @@ -<%- @title = "Search results for \"#{h params["query"]}\"" -%> +<%- @title = "Search results for \"#{h @query}\"" -%> <%- unless @title_results.empty? -%>

<%= @title_results.length %> page(s) containing search string in the page name:

@@ -24,7 +24,7 @@ <%- end -%> <%- if (@results + @title_results).empty? -%> -

No pages contain "<%= h params["query"] %>"

+

No pages contain "<%= h @query %>"

Perhaps you should try expanding your query. Remember that Instiki searches for entire phrases, so if you search for "all that jazz" it will not match pages that contain these @@ -36,6 +36,6 @@ "[a-z]*Leet?RegExpSkill(s|z)"

- Create a new page, named: "<%= link_to h(params[:query]), :web => @web.address, :action => 'new', :id => params[:query] %>" + Create a new page, named: "<%= link_to h(@query), :web => @web.address, :action => 'new', :id => @query %>"

<%- end -%> diff --git a/lib/stringsupport.rb b/lib/stringsupport.rb index 0c490f47..58f74592 100644 --- a/lib/stringsupport.rb +++ b/lib/stringsupport.rb @@ -2,27 +2,25 @@ class String -# Check whether a string is valid utf-8 +# Take a string, and remove any invalid substrings, returning a valid utf-8 string. # # :call-seq: -# string.is_utf8? -> boolean +# string.purify -> new_string # -# returns true if the sequence of bytes in string is valid utf-8 +# returns a valid utf-8 string, purged of any subsequences of illegal bytes. #-- - def is_utf8? - #expand NCRs to utf-8 - text = self.gsub(/&#[xX]([a-fA-F0-9]+);/) { |m| [$1.hex].pack('U*') } - text.gsub!(/&#(\d+);/) { |m| [$1.to_i].pack('U*') } - - # You might think this is faster, but it isn't - #pieces = self.split(/&#[xX]([a-fA-F0-9]+);/) - #1.step(pieces.length-1, 2) {|i| pieces[i] = [pieces[i].hex].pack('U*')} - #pieces = pieces.join.split(/&#(\d+);/) - #1.step(pieces.length-1, 2) {|i| pieces[i] = [pieces[i].to_i].pack('U*')} - #text = pieces.join - - #ensure the resulting string of bytes is valid utf-8 - text =~ /\A( + def purify + text = expand_ncrs + text.split(//u).grep(UTF8_REGEX).join + end + + def expand_ncrs + text = gsub(/&#[xX]([a-fA-F0-9]+);/) { |m| [$1.hex].pack('U*') } + text.gsub!(/&#(\d+);/) { |m| [$1.to_i].pack('U*') } + text + end + + UTF8_REGEX = /\A( [\x09\x0A\x0D\x20-\x7E] # ASCII | [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs @@ -33,13 +31,31 @@ class String | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3 | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15 | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16 - )*\Z/nx; - end + )+\Z/nx; #++ + +# Check whether a string is valid utf-8 +# +# :call-seq: +# string.is_utf8? -> boolean +# +# returns true if the sequence of bytes in string is valid utf-8 +#-- + def is_utf8? + #expand NCRs to utf-8 + text = self.expand_ncrs + + # You might think this is faster, but it isn't + #pieces = self.split(/&#[xX]([a-fA-F0-9]+);/) + #1.step(pieces.length-1, 2) {|i| pieces[i] = [pieces[i].hex].pack('U*')} + #pieces = pieces.join.split(/&#(\d+);/) + #1.step(pieces.length-1, 2) {|i| pieces[i] = [pieces[i].to_i].pack('U*')} + #text = pieces.join + + #ensure the resulting string of bytes is valid utf-8 + text =~ UTF8_REGEX + end - def purify - delete("\x01-\x08\x0B\x0C\x0E-\x1F", "\ufffe\uffff") - end #:stopdoc: MATHML_ENTITIES = { 'Alpha' => 'Α', diff --git a/test/functional/wiki_controller_test.rb b/test/functional/wiki_controller_test.rb index b546c43b..0d794e9e 100755 --- a/test/functional/wiki_controller_test.rb +++ b/test/functional/wiki_controller_test.rb @@ -607,30 +607,39 @@ class WikiControllerTest < ActionController::TestCase end def test_save_not_utf8 - r = process 'save', 'web' => 'wiki1', 'id' => 'NewPage', 'content' => "Contents of a new page\r\n\000", + r = process 'save', 'web' => 'wiki1', 'id' => 'NewPage', 'content' => "Cont\000ents of a new page\r\n\000", 'author' => 'AuthorOfNewPage' - assert_redirected_to :web => 'wiki1', :controller => 'wiki', :action => 'new', :id => 'NewPage', :content => '' + assert_redirected_to :web => 'wiki1', :controller => 'wiki', :action => 'show', :id => 'NewPage' assert_equal 'AuthorOfNewPage', r.cookies['author'] assert_match @eternity, r.headers["Set-Cookie"][0] + new_page = @wiki.read_page('wiki1', 'NewPage') + assert_equal "Contents of a new page\r\n", new_page.content + assert_equal 'AuthorOfNewPage', new_page.author end def test_save_not_utf8_ncr r = process 'save', 'web' => 'wiki1', 'id' => 'NewPage', 'content' => "Contents of a new page\r\n￾", 'author' => 'AuthorOfNewPage' - assert_redirected_to :web => 'wiki1', :controller => 'wiki', :action => 'new', :id => 'NewPage', :content => '' + assert_redirected_to :web => 'wiki1', :controller => 'wiki', :action => 'show', :id => 'NewPage' assert_equal 'AuthorOfNewPage', r.cookies['author'] assert_match @eternity, r.headers["Set-Cookie"][0] + new_page = @wiki.read_page('wiki1', 'NewPage') + assert_equal "Contents of a new page\r\n", new_page.content + assert_equal 'AuthorOfNewPage', new_page.author end def test_save_not_utf8_dec_ncr r = process 'save', 'web' => 'wiki1', 'id' => 'NewPage', 'content' => "Contents of a new page\r\n￿", 'author' => 'AuthorOfNewPage' - assert_redirected_to :web => 'wiki1', :controller => 'wiki', :action => 'new', :id => 'NewPage', :content => '' + assert_redirected_to :web => 'wiki1', :controller => 'wiki', :action => 'show', :id => 'NewPage' assert_equal 'AuthorOfNewPage', r.cookies['author'] assert_match @eternity, r.headers["Set-Cookie"][0] + new_page = @wiki.read_page('wiki1', 'NewPage') + assert_equal "Contents of a new page\r\n", new_page.content + assert_equal 'AuthorOfNewPage', new_page.author end def test_save_new_revision_of_existing_page @@ -653,17 +662,15 @@ class WikiControllerTest < ActionController::TestCase @home.lock(Time.now, 'Batman') current_revisions = @home.revisions.size - r = process 'save', 'web' => 'wiki1', 'id' => 'HomePage', 'content' => "Revised HomePage\000", + r = process 'save', 'web' => 'wiki1', 'id' => 'HomePage', 'content' => "Newly rev\000ised HomePage", 'author' => 'Batman' - assert_redirected_to :web => 'wiki1', :controller => 'wiki', :action => 'edit', :id => 'HomePage', - :content => 'HisWay would be MyWay $\sin(x)\begin{svg}\end{svg}\includegraphics[width' + - '=3em]{foo}$ in kinda ThatWay in HisWay though MyWay \OverThere -- see SmartEng' + - 'ine in that SmartEngineGUI' + assert_redirected_to :web => 'wiki1', :controller => 'wiki', :action => 'show', :id => 'HomePage' assert_equal 'Batman', r.cookies['author'] home_page = @wiki.read_page('wiki1', 'HomePage') - assert_equal current_revisions, home_page.revisions.size - assert_equal 'DavidHeinemeierHansson', home_page.author + assert_equal current_revisions+1, home_page.revisions.size + assert_equal 'Newly revised HomePage', home_page.content + assert_equal 'Batman', home_page.author assert !home_page.locked?(Time.now) end @@ -750,11 +757,20 @@ class WikiControllerTest < ActionController::TestCase assert_redirected_to :action => 'new', :controller => 'wiki', :web => 'wiki1', :id => 'NewPage' assert r.flash[:error].to_s == 'Your name cannot contain a "."' + r = process 'save', 'web' => 'wiki1', 'id' => 'NewPage', 'content' => 'Contents of a new page', + 'author' => "Fu\000Manchu" + + assert_redirected_to :action => 'show', :controller => 'wiki', :web => 'wiki1', :id => 'NewPage' + new_page = @wiki.read_page('wiki1', 'NewPage') + assert_equal 'FuManchu', new_page.author + r = process 'save', 'web' => 'wiki1', 'id' => 'AnotherPage', 'content' => 'Contents of a new page', 'author' => "\000" - assert_redirected_to :action => 'new', :controller => 'wiki', :web => 'wiki1', :id => 'AnotherPage' - assert r.flash[:error].to_s == "Your name was not valid utf-8" + assert_redirected_to :action => 'show', :controller => 'wiki', :web => 'wiki1', :id => 'AnotherPage' + new_page = @wiki.read_page('wiki1', 'AnotherPage') + assert_equal 'AnonymousCoward', new_page.author + end def test_search @@ -794,17 +810,24 @@ class WikiControllerTest < ActionController::TestCase end def test_search_null_in_query - r = process 'search', 'web' => 'wiki1', 'query' => "\x00" + r = process 'search', 'web' => 'wiki1', 'query' => "non-existant\x00" - assert_response(400) - assert_match /Your query string was not valid utf-8/, r.body + assert_response(:success) + assert_match /No pages contain \"non-existant\"/, r.body end def test_search_FFFF_in_query r = process 'search', 'web' => 'wiki1', 'query' => "\xEF\xBF\xBF" - assert_response(400) - assert_match /Your query string was not valid utf-8/, r.body + assert_response(:success) + assert_match /9 page\(s\) containing search string in the page name:/, r.body + + r = process 'search', 'web' => 'wiki1', 'query' => "Al\357\277\277l about" + + assert_response(:success) + assert_equal 'All about', r.template_objects['query'] + assert_equal [@elephant, @oak], r.template_objects['results'] + assert_equal [], r.template_objects['title_results'] end def test_search_FFFD_in_query diff --git a/vendor/rails/activerecord/examples/.gitignore b/vendor/rails/activerecord/examples/.gitignore deleted file mode 100644 index 0dfc1cb7..00000000 --- a/vendor/rails/activerecord/examples/.gitignore +++ /dev/null @@ -1 +0,0 @@ -performance.sql