Clean, rather than Complain

Previously, if the user tried to submit content which was
malformed utf-8, Instiki would complain loudly to him.

A slightly more user-friendly approach was suggested by
the latest Rails 2.3.4, and a conversation with Sam Ruby
(who suggested some improvements).

Now, instead of complaining, we remove the offending bytes,
leaving a well-formed utf-8 string, which we pretend is what
the user meant to submit.
This commit is contained in:
Jacques Distler 2009-09-07 16:02:36 -05:00
parent f029aae60e
commit c79fef9c01
9 changed files with 96 additions and 75 deletions

View file

@ -140,7 +140,7 @@ class ApplicationController < ActionController::Base
<html xmlns="http://www.w3.org/1999/xhtml"><body> <html xmlns="http://www.w3.org/1999/xhtml"><body>
<h2>Internal Error</h2> <h2>Internal Error</h2>
<p>An application error occurred while processing your request.</p> <p>An application error occurred while processing your request.</p>
<!-- \n#{exception.to_s.is_utf8? ? exception.to_s.gsub!(/-{2,}/, '- -') : ''}\n#{exception.backtrace.join("\n")}\n --> <!-- \n#{exception.to_s.purify.gsub!(/-{2,}/, '- -') }\n#{exception.backtrace.join("\n")}\n -->
</body></html> </body></html>
EOL EOL
end end

View file

@ -124,14 +124,10 @@ class FileController < ApplicationController
zip = Zip::ZipInputStream.open(archive) zip = Zip::ZipInputStream.open(archive)
while (entry = zip.get_next_entry) do while (entry = zip.get_next_entry) do
ext_length = File.extname(entry.name).length ext_length = File.extname(entry.name).length
page_name = entry.name[0..-(ext_length + 1)] page_name = entry.name[0..-(ext_length + 1)].purify
page_content = entry.get_input_stream.read page_content = entry.get_input_stream.read.purify
logger.info "Processing page '#{page_name}'" logger.info "Processing page '#{page_name}'"
begin begin
if !page_content.is_utf8?
logger.info "Page '#{page_name}' contains non-utf8 character data. Skipping."
next
end
existing_page = @wiki.read_page(@web.address, page_name) existing_page = @wiki.read_page(@web.address, page_name)
if existing_page if existing_page
if existing_page.content == page_content if existing_page.content == page_content

View file

@ -162,8 +162,7 @@ class WikiController < ApplicationController
end end
def search def search
@query = params['query'] @query = params['query'].purify
render(:text => "Your query string was not valid utf-8", :layout => 'error', :status => 400) and return unless @query.is_utf8?
@title_results = @web.select { |page| page.name =~ /#{@query}/i }.sort @title_results = @web.select { |page| page.name =~ /#{@query}/i }.sort
@results = @web.select { |page| page.content =~ /#{@query}/i }.sort @results = @web.select { |page| page.content =~ /#{@query}/i }.sort
all_pages_found = (@results + @title_results).uniq all_pages_found = (@results + @title_results).uniq
@ -180,7 +179,7 @@ class WikiController < ApplicationController
end end
def edit def edit
if @page.nil? or not @page_name.is_utf8? if @page.nil?
redirect_home redirect_home
elsif @page.locked?(Time.now) and not params['break_lock'] elsif @page.locked?(Time.now) and not params['break_lock']
redirect_to :web => @web_name, :action => 'locked', :id => @page_name redirect_to :web => @web_name, :action => 'locked', :id => @page_name
@ -190,12 +189,10 @@ class WikiController < ApplicationController
end end
def locked def locked
render(:text => 'Page name is not valid utf-8.', :status => 400, :layout => 'error') unless @page_name.is_utf8?
# to template # to template
end end
def new def new
render(:text => 'Page name is not valid utf-8.', :status => 400, :layout => 'error') unless @page_name.is_utf8?
# to template # to template
end end
@ -258,32 +255,22 @@ class WikiController < ApplicationController
end end
def save def save
render(:status => 404, :text => 'Undefined page name', :layout => 'error') and return if @page_name.nil? or not @page_name.is_utf8? render(:status => 404, :text => 'Undefined page name', :layout => 'error') and return if @page_name.nil?
unless (request.post? || ENV["RAILS_ENV"] == "test") unless (request.post? || ENV["RAILS_ENV"] == "test")
headers['Allow'] = 'POST' headers['Allow'] = 'POST'
render(:status => 405, :text => 'You must use an HTTP POST', :layout => 'error') render(:status => 405, :text => 'You must use an HTTP POST', :layout => 'error')
return return
end end
author_name = params['author'] author_name = params['author'].purify
author_name = 'AnonymousCoward' if author_name =~ /^\s*$/ author_name = 'AnonymousCoward' if author_name =~ /^\s*$/
begin begin
raise Instiki::ValidationError.new('Your name was not valid utf-8') unless author_name.is_utf8?
raise Instiki::ValidationError.new('Your name cannot contain a "."') if author_name.include? '.' raise Instiki::ValidationError.new('Your name cannot contain a "."') if author_name.include? '.'
cookies['author'] = { :value => author_name, :expires => Time.utc(2030) } cookies['author'] = { :value => author_name, :expires => Time.utc(2030) }
the_content = params['content'] the_content = params['content'].purify
filter_spam(the_content) filter_spam(the_content)
unless the_content.is_utf8?
if @page
the_content = @page.content
else
the_content = ''
end
raise Instiki::ValidationError.new('Your content was not valid utf-8.')
end
if @page if @page
new_name = params['new_name'] || @page_name new_name = params['new_name'] ? params['new_name'].purify : @page_name
raise Instiki::ValidationError.new('Your new title was not valid utf-8.') unless new_name.is_utf8?
raise Instiki::ValidationError.new('Your new title cannot contain a "."') if new_name.include? '.' raise Instiki::ValidationError.new('Your new title cannot contain a "."') if new_name.include? '.'
raise Instiki::ValidationError.new('A page named "' + new_name.escapeHTML + '" already exists.') if @page_name != new_name && @web.has_page?(new_name) raise Instiki::ValidationError.new('A page named "' + new_name.escapeHTML + '" already exists.') if @page_name != new_name && @web.has_page?(new_name)
wiki.revise_page(@web_name, @page_name, new_name, the_content, Time.now, wiki.revise_page(@web_name, @page_name, new_name, the_content, Time.now,
@ -325,7 +312,7 @@ class WikiController < ApplicationController
end end
end end
else else
if not @page_name.nil? and @page_name.is_utf8? and not @page_name.empty? if not @page_name.nil? and not @page_name.empty?
real_page = WikiReference.page_that_redirects_for(@web, @page_name) real_page = WikiReference.page_that_redirects_for(@web, @page_name)
if real_page if real_page
flash[:info] = "Redirected from \"#{@page_name}\"." flash[:info] = "Redirected from \"#{@page_name}\"."
@ -354,7 +341,7 @@ class WikiController < ApplicationController
end end
render :action => 'history' render :action => 'history'
else else
if not @page_name.nil? and @page_name.is_utf8? and not @page_name.empty? if not @page_name.nil? and not @page_name.empty?
redirect_to :web => @web_name, :action => 'new', :id => @page_name redirect_to :web => @web_name, :action => 'new', :id => @page_name
else else
render :text => 'Page name is not specified', :status => 404, :layout => 'error' render :text => 'Page name is not specified', :status => 404, :layout => 'error'
@ -397,7 +384,7 @@ class WikiController < ApplicationController
end end
def load_page def load_page
@page_name = params['id'] @page_name = params['id'] ? params['id'].purify : nil
@page = @wiki.read_page(@web_name, @page_name) if @page_name @page = @wiki.read_page(@web_name, @page_name) if @page_name
end end

View file

@ -14,7 +14,7 @@
'accept-charset' => 'utf-8' }) do %> 'accept-charset' => 'utf-8' }) do %>
<div> <div>
<textarea name="content" id="content" rows="24" cols="60"><%= h(flash[:content] || <textarea name="content" id="content" rows="24" cols="60"><%= h(flash[:content] ||
((params['content'] && params['content'].is_utf8?) ? params['content'] : @page.content).purify) %></textarea> (params['content'] ? params['content'] : @page.content).purify) %></textarea>
<% if @page_name != 'HomePage' -%> <% if @page_name != 'HomePage' -%>
<p> <p>
<%= check_box_tag :alter_title, value = "1", checked=false, <%= check_box_tag :alter_title, value = "1", checked=false,

View file

@ -14,7 +14,7 @@
{ 'id' => 'editForm', 'method' => 'post', 'onsubmit' => 'cleanAuthorName();', 'accept-charset' => 'utf-8' }) do %> { 'id' => 'editForm', 'method' => 'post', 'onsubmit' => 'cleanAuthorName();', 'accept-charset' => 'utf-8' }) do %>
<textarea name="content" id="content" rows="24" cols="60"><%= h(flash[:content] || <textarea name="content" id="content" rows="24" cols="60"><%= h(flash[:content] ||
( (params['content'] && params['content'].is_utf8?) ? params['content'] : '').purify ) %></textarea> params['content'] ? params['content'].purify : '' ) %></textarea>
<div id="editFormButtons"> <div id="editFormButtons">
<input type="submit" value="Submit" accesskey="s"/> as <input type="submit" value="Submit" accesskey="s"/> as
<%= text_field_tag :author, @author, <%= text_field_tag :author, @author,

View file

@ -1,4 +1,4 @@
<%- @title = "Search results for \"#{h params["query"]}\"" -%> <%- @title = "Search results for \"#{h @query}\"" -%>
<%- unless @title_results.empty? -%> <%- unless @title_results.empty? -%>
<h2><%= @title_results.length %> page(s) containing search string in the page name:</h2> <h2><%= @title_results.length %> page(s) containing search string in the page name:</h2>
@ -24,7 +24,7 @@
<%- end -%> <%- end -%>
<%- if (@results + @title_results).empty? -%> <%- if (@results + @title_results).empty? -%>
<h2>No pages contain "<%= h params["query"] %>" </h2> <h2>No pages contain "<%= h @query %>" </h2>
<p> <p>
Perhaps you should try expanding your query. Remember that Instiki searches for entire Perhaps you should try expanding your query. Remember that Instiki searches for entire
phrases, so if you search for "all that jazz" it will not match pages that contain these phrases, so if you search for "all that jazz" it will not match pages that contain these
@ -36,6 +36,6 @@
"[a-z]*Leet?RegExpSkill(s|z)" "[a-z]*Leet?RegExpSkill(s|z)"
</p> </p>
<p> <p>
<b>Create a new page, named:</b> "<span class='newWikiWord'><%= link_to h(params[:query]), :web => @web.address, :action => 'new', :id => params[:query] %></span>" <b>Create a new page, named:</b> "<span class='newWikiWord'><%= link_to h(@query), :web => @web.address, :action => 'new', :id => @query %></span>"
</p> </p>
<%- end -%> <%- end -%>

View file

@ -2,27 +2,25 @@
class String class String
# Check whether a string is valid utf-8 # Take a string, and remove any invalid substrings, returning a valid utf-8 string.
# #
# :call-seq: # :call-seq:
# string.is_utf8? -> boolean # string.purify -> new_string
# #
# returns true if the sequence of bytes in string is valid utf-8 # returns a valid utf-8 string, purged of any subsequences of illegal bytes.
#-- #--
def is_utf8? def purify
#expand NCRs to utf-8 text = expand_ncrs
text = self.gsub(/&#[xX]([a-fA-F0-9]+);/) { |m| [$1.hex].pack('U*') } text.split(//u).grep(UTF8_REGEX).join
text.gsub!(/&#(\d+);/) { |m| [$1.to_i].pack('U*') } end
# You might think this is faster, but it isn't def expand_ncrs
#pieces = self.split(/&#[xX]([a-fA-F0-9]+);/) text = gsub(/&#[xX]([a-fA-F0-9]+);/) { |m| [$1.hex].pack('U*') }
#1.step(pieces.length-1, 2) {|i| pieces[i] = [pieces[i].hex].pack('U*')} text.gsub!(/&#(\d+);/) { |m| [$1.to_i].pack('U*') }
#pieces = pieces.join.split(/&#(\d+);/) text
#1.step(pieces.length-1, 2) {|i| pieces[i] = [pieces[i].to_i].pack('U*')} end
#text = pieces.join
#ensure the resulting string of bytes is valid utf-8 UTF8_REGEX = /\A(
text =~ /\A(
[\x09\x0A\x0D\x20-\x7E] # ASCII [\x09\x0A\x0D\x20-\x7E] # ASCII
| [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte | [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
| \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
@ -33,13 +31,31 @@ class String
| \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3 | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
| [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15 | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
| \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16 | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
)*\Z/nx; )+\Z/nx;
end
#++ #++
def purify # Check whether a string is valid utf-8
delete("\x01-\x08\x0B\x0C\x0E-\x1F", "\ufffe\uffff") #
end # :call-seq:
# string.is_utf8? -> boolean
#
# returns true if the sequence of bytes in string is valid utf-8
#--
def is_utf8?
#expand NCRs to utf-8
text = self.expand_ncrs
# You might think this is faster, but it isn't
#pieces = self.split(/&#[xX]([a-fA-F0-9]+);/)
#1.step(pieces.length-1, 2) {|i| pieces[i] = [pieces[i].hex].pack('U*')}
#pieces = pieces.join.split(/&#(\d+);/)
#1.step(pieces.length-1, 2) {|i| pieces[i] = [pieces[i].to_i].pack('U*')}
#text = pieces.join
#ensure the resulting string of bytes is valid utf-8
text =~ UTF8_REGEX
end
#:stopdoc: #:stopdoc:
MATHML_ENTITIES = { MATHML_ENTITIES = {
'Alpha' => '&#x0391;', 'Alpha' => '&#x0391;',

View file

@ -607,30 +607,39 @@ class WikiControllerTest < ActionController::TestCase
end end
def test_save_not_utf8 def test_save_not_utf8
r = process 'save', 'web' => 'wiki1', 'id' => 'NewPage', 'content' => "Contents of a new page\r\n\000", r = process 'save', 'web' => 'wiki1', 'id' => 'NewPage', 'content' => "Cont\000ents of a new page\r\n\000",
'author' => 'AuthorOfNewPage' 'author' => 'AuthorOfNewPage'
assert_redirected_to :web => 'wiki1', :controller => 'wiki', :action => 'new', :id => 'NewPage', :content => '' assert_redirected_to :web => 'wiki1', :controller => 'wiki', :action => 'show', :id => 'NewPage'
assert_equal 'AuthorOfNewPage', r.cookies['author'] assert_equal 'AuthorOfNewPage', r.cookies['author']
assert_match @eternity, r.headers["Set-Cookie"][0] assert_match @eternity, r.headers["Set-Cookie"][0]
new_page = @wiki.read_page('wiki1', 'NewPage')
assert_equal "Contents of a new page\r\n", new_page.content
assert_equal 'AuthorOfNewPage', new_page.author
end end
def test_save_not_utf8_ncr def test_save_not_utf8_ncr
r = process 'save', 'web' => 'wiki1', 'id' => 'NewPage', 'content' => "Contents of a new page\r\n&#xfffe;", r = process 'save', 'web' => 'wiki1', 'id' => 'NewPage', 'content' => "Contents of a new page\r\n&#xfffe;",
'author' => 'AuthorOfNewPage' 'author' => 'AuthorOfNewPage'
assert_redirected_to :web => 'wiki1', :controller => 'wiki', :action => 'new', :id => 'NewPage', :content => '' assert_redirected_to :web => 'wiki1', :controller => 'wiki', :action => 'show', :id => 'NewPage'
assert_equal 'AuthorOfNewPage', r.cookies['author'] assert_equal 'AuthorOfNewPage', r.cookies['author']
assert_match @eternity, r.headers["Set-Cookie"][0] assert_match @eternity, r.headers["Set-Cookie"][0]
new_page = @wiki.read_page('wiki1', 'NewPage')
assert_equal "Contents of a new page\r\n", new_page.content
assert_equal 'AuthorOfNewPage', new_page.author
end end
def test_save_not_utf8_dec_ncr def test_save_not_utf8_dec_ncr
r = process 'save', 'web' => 'wiki1', 'id' => 'NewPage', 'content' => "Contents of a new page\r\n&#65535;", r = process 'save', 'web' => 'wiki1', 'id' => 'NewPage', 'content' => "Contents of a new page\r\n&#65535;",
'author' => 'AuthorOfNewPage' 'author' => 'AuthorOfNewPage'
assert_redirected_to :web => 'wiki1', :controller => 'wiki', :action => 'new', :id => 'NewPage', :content => '' assert_redirected_to :web => 'wiki1', :controller => 'wiki', :action => 'show', :id => 'NewPage'
assert_equal 'AuthorOfNewPage', r.cookies['author'] assert_equal 'AuthorOfNewPage', r.cookies['author']
assert_match @eternity, r.headers["Set-Cookie"][0] assert_match @eternity, r.headers["Set-Cookie"][0]
new_page = @wiki.read_page('wiki1', 'NewPage')
assert_equal "Contents of a new page\r\n", new_page.content
assert_equal 'AuthorOfNewPage', new_page.author
end end
def test_save_new_revision_of_existing_page def test_save_new_revision_of_existing_page
@ -653,17 +662,15 @@ class WikiControllerTest < ActionController::TestCase
@home.lock(Time.now, 'Batman') @home.lock(Time.now, 'Batman')
current_revisions = @home.revisions.size current_revisions = @home.revisions.size
r = process 'save', 'web' => 'wiki1', 'id' => 'HomePage', 'content' => "Revised HomePage\000", r = process 'save', 'web' => 'wiki1', 'id' => 'HomePage', 'content' => "Newly rev\000ised HomePage",
'author' => 'Batman' 'author' => 'Batman'
assert_redirected_to :web => 'wiki1', :controller => 'wiki', :action => 'edit', :id => 'HomePage', assert_redirected_to :web => 'wiki1', :controller => 'wiki', :action => 'show', :id => 'HomePage'
:content => 'HisWay would be MyWay $\sin(x)\begin{svg}<svg/>\end{svg}\includegraphics[width' +
'=3em]{foo}$ in kinda ThatWay in HisWay though MyWay \OverThere -- see SmartEng' +
'ine in that SmartEngineGUI'
assert_equal 'Batman', r.cookies['author'] assert_equal 'Batman', r.cookies['author']
home_page = @wiki.read_page('wiki1', 'HomePage') home_page = @wiki.read_page('wiki1', 'HomePage')
assert_equal current_revisions, home_page.revisions.size assert_equal current_revisions+1, home_page.revisions.size
assert_equal 'DavidHeinemeierHansson', home_page.author assert_equal 'Newly revised HomePage', home_page.content
assert_equal 'Batman', home_page.author
assert !home_page.locked?(Time.now) assert !home_page.locked?(Time.now)
end end
@ -750,11 +757,20 @@ class WikiControllerTest < ActionController::TestCase
assert_redirected_to :action => 'new', :controller => 'wiki', :web => 'wiki1', :id => 'NewPage' assert_redirected_to :action => 'new', :controller => 'wiki', :web => 'wiki1', :id => 'NewPage'
assert r.flash[:error].to_s == 'Your name cannot contain a "."' assert r.flash[:error].to_s == 'Your name cannot contain a "."'
r = process 'save', 'web' => 'wiki1', 'id' => 'NewPage', 'content' => 'Contents of a new page',
'author' => "Fu\000Manchu"
assert_redirected_to :action => 'show', :controller => 'wiki', :web => 'wiki1', :id => 'NewPage'
new_page = @wiki.read_page('wiki1', 'NewPage')
assert_equal 'FuManchu', new_page.author
r = process 'save', 'web' => 'wiki1', 'id' => 'AnotherPage', 'content' => 'Contents of a new page', r = process 'save', 'web' => 'wiki1', 'id' => 'AnotherPage', 'content' => 'Contents of a new page',
'author' => "\000" 'author' => "\000"
assert_redirected_to :action => 'new', :controller => 'wiki', :web => 'wiki1', :id => 'AnotherPage' assert_redirected_to :action => 'show', :controller => 'wiki', :web => 'wiki1', :id => 'AnotherPage'
assert r.flash[:error].to_s == "Your name was not valid utf-8" new_page = @wiki.read_page('wiki1', 'AnotherPage')
assert_equal 'AnonymousCoward', new_page.author
end end
def test_search def test_search
@ -794,17 +810,24 @@ class WikiControllerTest < ActionController::TestCase
end end
def test_search_null_in_query def test_search_null_in_query
r = process 'search', 'web' => 'wiki1', 'query' => "\x00" r = process 'search', 'web' => 'wiki1', 'query' => "non-existant\x00"
assert_response(400) assert_response(:success)
assert_match /Your query string was not valid utf-8/, r.body assert_match /No pages contain \"non-existant\"/, r.body
end end
def test_search_FFFF_in_query def test_search_FFFF_in_query
r = process 'search', 'web' => 'wiki1', 'query' => "\xEF\xBF\xBF" r = process 'search', 'web' => 'wiki1', 'query' => "\xEF\xBF\xBF"
assert_response(400) assert_response(:success)
assert_match /Your query string was not valid utf-8/, r.body assert_match /9 page\(s\) containing search string in the page name:/, r.body
r = process 'search', 'web' => 'wiki1', 'query' => "Al\357\277\277l about"
assert_response(:success)
assert_equal 'All about', r.template_objects['query']
assert_equal [@elephant, @oak], r.template_objects['results']
assert_equal [], r.template_objects['title_results']
end end
def test_search_FFFD_in_query def test_search_FFFD_in_query

View file

@ -1 +0,0 @@
performance.sql