Clean, rather than Complain
Previously, if the user tried to submit content which was malformed utf-8, Instiki would complain loudly to him. A slightly more user-friendly approach was suggested by the latest Rails 2.3.4, and a conversation with Sam Ruby (who suggested some improvements). Now, instead of complaining, we remove the offending bytes, leaving a well-formed utf-8 string, which we pretend is what the user meant to submit.
This commit is contained in:
parent
f029aae60e
commit
c79fef9c01
9 changed files with 96 additions and 75 deletions
|
@ -140,7 +140,7 @@ class ApplicationController < ActionController::Base
|
|||
<html xmlns="http://www.w3.org/1999/xhtml"><body>
|
||||
<h2>Internal Error</h2>
|
||||
<p>An application error occurred while processing your request.</p>
|
||||
<!-- \n#{exception.to_s.is_utf8? ? exception.to_s.gsub!(/-{2,}/, '- -') : ''}\n#{exception.backtrace.join("\n")}\n -->
|
||||
<!-- \n#{exception.to_s.purify.gsub!(/-{2,}/, '- -') }\n#{exception.backtrace.join("\n")}\n -->
|
||||
</body></html>
|
||||
EOL
|
||||
end
|
||||
|
|
|
@ -124,14 +124,10 @@ class FileController < ApplicationController
|
|||
zip = Zip::ZipInputStream.open(archive)
|
||||
while (entry = zip.get_next_entry) do
|
||||
ext_length = File.extname(entry.name).length
|
||||
page_name = entry.name[0..-(ext_length + 1)]
|
||||
page_content = entry.get_input_stream.read
|
||||
page_name = entry.name[0..-(ext_length + 1)].purify
|
||||
page_content = entry.get_input_stream.read.purify
|
||||
logger.info "Processing page '#{page_name}'"
|
||||
begin
|
||||
if !page_content.is_utf8?
|
||||
logger.info "Page '#{page_name}' contains non-utf8 character data. Skipping."
|
||||
next
|
||||
end
|
||||
existing_page = @wiki.read_page(@web.address, page_name)
|
||||
if existing_page
|
||||
if existing_page.content == page_content
|
||||
|
|
|
@ -162,8 +162,7 @@ class WikiController < ApplicationController
|
|||
end
|
||||
|
||||
def search
|
||||
@query = params['query']
|
||||
render(:text => "Your query string was not valid utf-8", :layout => 'error', :status => 400) and return unless @query.is_utf8?
|
||||
@query = params['query'].purify
|
||||
@title_results = @web.select { |page| page.name =~ /#{@query}/i }.sort
|
||||
@results = @web.select { |page| page.content =~ /#{@query}/i }.sort
|
||||
all_pages_found = (@results + @title_results).uniq
|
||||
|
@ -180,7 +179,7 @@ class WikiController < ApplicationController
|
|||
end
|
||||
|
||||
def edit
|
||||
if @page.nil? or not @page_name.is_utf8?
|
||||
if @page.nil?
|
||||
redirect_home
|
||||
elsif @page.locked?(Time.now) and not params['break_lock']
|
||||
redirect_to :web => @web_name, :action => 'locked', :id => @page_name
|
||||
|
@ -190,12 +189,10 @@ class WikiController < ApplicationController
|
|||
end
|
||||
|
||||
def locked
|
||||
render(:text => 'Page name is not valid utf-8.', :status => 400, :layout => 'error') unless @page_name.is_utf8?
|
||||
# to template
|
||||
end
|
||||
|
||||
def new
|
||||
render(:text => 'Page name is not valid utf-8.', :status => 400, :layout => 'error') unless @page_name.is_utf8?
|
||||
# to template
|
||||
end
|
||||
|
||||
|
@ -258,32 +255,22 @@ class WikiController < ApplicationController
|
|||
end
|
||||
|
||||
def save
|
||||
render(:status => 404, :text => 'Undefined page name', :layout => 'error') and return if @page_name.nil? or not @page_name.is_utf8?
|
||||
render(:status => 404, :text => 'Undefined page name', :layout => 'error') and return if @page_name.nil?
|
||||
unless (request.post? || ENV["RAILS_ENV"] == "test")
|
||||
headers['Allow'] = 'POST'
|
||||
render(:status => 405, :text => 'You must use an HTTP POST', :layout => 'error')
|
||||
return
|
||||
end
|
||||
author_name = params['author']
|
||||
author_name = params['author'].purify
|
||||
author_name = 'AnonymousCoward' if author_name =~ /^\s*$/
|
||||
|
||||
begin
|
||||
raise Instiki::ValidationError.new('Your name was not valid utf-8') unless author_name.is_utf8?
|
||||
raise Instiki::ValidationError.new('Your name cannot contain a "."') if author_name.include? '.'
|
||||
cookies['author'] = { :value => author_name, :expires => Time.utc(2030) }
|
||||
the_content = params['content']
|
||||
the_content = params['content'].purify
|
||||
filter_spam(the_content)
|
||||
unless the_content.is_utf8?
|
||||
if @page
|
||||
the_content = @page.content
|
||||
else
|
||||
the_content = ''
|
||||
end
|
||||
raise Instiki::ValidationError.new('Your content was not valid utf-8.')
|
||||
end
|
||||
if @page
|
||||
new_name = params['new_name'] || @page_name
|
||||
raise Instiki::ValidationError.new('Your new title was not valid utf-8.') unless new_name.is_utf8?
|
||||
new_name = params['new_name'] ? params['new_name'].purify : @page_name
|
||||
raise Instiki::ValidationError.new('Your new title cannot contain a "."') if new_name.include? '.'
|
||||
raise Instiki::ValidationError.new('A page named "' + new_name.escapeHTML + '" already exists.') if @page_name != new_name && @web.has_page?(new_name)
|
||||
wiki.revise_page(@web_name, @page_name, new_name, the_content, Time.now,
|
||||
|
@ -325,7 +312,7 @@ class WikiController < ApplicationController
|
|||
end
|
||||
end
|
||||
else
|
||||
if not @page_name.nil? and @page_name.is_utf8? and not @page_name.empty?
|
||||
if not @page_name.nil? and not @page_name.empty?
|
||||
real_page = WikiReference.page_that_redirects_for(@web, @page_name)
|
||||
if real_page
|
||||
flash[:info] = "Redirected from \"#{@page_name}\"."
|
||||
|
@ -354,7 +341,7 @@ class WikiController < ApplicationController
|
|||
end
|
||||
render :action => 'history'
|
||||
else
|
||||
if not @page_name.nil? and @page_name.is_utf8? and not @page_name.empty?
|
||||
if not @page_name.nil? and not @page_name.empty?
|
||||
redirect_to :web => @web_name, :action => 'new', :id => @page_name
|
||||
else
|
||||
render :text => 'Page name is not specified', :status => 404, :layout => 'error'
|
||||
|
@ -397,7 +384,7 @@ class WikiController < ApplicationController
|
|||
end
|
||||
|
||||
def load_page
|
||||
@page_name = params['id']
|
||||
@page_name = params['id'] ? params['id'].purify : nil
|
||||
@page = @wiki.read_page(@web_name, @page_name) if @page_name
|
||||
end
|
||||
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
'accept-charset' => 'utf-8' }) do %>
|
||||
<div>
|
||||
<textarea name="content" id="content" rows="24" cols="60"><%= h(flash[:content] ||
|
||||
((params['content'] && params['content'].is_utf8?) ? params['content'] : @page.content).purify) %></textarea>
|
||||
(params['content'] ? params['content'] : @page.content).purify) %></textarea>
|
||||
<% if @page_name != 'HomePage' -%>
|
||||
<p>
|
||||
<%= check_box_tag :alter_title, value = "1", checked=false,
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
{ 'id' => 'editForm', 'method' => 'post', 'onsubmit' => 'cleanAuthorName();', 'accept-charset' => 'utf-8' }) do %>
|
||||
|
||||
<textarea name="content" id="content" rows="24" cols="60"><%= h(flash[:content] ||
|
||||
( (params['content'] && params['content'].is_utf8?) ? params['content'] : '').purify ) %></textarea>
|
||||
params['content'] ? params['content'].purify : '' ) %></textarea>
|
||||
<div id="editFormButtons">
|
||||
<input type="submit" value="Submit" accesskey="s"/> as
|
||||
<%= text_field_tag :author, @author,
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
<%- @title = "Search results for \"#{h params["query"]}\"" -%>
|
||||
<%- @title = "Search results for \"#{h @query}\"" -%>
|
||||
|
||||
<%- unless @title_results.empty? -%>
|
||||
<h2><%= @title_results.length %> page(s) containing search string in the page name:</h2>
|
||||
|
@ -24,7 +24,7 @@
|
|||
<%- end -%>
|
||||
|
||||
<%- if (@results + @title_results).empty? -%>
|
||||
<h2>No pages contain "<%= h params["query"] %>" </h2>
|
||||
<h2>No pages contain "<%= h @query %>" </h2>
|
||||
<p>
|
||||
Perhaps you should try expanding your query. Remember that Instiki searches for entire
|
||||
phrases, so if you search for "all that jazz" it will not match pages that contain these
|
||||
|
@ -36,6 +36,6 @@
|
|||
"[a-z]*Leet?RegExpSkill(s|z)"
|
||||
</p>
|
||||
<p>
|
||||
<b>Create a new page, named:</b> "<span class='newWikiWord'><%= link_to h(params[:query]), :web => @web.address, :action => 'new', :id => params[:query] %></span>"
|
||||
<b>Create a new page, named:</b> "<span class='newWikiWord'><%= link_to h(@query), :web => @web.address, :action => 'new', :id => @query %></span>"
|
||||
</p>
|
||||
<%- end -%>
|
||||
|
|
|
@ -2,27 +2,25 @@
|
|||
|
||||
class String
|
||||
|
||||
# Check whether a string is valid utf-8
|
||||
# Take a string, and remove any invalid substrings, returning a valid utf-8 string.
|
||||
#
|
||||
# :call-seq:
|
||||
# string.is_utf8? -> boolean
|
||||
# string.purify -> new_string
|
||||
#
|
||||
# returns true if the sequence of bytes in string is valid utf-8
|
||||
# returns a valid utf-8 string, purged of any subsequences of illegal bytes.
|
||||
#--
|
||||
def is_utf8?
|
||||
#expand NCRs to utf-8
|
||||
text = self.gsub(/&#[xX]([a-fA-F0-9]+);/) { |m| [$1.hex].pack('U*') }
|
||||
text.gsub!(/&#(\d+);/) { |m| [$1.to_i].pack('U*') }
|
||||
|
||||
# You might think this is faster, but it isn't
|
||||
#pieces = self.split(/&#[xX]([a-fA-F0-9]+);/)
|
||||
#1.step(pieces.length-1, 2) {|i| pieces[i] = [pieces[i].hex].pack('U*')}
|
||||
#pieces = pieces.join.split(/&#(\d+);/)
|
||||
#1.step(pieces.length-1, 2) {|i| pieces[i] = [pieces[i].to_i].pack('U*')}
|
||||
#text = pieces.join
|
||||
|
||||
#ensure the resulting string of bytes is valid utf-8
|
||||
text =~ /\A(
|
||||
def purify
|
||||
text = expand_ncrs
|
||||
text.split(//u).grep(UTF8_REGEX).join
|
||||
end
|
||||
|
||||
def expand_ncrs
|
||||
text = gsub(/&#[xX]([a-fA-F0-9]+);/) { |m| [$1.hex].pack('U*') }
|
||||
text.gsub!(/&#(\d+);/) { |m| [$1.to_i].pack('U*') }
|
||||
text
|
||||
end
|
||||
|
||||
UTF8_REGEX = /\A(
|
||||
[\x09\x0A\x0D\x20-\x7E] # ASCII
|
||||
| [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
|
||||
| \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
|
||||
|
@ -33,13 +31,31 @@ class String
|
|||
| \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
|
||||
| [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
|
||||
| \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
|
||||
)*\Z/nx;
|
||||
end
|
||||
)+\Z/nx;
|
||||
#++
|
||||
|
||||
# Check whether a string is valid utf-8
|
||||
#
|
||||
# :call-seq:
|
||||
# string.is_utf8? -> boolean
|
||||
#
|
||||
# returns true if the sequence of bytes in string is valid utf-8
|
||||
#--
|
||||
def is_utf8?
|
||||
#expand NCRs to utf-8
|
||||
text = self.expand_ncrs
|
||||
|
||||
# You might think this is faster, but it isn't
|
||||
#pieces = self.split(/&#[xX]([a-fA-F0-9]+);/)
|
||||
#1.step(pieces.length-1, 2) {|i| pieces[i] = [pieces[i].hex].pack('U*')}
|
||||
#pieces = pieces.join.split(/&#(\d+);/)
|
||||
#1.step(pieces.length-1, 2) {|i| pieces[i] = [pieces[i].to_i].pack('U*')}
|
||||
#text = pieces.join
|
||||
|
||||
#ensure the resulting string of bytes is valid utf-8
|
||||
text =~ UTF8_REGEX
|
||||
end
|
||||
|
||||
def purify
|
||||
delete("\x01-\x08\x0B\x0C\x0E-\x1F", "\ufffe\uffff")
|
||||
end
|
||||
#:stopdoc:
|
||||
MATHML_ENTITIES = {
|
||||
'Alpha' => 'Α',
|
||||
|
|
|
@ -607,30 +607,39 @@ class WikiControllerTest < ActionController::TestCase
|
|||
end
|
||||
|
||||
def test_save_not_utf8
|
||||
r = process 'save', 'web' => 'wiki1', 'id' => 'NewPage', 'content' => "Contents of a new page\r\n\000",
|
||||
r = process 'save', 'web' => 'wiki1', 'id' => 'NewPage', 'content' => "Cont\000ents of a new page\r\n\000",
|
||||
'author' => 'AuthorOfNewPage'
|
||||
|
||||
assert_redirected_to :web => 'wiki1', :controller => 'wiki', :action => 'new', :id => 'NewPage', :content => ''
|
||||
assert_redirected_to :web => 'wiki1', :controller => 'wiki', :action => 'show', :id => 'NewPage'
|
||||
assert_equal 'AuthorOfNewPage', r.cookies['author']
|
||||
assert_match @eternity, r.headers["Set-Cookie"][0]
|
||||
new_page = @wiki.read_page('wiki1', 'NewPage')
|
||||
assert_equal "Contents of a new page\r\n", new_page.content
|
||||
assert_equal 'AuthorOfNewPage', new_page.author
|
||||
end
|
||||
|
||||
def test_save_not_utf8_ncr
|
||||
r = process 'save', 'web' => 'wiki1', 'id' => 'NewPage', 'content' => "Contents of a new page\r\n",
|
||||
'author' => 'AuthorOfNewPage'
|
||||
|
||||
assert_redirected_to :web => 'wiki1', :controller => 'wiki', :action => 'new', :id => 'NewPage', :content => ''
|
||||
assert_redirected_to :web => 'wiki1', :controller => 'wiki', :action => 'show', :id => 'NewPage'
|
||||
assert_equal 'AuthorOfNewPage', r.cookies['author']
|
||||
assert_match @eternity, r.headers["Set-Cookie"][0]
|
||||
new_page = @wiki.read_page('wiki1', 'NewPage')
|
||||
assert_equal "Contents of a new page\r\n", new_page.content
|
||||
assert_equal 'AuthorOfNewPage', new_page.author
|
||||
end
|
||||
|
||||
def test_save_not_utf8_dec_ncr
|
||||
r = process 'save', 'web' => 'wiki1', 'id' => 'NewPage', 'content' => "Contents of a new page\r\n",
|
||||
'author' => 'AuthorOfNewPage'
|
||||
|
||||
assert_redirected_to :web => 'wiki1', :controller => 'wiki', :action => 'new', :id => 'NewPage', :content => ''
|
||||
assert_redirected_to :web => 'wiki1', :controller => 'wiki', :action => 'show', :id => 'NewPage'
|
||||
assert_equal 'AuthorOfNewPage', r.cookies['author']
|
||||
assert_match @eternity, r.headers["Set-Cookie"][0]
|
||||
new_page = @wiki.read_page('wiki1', 'NewPage')
|
||||
assert_equal "Contents of a new page\r\n", new_page.content
|
||||
assert_equal 'AuthorOfNewPage', new_page.author
|
||||
end
|
||||
|
||||
def test_save_new_revision_of_existing_page
|
||||
|
@ -653,17 +662,15 @@ class WikiControllerTest < ActionController::TestCase
|
|||
@home.lock(Time.now, 'Batman')
|
||||
current_revisions = @home.revisions.size
|
||||
|
||||
r = process 'save', 'web' => 'wiki1', 'id' => 'HomePage', 'content' => "Revised HomePage\000",
|
||||
r = process 'save', 'web' => 'wiki1', 'id' => 'HomePage', 'content' => "Newly rev\000ised HomePage",
|
||||
'author' => 'Batman'
|
||||
|
||||
assert_redirected_to :web => 'wiki1', :controller => 'wiki', :action => 'edit', :id => 'HomePage',
|
||||
:content => 'HisWay would be MyWay $\sin(x)\begin{svg}<svg/>\end{svg}\includegraphics[width' +
|
||||
'=3em]{foo}$ in kinda ThatWay in HisWay though MyWay \OverThere -- see SmartEng' +
|
||||
'ine in that SmartEngineGUI'
|
||||
assert_redirected_to :web => 'wiki1', :controller => 'wiki', :action => 'show', :id => 'HomePage'
|
||||
assert_equal 'Batman', r.cookies['author']
|
||||
home_page = @wiki.read_page('wiki1', 'HomePage')
|
||||
assert_equal current_revisions, home_page.revisions.size
|
||||
assert_equal 'DavidHeinemeierHansson', home_page.author
|
||||
assert_equal current_revisions+1, home_page.revisions.size
|
||||
assert_equal 'Newly revised HomePage', home_page.content
|
||||
assert_equal 'Batman', home_page.author
|
||||
assert !home_page.locked?(Time.now)
|
||||
end
|
||||
|
||||
|
@ -750,11 +757,20 @@ class WikiControllerTest < ActionController::TestCase
|
|||
assert_redirected_to :action => 'new', :controller => 'wiki', :web => 'wiki1', :id => 'NewPage'
|
||||
assert r.flash[:error].to_s == 'Your name cannot contain a "."'
|
||||
|
||||
r = process 'save', 'web' => 'wiki1', 'id' => 'NewPage', 'content' => 'Contents of a new page',
|
||||
'author' => "Fu\000Manchu"
|
||||
|
||||
assert_redirected_to :action => 'show', :controller => 'wiki', :web => 'wiki1', :id => 'NewPage'
|
||||
new_page = @wiki.read_page('wiki1', 'NewPage')
|
||||
assert_equal 'FuManchu', new_page.author
|
||||
|
||||
r = process 'save', 'web' => 'wiki1', 'id' => 'AnotherPage', 'content' => 'Contents of a new page',
|
||||
'author' => "\000"
|
||||
|
||||
assert_redirected_to :action => 'new', :controller => 'wiki', :web => 'wiki1', :id => 'AnotherPage'
|
||||
assert r.flash[:error].to_s == "Your name was not valid utf-8"
|
||||
assert_redirected_to :action => 'show', :controller => 'wiki', :web => 'wiki1', :id => 'AnotherPage'
|
||||
new_page = @wiki.read_page('wiki1', 'AnotherPage')
|
||||
assert_equal 'AnonymousCoward', new_page.author
|
||||
|
||||
end
|
||||
|
||||
def test_search
|
||||
|
@ -794,17 +810,24 @@ class WikiControllerTest < ActionController::TestCase
|
|||
end
|
||||
|
||||
def test_search_null_in_query
|
||||
r = process 'search', 'web' => 'wiki1', 'query' => "\x00"
|
||||
r = process 'search', 'web' => 'wiki1', 'query' => "non-existant\x00"
|
||||
|
||||
assert_response(400)
|
||||
assert_match /Your query string was not valid utf-8/, r.body
|
||||
assert_response(:success)
|
||||
assert_match /No pages contain \"non-existant\"/, r.body
|
||||
end
|
||||
|
||||
def test_search_FFFF_in_query
|
||||
r = process 'search', 'web' => 'wiki1', 'query' => "\xEF\xBF\xBF"
|
||||
|
||||
assert_response(400)
|
||||
assert_match /Your query string was not valid utf-8/, r.body
|
||||
assert_response(:success)
|
||||
assert_match /9 page\(s\) containing search string in the page name:/, r.body
|
||||
|
||||
r = process 'search', 'web' => 'wiki1', 'query' => "Al\357\277\277l about"
|
||||
|
||||
assert_response(:success)
|
||||
assert_equal 'All about', r.template_objects['query']
|
||||
assert_equal [@elephant, @oak], r.template_objects['results']
|
||||
assert_equal [], r.template_objects['title_results']
|
||||
end
|
||||
|
||||
def test_search_FFFD_in_query
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
performance.sql
|
Loading…
Reference in a new issue