Ensure that input is bona fide utf-8.

This commit is contained in:
Jacques Distler 2007-03-07 21:06:39 -06:00
parent 8300133c8d
commit d74116dc67
3 changed files with 38 additions and 9 deletions

View file

@ -1,6 +1,7 @@
# Controller responsible for serving files and pictures. # Controller responsible for serving files and pictures.
require 'zip/zip' require 'zip/zip'
require 'string_utils'
class FileController < ApplicationController class FileController < ApplicationController
@ -9,7 +10,7 @@ class FileController < ApplicationController
before_filter :check_allow_uploads before_filter :check_allow_uploads
def file def file
@file_name = params['id'] @file_name = @params['id']
if @params['file'] if @params['file']
# form supplied # form supplied
new_file = @web.wiki_files.create(@params['file']) new_file = @web.wiki_files.create(@params['file'])
@ -28,6 +29,7 @@ class FileController < ApplicationController
send_data(file.content, determine_file_options_for(@file_name, :filename => @file_name)) send_data(file.content, determine_file_options_for(@file_name, :filename => @file_name))
else else
@file = WikiFile.new(:file_name => @file_name) @file = WikiFile.new(:file_name => @file_name)
# @file = WikiFile.new(@file_name)
render render
end end
end end
@ -61,7 +63,8 @@ class FileController < ApplicationController
if @web.allow_uploads? if @web.allow_uploads?
return true return true
else else
render :status => 403, :text => 'File uploads are blocked by the webmaster' @hide_navigation = true
render(:status => 403, :text => 'File uploads are blocked by the webmaster', :layout => true)
return false return false
end end
end end
@ -77,6 +80,10 @@ class FileController < ApplicationController
page_content = entry.get_input_stream.read page_content = entry.get_input_stream.read
logger.info "Processing page '#{page_name}'" logger.info "Processing page '#{page_name}'"
begin begin
if !page_content.is_utf8?
logger.info "Page '#{page_name}' contains non-utf8 character data. Skipping."
next
end
existing_page = @wiki.read_page(@web.address, page_name) existing_page = @wiki.read_page(@web.address, page_name)
if existing_page if existing_page
if existing_page.content == page_content if existing_page.content == page_content

View file

@ -3,6 +3,7 @@ require 'redcloth_for_tex'
require 'parsedate' require 'parsedate'
require 'zip/zip' require 'zip/zip'
require 'sanitize' require 'sanitize'
require 'string_utils'
class WikiController < ApplicationController class WikiController < ApplicationController
@ -226,25 +227,28 @@ class WikiController < ApplicationController
def save def save
render(:status => 404, :text => 'Undefined page name') and return if @page_name.nil? render(:status => 404, :text => 'Undefined page name') and return if @page_name.nil?
author_name = @params['author'] author_name = @params['author'].delete("\x01-\x08\x0B\x0C\x0E-\x1F")
author_name = 'AnonymousCoward' if author_name =~ /^\s*$/ author_name = 'AnonymousCoward' if author_name =~ /^\s*$/
raise "Your name was not valid utf-8" if !author_name.is_utf8?
cookies['author'] = { :value => author_name, :expires => Time.utc(2030) } cookies['author'] = { :value => author_name, :expires => Time.utc(2030) }
begin begin
filter_spam(@params['content']) the_content = @params['content'].delete("\x01-\x08\x0B\x0C\x0E-\x1F")
raise "Your content was not valid utf-8" if !the_content.is_utf8?
filter_spam(the_content)
if @page if @page
wiki.revise_page(@web_name, @page_name, @params['content'], Time.now, wiki.revise_page(@web_name, @page_name, the_content, Time.now,
Author.new(author_name, remote_ip), PageRenderer.new) Author.new(author_name, remote_ip), PageRenderer.new)
@page.unlock @page.unlock
else else
wiki.write_page(@web_name, @page_name, @params['content'], Time.now, wiki.write_page(@web_name, @page_name, the_content, Time.now,
Author.new(author_name, remote_ip), PageRenderer.new) Author.new(author_name, remote_ip), PageRenderer.new)
end end
redirect_to_page @page_name redirect_to_page @page_name
rescue => e rescue => e
flash[:error] = e flash[:error] = e
logger.error e logger.error e
flash[:content] = @params['content'] flash[:content] = the_content
if @page if @page
@page.unlock @page.unlock
redirect_to :action => 'edit', :web => @web_name, :id => @page_name redirect_to :action => 'edit', :web => @web_name, :id => @page_name
@ -290,7 +294,7 @@ class WikiController < ApplicationController
def s5 def s5
if @web.markup == :markdownMML or @web.markup == :markdown if @web.markup == :markdownMML or @web.markup == :markdown
@s5_content = sanitize_html(Maruku.new(@page.content.delete("\r\x01-\x08\x0B\x0C\x0E-\x1F"), @s5_content = sanitize_html(Maruku.new(@page.content.delete("\r"),
{:math_enabled => true, :math_numbered => ['\\[','\\begin{equation}'], :content_only => true, {:math_enabled => true, :math_numbered => ['\\[','\\begin{equation}'], :content_only => true,
:author => @page.author, :title => @page.plain_name}).to_s5) :author => @page.author, :title => @page.plain_name}).to_s5)
end end

18
lib/string_utils.rb Normal file
View file

@ -0,0 +1,18 @@
# Some useful additions to the String class
class String
  # Checks that the ENTIRE string consists of well-formed UTF-8.
  #
  # Accepted bytes are tab, LF, CR, printable ASCII (0x20-0x7E) and
  # multi-byte sequences that are neither overlong, nor surrogates, nor
  # beyond plane 16. Any other byte (NUL, other control characters, DEL,
  # stray continuation bytes, ...) makes the check fail.
  #
  # Returns 0 (truthy) when the string is valid and nil otherwise, so it is
  # meant to be used as a predicate: <tt>raise "bad" if !str.is_utf8?</tt>.
  # The empty string is considered valid.
  def is_utf8?
    # The pattern describes raw bytes, so match against a binary view of the
    # string where the runtime tracks encodings; older runtimes without
    # String#force_encoding already treat the string as bytes.
    raw = respond_to?(:force_encoding) ? dup.force_encoding('BINARY') : self

    # \A and \z anchor the whole string. Ruby's ^ and $ anchor individual
    # lines, which would let a single well-formed line validate a string
    # that carries invalid bytes on another line.
    # The n flag makes the pattern byte-oriented so the high-byte escapes
    # are legal in the literal.
    raw =~ /\A(?:
        [\x09\x0A\x0D\x20-\x7E]            # ASCII
      | [\xC2-\xDF][\x80-\xBF]             # non-overlong 2-byte
      | \xE0[\xA0-\xBF][\x80-\xBF]         # excluding overlongs
      | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}  # straight 3-byte
      | \xED[\x80-\x9F][\x80-\xBF]         # excluding surrogates
      | \xF0[\x90-\xBF][\x80-\xBF]{2}      # planes 1-3
      | [\xF1-\xF3][\x80-\xBF]{3}          # planes 4-15
      | \xF4[\x80-\x8F][\x80-\xBF]{2}      # plane 16
    )*\z/nx
  end
end