HTML5lib Bug
Fixed a bug in the HTML5lib tokenizer (affects S5 slideshows). Also some miscellaneous code cleanup. In particular, don't bother zapping control characters; instead, rely on the is_utf8? check to reject bad input by raising an exception (which we do anyway).
This commit is contained in:
parent
f482036683
commit
5b182bd228
|
@ -227,13 +227,13 @@ class WikiController < ApplicationController
|
||||||
def save
|
def save
|
||||||
render(:status => 404, :text => 'Undefined page name') and return if @page_name.nil?
|
render(:status => 404, :text => 'Undefined page name') and return if @page_name.nil?
|
||||||
|
|
||||||
author_name = params['author'].delete("\x01-\x08\x0B\x0C\x0E-\x1F")
|
author_name = params['author']
|
||||||
author_name = 'AnonymousCoward' if author_name =~ /^\s*$/
|
author_name = 'AnonymousCoward' if author_name =~ /^\s*$/
|
||||||
raise "Your name was not valid utf-8" if !author_name.is_utf8?
|
raise "Your name was not valid utf-8" if !author_name.is_utf8?
|
||||||
cookies['author'] = { :value => author_name, :expires => Time.utc(2030) }
|
cookies['author'] = { :value => author_name, :expires => Time.utc(2030) }
|
||||||
|
|
||||||
begin
|
begin
|
||||||
the_content = params['content'].delete("\x01-\x08\x0B\x0C\x0E-\x1F")
|
the_content = params['content']
|
||||||
raise "Your content was not valid utf-8" if !the_content.is_utf8?
|
raise "Your content was not valid utf-8" if !the_content.is_utf8?
|
||||||
filter_spam(the_content)
|
filter_spam(the_content)
|
||||||
if @page
|
if @page
|
||||||
|
@ -294,16 +294,16 @@ class WikiController < ApplicationController
|
||||||
|
|
||||||
def s5
|
def s5
|
||||||
if @web.markup == :markdownMML
|
if @web.markup == :markdownMML
|
||||||
my_content = Maruku.new(@page.content.delete("\r\x01-\x08\x0B\x0C\x0E-\x1F"),
|
my_content = Maruku.new(@page.content.delete("\r"),
|
||||||
{:math_enabled => true, :math_numbered => ['\\[','\\begin{equation}'], :content_only => true,
|
{:math_enabled => true, :math_numbered => ['\\[','\\begin{equation}'], :content_only => true,
|
||||||
:author => @page.author, :title => @page.plain_name})
|
:author => @page.author, :title => @page.plain_name})
|
||||||
@s5_content = sanitize_xhtml(my_content.to_s5.to_ncr)
|
@s5_content = sanitize_xhtml(my_content.to_s5)
|
||||||
@s5_theme = my_content.s5_theme
|
@s5_theme = my_content.s5_theme
|
||||||
elsif @web.markup == :markdown
|
elsif @web.markup == :markdown
|
||||||
my_content = Maruku.new(@page.content.delete("\r\x01-\x08\x0B\x0C\x0E-\x1F"),
|
my_content = Maruku.new(@page.content.delete("\r"),
|
||||||
{:math_enabled => false, :content_only => true,
|
{:math_enabled => false, :content_only => true,
|
||||||
:author => @page.author, :title => @page.plain_name})
|
:author => @page.author, :title => @page.plain_name})
|
||||||
@s5_content = sanitize_xhtml(my_content.to_s5.to_ncr)
|
@s5_content = sanitize_xhtml(my_content.to_s5)
|
||||||
@s5_theme = my_content.s5_theme
|
@s5_theme = my_content.s5_theme
|
||||||
else
|
else
|
||||||
@s5_content = "S5 not supported with this text filter"
|
@s5_content = "S5 not supported with this text filter"
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
require 'chunks/chunk'
|
require 'chunks/chunk'
|
||||||
|
require 'sanitize'
|
||||||
|
|
||||||
# The category chunk looks for "category: news" on a line by
|
# The category chunk looks for "category: news" on a line by
|
||||||
# itself and parses the terms after the ':' as categories.
|
# itself and parses the terms after the ':' as categories.
|
||||||
|
@ -8,6 +9,7 @@ require 'chunks/chunk'
|
||||||
#
|
#
|
||||||
# Category lines can be hidden using ':category: news', for example
|
# Category lines can be hidden using ':category: news', for example
|
||||||
class Category < Chunk::Abstract
|
class Category < Chunk::Abstract
|
||||||
|
|
||||||
CATEGORY_PATTERN = /^(:)?category\s*:(.*)$/i
|
CATEGORY_PATTERN = /^(:)?category\s*:(.*)$/i
|
||||||
def self.pattern() CATEGORY_PATTERN end
|
def self.pattern() CATEGORY_PATTERN end
|
||||||
|
|
||||||
|
@ -16,7 +18,8 @@ class Category < Chunk::Abstract
|
||||||
def initialize(match_data, content)
|
def initialize(match_data, content)
|
||||||
super(match_data, content)
|
super(match_data, content)
|
||||||
@hidden = match_data[1]
|
@hidden = match_data[1]
|
||||||
@list = match_data[2].split(',').map { |c| html_escape(c.strip) }
|
@list = match_data[2].split(',').map { |c| c.to_s.is_utf8? ? html_escape(c.strip) : nil }
|
||||||
|
@list.compact!
|
||||||
@unmask_text = ''
|
@unmask_text = ''
|
||||||
if @hidden
|
if @hidden
|
||||||
@unmask_text = ''
|
@unmask_text = ''
|
||||||
|
|
|
@ -78,6 +78,7 @@ module Chunk
|
||||||
string.gsub( /&/, "&amp;" ).
|
string.gsub( /&/, "&amp;" ).
|
||||||
gsub( /</, "&lt;" ).
|
gsub( /</, "&lt;" ).
|
||||||
gsub( />/, "&gt;" ).
|
gsub( />/, "&gt;" ).
|
||||||
|
gsub( /'/, "&#39;" ).
|
||||||
gsub( /"/, "&quot;" )
|
gsub( /"/, "&quot;" )
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -217,7 +217,7 @@ module HTML5
|
||||||
|
|
||||||
# This method replaces the need for "entityInAttributeValueState".
|
# This method replaces the need for "entityInAttributeValueState".
|
||||||
def process_entity_in_attribute
|
def process_entity_in_attribute
|
||||||
entity = consume_entity(true)
|
entity = consume_entity()
|
||||||
if entity
|
if entity
|
||||||
@current_token[:data][-1][1] += entity
|
@current_token[:data][-1][1] += entity
|
||||||
else
|
else
|
||||||
|
|
|
@ -405,5 +405,25 @@
|
||||||
"name": "xul",
|
"name": "xul",
|
||||||
"input": "<p style=\"-moz-binding:url('http://ha.ckers.org/xssmoz.xml#xss')\">fubar</p>",
|
"input": "<p style=\"-moz-binding:url('http://ha.ckers.org/xssmoz.xml#xss')\">fubar</p>",
|
||||||
"output": "<p style=''>fubar</p>"
|
"output": "<p style=''>fubar</p>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "quotes_in_attributes",
|
||||||
|
"input": "<img src='foo' title='\"foo\" bar' />",
|
||||||
|
"rexml": "<img src='foo' title='\"foo\" bar' />",
|
||||||
|
"output": "<img title='&quot;foo&quot; bar' src='foo'/>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "named_entities_in_attributes",
|
||||||
|
"input": "<img src='foo' title='&quot;foo&quot; bar' />",
|
||||||
|
"rexml": "<img src='foo' title='\"foo\" bar' />",
|
||||||
|
"output": "<img title='&quot;foo&quot; bar' src='foo'/>"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "NCRs_in_attributes",
|
||||||
|
"input": "<img src='foo' title='&#34;foo&#34; bar' />",
|
||||||
|
"rexml": "<img src='foo' title='\"foo\" bar' />",
|
||||||
|
"output": "<img title='&quot;foo&quot; bar' src='foo'/>"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
|
@ -11,6 +11,7 @@ module MaRuKu
|
||||||
string.gsub( /&/, "&amp;" ).
|
string.gsub( /&/, "&amp;" ).
|
||||||
gsub( /</, "&lt;" ).
|
gsub( /</, "&lt;" ).
|
||||||
gsub( />/, "&gt;" ).
|
gsub( />/, "&gt;" ).
|
||||||
|
gsub( /'/, "&#39;" ).
|
||||||
gsub( /"/, "&quot;" )
|
gsub( /"/, "&quot;" )
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue