HTML5lib Bug

Fixed a bug in the HTML5lib tokenizer (affects S5 slideshows). Also includes some miscellaneous code cleanup. In particular, don't bother zapping control characters; instead, rely on the is_utf8? method to raise an exception (which we do anyway).
2007-09-06 10:40:48 -05:00 · 2007-09-06 10:40:48 -05:00 · 5b182bd228
commit 5b182bd228
parent f482036683
6 changed files with 33 additions and 8 deletions
--- a/lib/chunks/category.rb
+++ b/lib/chunks/category.rb
@@ -1,4 +1,5 @@
 require 'chunks/chunk'
+require 'sanitize'

 # The category chunk looks for "category: news" on a line by
 # itself and parses the terms after the ':' as categories.
@@ -8,6 +9,7 @@ require 'chunks/chunk'
 #
 # Category lines can be hidden using ':category: news', for example
 class Category < Chunk::Abstract
+
  CATEGORY_PATTERN = /^(:)?category\s*:(.*)$/i
  def self.pattern() CATEGORY_PATTERN  end

@@ -16,7 +18,8 @@ class Category < Chunk::Abstract
 def initialize(match_data, content)
    super(match_data, content)
    @hidden = match_data[1]
-    @list = match_data[2].split(',').map { |c| html_escape(c.strip) }
+    @list = match_data[2].split(',').map { |c| c.to_s.is_utf8? ? html_escape(c.strip) : nil }
+    @list.compact!
    @unmask_text = ''
    if @hidden
      @unmask_text = ''
--- a/lib/chunks/chunk.rb
+++ b/lib/chunks/chunk.rb
@@ -78,6 +78,7 @@ module Chunk
      string.gsub( /&/, "&amp;" ).
             gsub( /</, "&lt;" ).
             gsub( />/, "&gt;" ).
+             gsub( /'/, "&#39;" ).
             gsub( /"/, "&quot;" )
    end