From c8196cbe4109e56732e6b418e54d4ae6e680d717 Mon Sep 17 00:00:00 2001 From: Jacques Distler Date: Tue, 1 Jan 2008 22:00:07 -0600 Subject: [PATCH] More Unicode Fun From Philip Taylor (via Henri Sivonen): disallow U+fffe and U+ffff. --- lib/sanitize.rb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/sanitize.rb b/lib/sanitize.rb index a747440a..6bc6d6a1 100644 --- a/lib/sanitize.rb +++ b/lib/sanitize.rb @@ -133,7 +133,9 @@ class String [\x09\x0A\x0D\x20-\x7E] # ASCII | [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs - | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte + | [\xE1-\xEC\xEE][\x80-\xBF]{2} # straight 3-byte + | \xEF[\x80-\xBE][\x80-\xBF] # U+f000 through U+ffbf (third byte unrestricted) + | \xEF\xBF[\x80-\xBD] # excluding U+fffe and U+ffff | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3 | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15