From 206230a4ecb5c7e1b4de445ccda14814ad5a0232 Mon Sep 17 00:00:00 2001 From: Saito Date: Tue, 29 May 2012 16:16:59 +0800 Subject: [PATCH] rewrite encode strategy. --- lib/gitlab/encode.rb | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/lib/gitlab/encode.rb b/lib/gitlab/encode.rb index 1c95a947..cee3ace2 100644 --- a/lib/gitlab/encode.rb +++ b/lib/gitlab/encode.rb @@ -1,25 +1,29 @@ # Patch Strings to enable detect_encoding! on views require 'charlock_holmes/string' module Gitlab - module Encode + module Encode extend self def utf8 message + # return nil if message is nil return nil unless message - detect = CharlockHolmes::EncodingDetector.detect(message) rescue {} + # if message is utf-8 encoding, just return it + message.force_encoding("utf-8") + return message if message.valid_encoding? - # It's better to default to UTF-8 as sometimes it's wrongly detected as another charset - if detect[:encoding] && detect[:confidence] == 100 - CharlockHolmes::Converter.convert(message, detect[:encoding], 'UTF-8') - else - message - end.force_encoding("utf-8") + # if message is not utf-8 encoding, detect and convert it + detect = CharlockHolmes::EncodingDetector.detect(message) + if detect[:encoding] && detect[:confidence] > 60 + message.force_encoding(detect[:encoding]) + message.encode!("utf-8", detect[:encoding], :undef => :replace, :replace => "", :invalid => :replace) + end - # Prevent app from crash cause of - # encoding errors + message.valid_encoding? ? message : raise + + # Prevent app from crash cause of encoding errors rescue - "--broken encoding: #{encoding}" + "--broken encoding: #{detect[:encoding]}" end def detect_encoding message