Rewrite encoding strategy.

This commit is contained in:
Saito 2012-05-29 16:16:59 +08:00
parent 89043d6b89
commit 206230a4ec

View file

@ -5,21 +5,25 @@ module Gitlab
extend self extend self
def utf8 message def utf8 message
# return nil if message is nil
return nil unless message return nil unless message
detect = CharlockHolmes::EncodingDetector.detect(message) rescue {} # if message is utf-8 encoding, just return it
message.force_encoding("utf-8")
return message if message.valid_encoding?
# It's better to default to UTF-8 as sometimes it's wrongly detected as another charset # if message is not utf-8 encoding, detect and convert it
if detect[:encoding] && detect[:confidence] == 100 detect = CharlockHolmes::EncodingDetector.detect(message)
CharlockHolmes::Converter.convert(message, detect[:encoding], 'UTF-8') if detect[:encoding] && detect[:confidence] > 60
else message.force_encoding(detect[:encoding])
message message.encode!("utf-8", detect[:encoding], :undef => :replace, :replace => "", :invalid => :replace)
end.force_encoding("utf-8") end
# Prevent app from crash cause of message.valid_encoding? ? message : raise
# encoding errors
# Prevent app from crash cause of encoding errors
rescue rescue
"--broken encoding: #{encoding}" "--broken encoding: #{detect[:encoding]}"
end end
def detect_encoding message def detect_encoding message