From a25a85b9396cbc07bf2bfcd12d3c0a6685df66d6 Mon Sep 17 00:00:00 2001 From: Saito Date: Thu, 29 Dec 2011 11:51:33 +0800 Subject: [PATCH] merge charlock_holmes to master --- Gemfile | 2 +- Gemfile.lock | 4 ++-- lib/utils.rb | 14 ++++++-------- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/Gemfile b/Gemfile index 3c7b1a4f..e240fa65 100644 --- a/Gemfile +++ b/Gemfile @@ -22,9 +22,9 @@ gem "acts_as_list" gem "rdiscount" gem "acts-as-taggable-on", "~> 2.1.0" gem "drapper" -gem "rchardet19", "~> 1.3.5" gem "resque" gem "httparty" +gem "charlock_holmes" group :assets do gem "sass-rails", "~> 3.1.0" diff --git a/Gemfile.lock b/Gemfile.lock index 09fecb88..86a69981 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -77,6 +77,7 @@ GEM xpath (~> 0.1.4) carrierwave (0.5.8) activesupport (~> 3.0) + charlock_holmes (0.6.8) childprocess (0.2.2) ffi (~> 1.0.6) coffee-rails (3.1.1) @@ -172,7 +173,6 @@ GEM rdoc (~> 3.4) thor (~> 0.14.6) rake (0.9.2.2) - rchardet19 (1.3.5) rdiscount (1.6.8) rdoc (3.11) json (~> 1.4) @@ -285,6 +285,7 @@ DEPENDENCIES awesome_print capybara carrierwave + charlock_holmes coffee-rails (~> 3.1.0) database_cleaner devise (= 1.5.0) @@ -302,7 +303,6 @@ DEPENDENCIES pygments.rb (= 0.2.3) rails (= 3.1.1) rails-footnotes (~> 3.7.5) - rchardet19 (~> 1.3.5) rdiscount resque rspec-rails diff --git a/lib/utils.rb b/lib/utils.rb index 2de14761..8e5d4694 100644 --- a/lib/utils.rb +++ b/lib/utils.rb @@ -17,15 +17,13 @@ module Utils end module CharEncode - def encode(string) - return '' unless string - cd = CharDet.detect(string) - if cd.confidence > 0.6 - string.force_encoding(cd.encoding) + def encode(content) + content ||= '' + detection = CharlockHolmes::EncodingDetector.detect(content) + if hash = detection + content = CharlockHolmes::Converter.convert(content, hash[:encoding], 'UTF-8') if hash[:encoding] end - string.encode("utf-8", :undef => :replace, :replace => "?", :invalid => :replace) - rescue - "Invalid Encoding" + content end end