merge charlock_holmes to master

This commit is contained in:
Saito 2011-12-29 11:51:33 +08:00
parent 6e5caa1985
commit a25a85b939
3 changed files with 9 additions and 11 deletions

View file

@@ -22,9 +22,9 @@ gem "acts_as_list"
gem "rdiscount" gem "rdiscount"
gem "acts-as-taggable-on", "~> 2.1.0" gem "acts-as-taggable-on", "~> 2.1.0"
gem "drapper" gem "drapper"
gem "rchardet19", "~> 1.3.5"
gem "resque" gem "resque"
gem "httparty" gem "httparty"
gem "charlock_holmes"
group :assets do group :assets do
gem "sass-rails", "~> 3.1.0" gem "sass-rails", "~> 3.1.0"

View file

@@ -77,6 +77,7 @@ GEM
xpath (~> 0.1.4) xpath (~> 0.1.4)
carrierwave (0.5.8) carrierwave (0.5.8)
activesupport (~> 3.0) activesupport (~> 3.0)
charlock_holmes (0.6.8)
childprocess (0.2.2) childprocess (0.2.2)
ffi (~> 1.0.6) ffi (~> 1.0.6)
coffee-rails (3.1.1) coffee-rails (3.1.1)
@@ -172,7 +173,6 @@ GEM
rdoc (~> 3.4) rdoc (~> 3.4)
thor (~> 0.14.6) thor (~> 0.14.6)
rake (0.9.2.2) rake (0.9.2.2)
rchardet19 (1.3.5)
rdiscount (1.6.8) rdiscount (1.6.8)
rdoc (3.11) rdoc (3.11)
json (~> 1.4) json (~> 1.4)
@@ -285,6 +285,7 @@ DEPENDENCIES
awesome_print awesome_print
capybara capybara
carrierwave carrierwave
charlock_holmes
coffee-rails (~> 3.1.0) coffee-rails (~> 3.1.0)
database_cleaner database_cleaner
devise (= 1.5.0) devise (= 1.5.0)
@@ -302,7 +303,6 @@ DEPENDENCIES
pygments.rb (= 0.2.3) pygments.rb (= 0.2.3)
rails (= 3.1.1) rails (= 3.1.1)
rails-footnotes (~> 3.7.5) rails-footnotes (~> 3.7.5)
rchardet19 (~> 1.3.5)
rdiscount rdiscount
resque resque
rspec-rails rspec-rails

View file

@@ -17,15 +17,13 @@ module Utils
end end
module CharEncode module CharEncode
def encode(string) def encode(content)
return '' unless string content ||= ''
cd = CharDet.detect(string) detection = CharlockHolmes::EncodingDetector.detect(content)
if cd.confidence > 0.6 if hash = detection
string.force_encoding(cd.encoding) content = CharlockHolmes::Converter.convert(content, hash[:encoding], 'UTF-8') if hash[:encoding]
end end
string.encode("utf-8", :undef => :replace, :replace => "?", :invalid => :replace) content
rescue
"Invalid Encoding"
end end
end end