merge charlock_holmes to master

This commit is contained in:
Saito 2011-12-29 11:51:33 +08:00
parent 6e5caa1985
commit a25a85b939
3 changed files with 9 additions and 11 deletions

View file

@ -22,9 +22,9 @@ gem "acts_as_list"
gem "rdiscount"
gem "acts-as-taggable-on", "~> 2.1.0"
gem "drapper"
gem "rchardet19", "~> 1.3.5"
gem "resque"
gem "httparty"
gem "charlock_holmes"
group :assets do
gem "sass-rails", "~> 3.1.0"

View file

@ -77,6 +77,7 @@ GEM
xpath (~> 0.1.4)
carrierwave (0.5.8)
activesupport (~> 3.0)
charlock_holmes (0.6.8)
childprocess (0.2.2)
ffi (~> 1.0.6)
coffee-rails (3.1.1)
@ -172,7 +173,6 @@ GEM
rdoc (~> 3.4)
thor (~> 0.14.6)
rake (0.9.2.2)
rchardet19 (1.3.5)
rdiscount (1.6.8)
rdoc (3.11)
json (~> 1.4)
@ -285,6 +285,7 @@ DEPENDENCIES
awesome_print
capybara
carrierwave
charlock_holmes
coffee-rails (~> 3.1.0)
database_cleaner
devise (= 1.5.0)
@ -302,7 +303,6 @@ DEPENDENCIES
pygments.rb (= 0.2.3)
rails (= 3.1.1)
rails-footnotes (~> 3.7.5)
rchardet19 (~> 1.3.5)
rdiscount
resque
rspec-rails

View file

@ -17,15 +17,13 @@ module Utils
end
module CharEncode
def encode(string)
return '' unless string
cd = CharDet.detect(string)
if cd.confidence > 0.6
string.force_encoding(cd.encoding)
def encode(content)
content ||= ''
detection = CharlockHolmes::EncodingDetector.detect(content)
if hash = detection
content = CharlockHolmes::Converter.convert(content, hash[:encoding], 'UTF-8') if hash[:encoding]
end
string.encode("utf-8", :undef => :replace, :replace => "?", :invalid => :replace)
rescue
"Invalid Encoding"
content
end
end