From 75fa0632e67b2b91401673b0dd292db4103224c8 Mon Sep 17 00:00:00 2001 From: Saito Date: Mon, 28 Nov 2011 17:34:11 +0800 Subject: [PATCH 1/2] add rchardet19 to detect string encoding --- Gemfile | 65 ++++++++++++++++++++++++++-------------------------- Gemfile.lock | 2 ++ 2 files changed, 35 insertions(+), 32 deletions(-) diff --git a/Gemfile b/Gemfile index 9ef2f2a1..df904460 100644 --- a/Gemfile +++ b/Gemfile @@ -1,55 +1,56 @@ -source 'http://rubygems.org' +source "http://rubygems.org" -gem 'rails', '3.1.1' +gem "rails", "3.1.1" -gem 'sqlite3' -gem 'devise', "1.5.0" -gem 'stamp' -gem 'kaminari' -gem 'haml-rails' -gem 'jquery-rails' -gem 'grit', :git => 'https://github.com/gitlabhq/grit.git' +gem "sqlite3" +gem "devise", "1.5.0" +gem "stamp" +gem "kaminari" +gem "haml-rails" +gem "jquery-rails" +gem "grit", :git => "https://github.com/gitlabhq/grit.git" gem "carrierwave" -gem 'six' -gem 'therubyracer' -gem 'faker' -gem 'seed-fu', '~> 2.1.0' +gem "six" +gem "therubyracer" +gem "faker" +gem "seed-fu", "~> 2.1.0" gem "inifile" gem "pygments.rb", "0.2.3" gem "thin" gem "git" gem "acts_as_list" -gem 'rdiscount' -gem 'acts-as-taggable-on', '~> 2.1.0' -gem 'drapper' +gem "rdiscount" +gem "acts-as-taggable-on", "~> 2.1.0" +gem "drapper" +gem "rchardet19", "~> 1.3.5" group :assets do - gem 'sass-rails', "~> 3.1.0" - gem 'coffee-rails', "~> 3.1.0" - gem 'uglifier' + gem "sass-rails", "~> 3.1.0" + gem "coffee-rails", "~> 3.1.0" + gem "uglifier" end group :development do - gem 'letter_opener' - gem 'rails-footnotes', '~> 3.7.5' - gem 'annotate', :git => 'https://github.com/ctran/annotate_models.git' + gem "letter_opener" + gem "rails-footnotes", "~> 3.7.5" + gem "annotate", :git => "https://github.com/ctran/annotate_models.git" end group :development, :test do - gem 'rspec-rails' - gem 'capybara' - gem 'autotest' - gem 'autotest-rails' + gem "rspec-rails" + gem "capybara" + gem "autotest" + gem "autotest-rails" unless ENV["CI"] - gem 'ruby-debug19', :require => 'ruby-debug' + gem "ruby-debug19", :require => "ruby-debug" end - gem 'awesome_print' - gem 'database_cleaner' - gem 'launchy' + gem "awesome_print" + gem "database_cleaner" + gem "launchy" end group :test do - gem 'turn', :require => false - gem 'simplecov', :require => false + gem "turn", :require => false + gem "simplecov", :require => false gem "shoulda", "~> 3.0.0.beta2" end diff --git a/Gemfile.lock b/Gemfile.lock index 2ff90cd4..c975a956 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -157,6 +157,7 @@ GEM rdoc (~> 3.4) thor (~> 0.14.6) rake (0.9.2.2) + rchardet19 (1.3.5) rdiscount (1.6.8) rdoc (3.11) json (~> 1.4) @@ -268,6 +269,7 @@ DEPENDENCIES pygments.rb (= 0.2.3) rails (= 3.1.1) rails-footnotes (~> 3.7.5) + rchardet19 (~> 1.3.5) rdiscount rspec-rails ruby-debug19 From 46cbe5418947ab58c919432b9013252ada6a3bc3 Mon Sep 17 00:00:00 2001 From: Saito Date: Mon, 28 Nov 2011 17:46:41 +0800 Subject: [PATCH 2/2] fix the issue on github #157. directly force_encoding is wrong, must detect the file string's encoding. then force_encoding the string to it's encoding. last convert it to utf-8. --- app/helpers/commits_helper.rb | 1 + app/models/commit.rb | 8 +++++--- app/views/commits/_text_file.html.haml | 2 +- lib/graph_commit.rb | 7 ++++--- lib/utils.rb | 13 ++++++++++++- 5 files changed, 23 insertions(+), 8 deletions(-) diff --git a/app/helpers/commits_helper.rb b/app/helpers/commits_helper.rb index bc95f916..585564aa 100644 --- a/app/helpers/commits_helper.rb +++ b/app/helpers/commits_helper.rb @@ -1,4 +1,5 @@ module CommitsHelper + include Utils::CharEncode def diff_line(line, line_new = 0, line_old = 0) full_line = html_escape(line.gsub(/\n/, '')) color = if line[0] == "+" diff --git a/app/models/commit.rb b/app/models/commit.rb index 6d724bc8..0884e342 100644 --- a/app/models/commit.rb +++ b/app/models/commit.rb @@ -1,4 +1,6 @@ class Commit + include Utils::CharEncode + attr_accessor :commit attr_accessor :head @@ -20,7 +22,7 @@ class Commit end def safe_message - message.force_encoding(Encoding::UTF_8) + encode(message) end def created_at @@ -28,10 +30,10 @@ class Commit end def author_email - author.email.force_encoding(Encoding::UTF_8) + encode(author.email) end def author_name - author.name.force_encoding(Encoding::UTF_8) + encode(author.name) end end diff --git a/app/views/commits/_text_file.html.haml b/app/views/commits/_text_file.html.haml index bdc740b3..c9cb99e2 100644 --- a/app/views/commits/_text_file.html.haml +++ b/app/views/commits/_text_file.html.haml @@ -2,7 +2,7 @@ - line_new = 0 - lines_arr = diff.diff.lines.to_a - lines_arr.each do |line| - - line.force_encoding(Encoding::UTF_8) + - encode(line) - next if line.match(/^--- \/dev\/null/) - next if line.match(/^--- a/) - next if line.match(/^\+\+\+ b/) diff --git a/lib/graph_commit.rb b/lib/graph_commit.rb index 18b17022..1fcb9e78 100644 --- a/lib/graph_commit.rb +++ b/lib/graph_commit.rb @@ -1,6 +1,7 @@ require "grit" class GraphCommit + include Utils::CharEncode attr_accessor :time, :space attr_accessor :refs @@ -65,7 +66,7 @@ class GraphCommit # @param [GraphCommit] the commit object. # @param [Hash] map of commits # - # @return [Fixnum] max space used. + # @return [Fixnum] max space used. def self.mark_chain(mark, commit, map) commit.space = mark if commit.space == 0 m1 = mark - 1 @@ -96,13 +97,13 @@ class GraphCommit h[:parents] = self.parents.collect do |p| [p.id,0,0] end - h[:author] = author.name.force_encoding("UTF-8") + h[:author] = encode(author.name) h[:time] = time h[:space] = space h[:refs] = refs.collect{|r|r.name}.join(" ") unless refs.nil? h[:id] = sha h[:date] = date - h[:message] = message.force_encoding("UTF-8") + h[:message] = encode(message) h[:login] = author.email h end diff --git a/lib/utils.rb b/lib/utils.rb index f43e2edd..23f05f6f 100644 --- a/lib/utils.rb +++ b/lib/utils.rb @@ -16,9 +16,20 @@ module Utils end end + module CharEncode + def encode(string) + cd = CharDet.detect(string) + if cd.confidence > 0.6 + string.force_encoding(cd.encoding) + end + string.encode("utf-8", :undef => :replace, :replace => "?", :invalid => :replace) + end + end + module Colorize + include CharEncode def colorize - system_colorize(data, name) + system_colorize(encode(data), name) end def system_colorize(data, file_name)