From 80ddd2c09d9017f49976436982e2b9be5084fc4c Mon Sep 17 00:00:00 2001 From: Dmitriy Zaporozhets Date: Thu, 5 Apr 2012 01:51:49 +0300 Subject: [PATCH] Better encoding handling. Updated grit --- Gemfile.lock | 4 +-- app/controllers/commits_controller.rb | 1 + config/initializers/gitlabhq/20_grit_ext.rb | 30 ++++++++++++++++++--- 3 files changed, 29 insertions(+), 6 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index 46831d37..3808242b 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -14,7 +14,7 @@ GIT GIT remote: https://github.com/gitlabhq/grit.git - revision: ff015074ef35bd94cba943f9c0f98e161ab5851c + revision: 3fc864f3c637e06e2fa7a81f6b48a5df58a9bc5b specs: grit (2.4.1) diff-lcs (~> 1.1) @@ -148,7 +148,7 @@ GEM mime-types (~> 1.16) treetop (~> 1.4.8) method_source (0.7.0) - mime-types (1.17.2) + mime-types (1.18) modularity (0.6.1) multi_json (1.0.4) multi_xml (0.4.1) diff --git a/app/controllers/commits_controller.rb b/app/controllers/commits_controller.rb index 486e1988..3ee2e7a7 100644 --- a/app/controllers/commits_controller.rb +++ b/app/controllers/commits_controller.rb @@ -1,3 +1,4 @@ +require 'benchmark' require "base64" class CommitsController < ApplicationController diff --git a/config/initializers/gitlabhq/20_grit_ext.rb b/config/initializers/gitlabhq/20_grit_ext.rb index a8c97b10..9d4b2268 100644 --- a/config/initializers/gitlabhq/20_grit_ext.rb +++ b/config/initializers/gitlabhq/20_grit_ext.rb @@ -17,13 +17,35 @@ Grit::GitRuby::Internal::RawObject.class_eval do end private + def transcoding(content) content ||= "" - detection = CharlockHolmes::EncodingDetector.detect(content) - if hash = detection - content = CharlockHolmes::Converter.convert(content, hash[:encoding], 'UTF-8') if hash[:encoding] + hash = CharlockHolmes::EncodingDetector.detect(content) + + if hash + return content if hash[:type] == :binary + + if hash[:encoding] == "UTF-8" + content = if hash[:confidence] < 100 + content + else + content.force_encoding("UTF-8") + end + + return content + end + + CharlockHolmes::Converter.convert(content, hash[:encoding], 'UTF-8') if hash[:encoding] + else + content.force_encoding("UTF-8") end - content + end + + def z_binary?(string) + string.each_byte do |x| + x.nonzero? or return true + end + false end end