Merge pull request #21 from jamiecobbett/master

Fix import of very large datasets containing multibyte chars on Ruby 1.9
This commit is contained in:
Zach Dennis 2011-06-22 19:51:29 -07:00
commit 5a71efa2b4
2 changed files with 22 additions and 3 deletions

View file

@ -6,7 +6,7 @@ module ActiveRecord::Import::AbstractAdapter
# Returns the sum of the sizes of the passed in objects. This should # Returns the sum of the sizes of the passed in objects. This should
# probably be moved outside this class, but to where? # probably be moved outside this class, but to where?
def sum_sizes( *objects ) # :nodoc: def sum_sizes( *objects ) # :nodoc:
objects.inject( 0 ){|sum,o| sum += o.size } objects.inject( 0 ){ |sum,o| sum += o.bytesize }
end end
def get_insert_value_sets( values, sql_size, max_bytes ) # :nodoc: def get_insert_value_sets( values, sql_size, max_bytes ) # :nodoc:
@ -16,12 +16,12 @@ module ActiveRecord::Import::AbstractAdapter
comma_bytes = arr.size comma_bytes = arr.size
sql_size_thus_far = sql_size + current_size + val.size + comma_bytes sql_size_thus_far = sql_size + current_size + val.size + comma_bytes
if NO_MAX_PACKET == max_bytes or sql_size_thus_far <= max_bytes if NO_MAX_PACKET == max_bytes or sql_size_thus_far <= max_bytes
current_size += val.size current_size += val.bytesize
arr << val arr << val
else else
value_sets << arr value_sets << arr
arr = [ val ] arr = [ val ]
current_size = val.size current_size = val.bytesize
end end
# if we're on the last iteration push whatever we have in arr to value_sets # if we're on the last iteration push whatever we have in arr to value_sets

View file

@ -1,3 +1,4 @@
# encoding: UTF-8
def should_support_mysql_import_functionality def should_support_mysql_import_functionality
describe "building insert value sets" do describe "building insert value sets" do
@ -25,6 +26,24 @@ def should_support_mysql_import_functionality
assert_equal values[1], value_sets[1].first assert_equal values[1], value_sets[1].first
assert_equal values[2], value_sets[2].first assert_equal values[2], value_sets[2].first
end end
# Regression coverage for values whose byte length exceeds their character
# length: packet-size accounting must be done in bytes, not characters.
context "data contains multi-byte chars" do
  it "should properly build insert value set based on max packet allowed" do
    # each accented e should be 2 bytes, so each entry is 6 bytes instead of 5
    values = [
      "('é')",
      "('é')" ]
    adapter = ActiveRecord::Base.connection.class
    base_sql_size_in_bytes = 15
    # 26 is the boundary: counted by characters (15 + 5 + 5 + 1 comma) both
    # entries would just fit into a single set; counting the first entry as
    # 6 bytes pushes the second entry into a set of its own.
    max_bytes = 26

    # Verify the premise of this test instead of leaving the value unused:
    # sum_sizes must report bytes (2 entries * 6 bytes), not characters (10).
    values_size_in_bytes = adapter.sum_sizes( *values )
    assert_equal 12, values_size_in_bytes, 'Sizes should be summed in bytes, not characters!'

    value_sets = adapter.get_insert_value_sets( values, base_sql_size_in_bytes, max_bytes )
    assert_equal 2, value_sets.size, 'Two value sets were expected!'
  end
end
end end
describe "#import with :on_duplicate_key_update option (mysql specific functionality)" do describe "#import with :on_duplicate_key_update option (mysql specific functionality)" do