commit ca5f83e1cfed91ebed03d97d9d0a6ab2ac1c2a2c Author: Zach Dennis Date: Thu Feb 25 22:53:30 2010 -0500 Initial commit of making ar-extensions import Rails 3 friendly diff --git a/Gemfile b/Gemfile new file mode 100644 index 0000000..029f295 --- /dev/null +++ b/Gemfile @@ -0,0 +1,4 @@ +source :gemcutter + +gem "rails", "= 3.0.0.beta" + diff --git a/lib/ar-extensions.rb b/lib/ar-extensions.rb new file mode 100644 index 0000000..20c10b4 --- /dev/null +++ b/lib/ar-extensions.rb @@ -0,0 +1,7 @@ +require "active_record" +require "active_record/version" + +module ActiveRecord::Extensions +end + +require "ar-extensions/import" diff --git a/lib/ar-extensions/import.rb b/lib/ar-extensions/import.rb new file mode 100644 index 0000000..806e9ec --- /dev/null +++ b/lib/ar-extensions/import.rb @@ -0,0 +1,351 @@ +require "ostruct" + +module ActiveRecord::Extensions::ConnectionAdapters ; end + +module ActiveRecord::Extensions::Import #:nodoc: + + module ImportSupport #:nodoc: + def supports_import? #:nodoc: + true + end + end + + module OnDuplicateKeyUpdateSupport #:nodoc: + def supports_on_duplicate_key_update? #:nodoc: + true + end + end + +end + +class ActiveRecord::Base + class << self + + # use tz as set in ActiveRecord::Base + tproc = @@default_timezone == :utc ? lambda { Time.now.utc } : lambda { Time.now } + AREXT_RAILS_COLUMNS = { + :create => { "created_on" => tproc , + "created_at" => tproc }, + :update => { "updated_on" => tproc , + "updated_at" => tproc } + } + AREXT_RAILS_COLUMN_NAMES = AREXT_RAILS_COLUMNS[:create].keys + AREXT_RAILS_COLUMNS[:update].keys + + # Returns true if the current database connection adapter + # supports import functionality, otherwise returns false. + def supports_import? + connection.supports_import? + rescue NoMethodError + false + end + + # Returns true if the current database connection adapter + # supports on duplicate key update functionality, otherwise + # returns false. + def supports_on_duplicate_key_update? 
+ connection.supports_on_duplicate_key_update? + rescue NoMethodError + false + end + + # Imports a collection of values to the database. + # + # This is more efficient than using ActiveRecord::Base#create or + # ActiveRecord::Base#save multiple times. This method works well if + # you want to create more than one record at a time and do not care + # about having ActiveRecord objects returned for each record + # inserted. + # + # This can be used with or without validations. It does not utilize + # the ActiveRecord::Callbacks during creation/modification while + # performing the import. + # + # == Usage + # Model.import array_of_models + # Model.import column_names, array_of_values + # Model.import column_names, array_of_values, options + # + # ==== Model.import array_of_models + # + # With this form you can call _import_ passing in an array of model + # objects that you want updated. + # + # ==== Model.import column_names, array_of_values + # + # The first parameter +column_names+ is an array of symbols or + # strings which specify the columns that you want to update. + # + # The second parameter, +array_of_values+, is an array of + # arrays. Each subarray is a single set of values for a new + # record. The order of values in each subarray should match up to + # the order of the +column_names+. + # + # ==== Model.import column_names, array_of_values, options + # + # The first two parameters are the same as the above form. The third + # parameter, +options+, is a hash. This is optional. Please see + # below for what +options+ are available. + # + # == Options + # * +validate+ - true|false, tells import whether or not to use \ + # ActiveRecord validations. Validations are enforced by default. + # * +on_duplicate_key_update+ - an Array or Hash, tells import to \ + # use MySQL's ON DUPLICATE KEY UPDATE ability. See On Duplicate\ + # Key Update below. + # * +synchronize+ - an array of ActiveRecord instances for the model + # that you are currently importing data into. 
This synchronizes + # existing model instances in memory with updates from the import. + # * +timestamps+ - true|false, tells import to not add timestamps \ + # (if false) even if record timestamps is enabled in ActiveRecord::Base + # + # == Examples + # class BlogPost < ActiveRecord::Base ; end + # + # # Example using array of model objects + # posts = [ BlogPost.new( :author_name=>'Zach Dennis', :title=>'AREXT' ), + # BlogPost.new( :author_name=>'Zach Dennis', :title=>'AREXT2' ), + # BlogPost.new( :author_name=>'Zach Dennis', :title=>'AREXT3' ) ] + # BlogPost.import posts + # + # # Example using column_names and array_of_values + # columns = [ :author_name, :title ] + # values = [ [ 'zdennis', 'test post' ], [ 'jdoe', 'another test post' ] ] + # BlogPost.import columns, values + # + # # Example using column_names, array_of_values and options + # columns = [ :author_name, :title ] + # values = [ [ 'zdennis', 'test post' ], [ 'jdoe', 'another test post' ] ] + # BlogPost.import( columns, values, :validate => false ) + # + # # Example synchronizing existing instances in memory + # post = BlogPost.find_by_author_name( 'zdennis' ) + # puts post.author_name # => 'zdennis' + # columns = [ :author_name, :title ] + # values = [ [ 'yoda', 'test post' ] ] + # BlogPost.import columns, values, :synchronize=>[ post ] + # puts post.author_name # => 'yoda' + # + # == On Duplicate Key Update (MySQL only) + # + # The :on_duplicate_key_update option can be either an Array or a Hash. + # + # ==== Using an Array + # + # The :on_duplicate_key_update option can be an array of column + # names. The column names are the only fields that are updated if + # a duplicate record is found. Below is an example: + # + # BlogPost.import columns, values, :on_duplicate_key_update=>[ :date_modified, :content, :author ] + # + # ==== Using A Hash + # + # The :on_duplicate_key_update option can be a hash of column name + # to model attribute name mappings.
This gives you finer grained + # control over what fields are updated with what attributes on your + # model. Below is an example: + # + # BlogPost.import columns, attributes, :on_duplicate_key_update=>{ :title => :title } + # + # == Returns + # This returns an object which responds to +failed_instances+ and +num_inserts+. + # * failed_instances - an array of objects that failed validation and were not committed to the database. An empty array if no validation is performed. + # * num_inserts - the number of insert statements it took to import the data + def import( *args ) + @logger = Logger.new(STDOUT) + @logger.level = Logger::DEBUG + options = { :validate=>true, :timestamps=>true } + options.merge!( args.pop ) if args.last.is_a? Hash + + # assume array of model objects + if args.last.is_a?( Array ) and args.last.first.is_a? ActiveRecord::Base + if args.length == 2 + models = args.last + column_names = args.first + else + models = args.first + column_names = self.column_names.dup + end + + array_of_attributes = [] + models.each do |model| + # this next line breaks sqlite.so with a segmentation fault + # if model.new_record? || options[:on_duplicate_key_update] + attributes = [] + column_names.each do |name| + attributes << model.send( "#{name}_before_type_cast" ) + end + array_of_attributes << attributes + # end + end + # otherwise expect exactly two arrays: the column names and an array of value rows + elsif args.size == 2 and args.first.is_a?( Array ) and args.last.is_a?( Array ) + column_names, array_of_attributes = args + else + raise ArgumentError.new( "Invalid arguments!" ) + end + + # Force the primary key col into the insert if it's not + # on the list and we are using a sequence and stuff a nil + # value for it into each row so the sequencer will fire later + if !column_names.include?(primary_key) && sequence_name && connection.prefetch_primary_key?
+ column_names << primary_key + array_of_attributes.each { |a| a << nil } + end + + is_validating = options.delete( :validate ) + + # dup the passed in array so we don't modify it unintentionally (NOTE(review): shallow copy - only the outer array is duplicated, the row arrays are still the caller's objects) + array_of_attributes = array_of_attributes.dup + + # record timestamps unless disabled in ActiveRecord::Base or via the :timestamps option + if record_timestamps && options.delete( :timestamps ) + add_special_rails_stamps column_names, array_of_attributes, options + end + + return_obj = if is_validating + import_with_validations( column_names, array_of_attributes, options ) + else + num_inserts = import_without_validations_or_callbacks( column_names, array_of_attributes, options ) + OpenStruct.new :failed_instances=>[], :num_inserts=>num_inserts + end + + if options[:synchronize] + synchronize( options[:synchronize] ) + end + + return_obj.num_inserts = 0 if return_obj.num_inserts.nil? + return_obj + end + + # TODO import_from_table needs to be implemented. + def import_from_table( options ) # :nodoc: + end + + # Imports the passed in +column_names+ and +array_of_attributes+ + # given the passed in +options+ Hash with validations. Returns an + # object with the methods +failed_instances+ and +num_inserts+. + # +failed_instances+ is an array of instances that failed validations. + # +num_inserts+ is the number of inserts it took to import the data. See + # ActiveRecord::Base.import for more information on + # +column_names+, +array_of_attributes+ and +options+. + def import_with_validations( column_names, array_of_attributes, options={} ) + failed_instances = [] + + # create instances for each of our column/value sets + arr = validations_array_for_column_names_and_attributes( column_names, array_of_attributes ) + + # keep track of the instance and the position it is currently at. if this fails + # validation we'll use the index to remove it from the array_of_attributes + arr.each_with_index do |hsh,i| + instance = new( hsh ) + if not instance.valid?
+ array_of_attributes[ i ] = nil + failed_instances << instance + end + end + array_of_attributes.compact! + + num_inserts = array_of_attributes.empty? ? 0 : import_without_validations_or_callbacks( column_names, array_of_attributes, options ) + OpenStruct.new :failed_instances=>failed_instances, :num_inserts => num_inserts + end + + # Imports the passed in +column_names+ and +array_of_attributes+ + # given the passed in +options+ Hash. This will return the number + # of insert operations it took to create these records without + # validations or callbacks. See ActiveRecord::Base.import for more + # information on +column_names+, +array_of_attributes+ and + # +options+. + def import_without_validations_or_callbacks( column_names, array_of_attributes, options={} ) + escaped_column_names = quote_column_names( column_names ) + columns = [] + array_of_attributes.first.each_with_index { |arr,i| columns << columns_hash[ column_names[i] ] } + + if not supports_import? + columns_sql = "(" + escaped_column_names.join( ',' ) + ")" + insert_statements, values = [], [] + number_inserted = 0 + array_of_attributes.each do |arr| + my_values = [] + arr.each_with_index do |val,j| + if !sequence_name.blank? && column_names[j] == primary_key && val.nil?
+ my_values << connection.next_value_for_sequence(sequence_name) + else + my_values << connection.quote( val, columns[j] ) + end + end + insert_statements << "INSERT INTO #{quoted_table_name} #{columns_sql} VALUES(" + my_values.join( ',' ) + ")" + connection.execute( insert_statements.last ) + number_inserted += 1 + end + else + # generate the sql + insert_sql = connection.multiple_value_sets_insert_sql( quoted_table_name, escaped_column_names, options ) + values_sql = connection.values_sql_for_column_names_and_attributes( columns, array_of_attributes ) + post_sql_statements = connection.post_sql_statements( quoted_table_name, options ) + + # perform the inserts + number_inserted = connection.insert_many( [ insert_sql, post_sql_statements ].flatten, + values_sql, + "#{self.class.name} Create Many Without Validations Or Callbacks" ) + end + + number_inserted + end + + # Returns an array of quoted column names + def quote_column_names( names ) + names.map{ |name| connection.quote_column_name( name ) } + end + + + private + + + def add_special_rails_stamps( column_names, array_of_attributes, options ) + AREXT_RAILS_COLUMNS[:create].each_pair do |key, blk| + if self.column_names.include?(key) + value = blk.call + if index=column_names.index(key) + # replace every instance of the array of attributes with our value + array_of_attributes.each{ |arr| arr[index] = value } + else + column_names << key + array_of_attributes.each { |arr| arr << value } + end + end + end + + AREXT_RAILS_COLUMNS[:update].each_pair do |key, blk| + if self.column_names.include?(key) + value = blk.call + if index=column_names.index(key) + # replace every instance of the array of attributes with our value + array_of_attributes.each{ |arr| arr[index] = value } + else + column_names << key + array_of_attributes.each { |arr| arr << value } + end + + if options[:on_duplicate_key_update] + options[:on_duplicate_key_update] << key.to_sym if options[:on_duplicate_key_update].is_a?(Array) + 
options[:on_duplicate_key_update][key.to_sym] = key.to_sym if options[:on_duplicate_key_update].is_a?(Hash) + else + options[:on_duplicate_key_update] = [ key.to_sym ] + end + end + end + end + + # Returns an Array of Hashes for the passed in +column_names+ and +array_of_attributes+. + def validations_array_for_column_names_and_attributes( column_names, array_of_attributes ) # :nodoc: + arr = [] + array_of_attributes.each do |attributes| + c = 0 + hsh = attributes.inject( {} ){|hsh,attr| hsh[ column_names[c] ] = attr ; c+=1 ; hsh } + arr << hsh + end + arr + end + + end +end diff --git a/test/database.yml b/test/database.yml new file mode 100644 index 0000000..00b4f32 --- /dev/null +++ b/test/database.yml @@ -0,0 +1,28 @@ +common: &common + username: root + password: + encoding: utf8 + host: localhost + database: aroptests + +mysql: + <<: *common + adapter: mysql + +postgres: + <<: *common + adapter: postgres + min_messages: warning + +oracle: + <<: *common + adapter: oracle + min_messages: debug + +sqlite: + adapter: sqlite + dbfile: test.db + +sqlite3: + adapter: sqlite3 + database: test.db diff --git a/test/database.yml.sample b/test/database.yml.sample new file mode 100644 index 0000000..d1bdf51 --- /dev/null +++ b/test/database.yml.sample @@ -0,0 +1,28 @@ +common: &common + username: myuser + password: password + encoding: utf8 + host: localhost + database: aroptests + +mysql: + <<: *common + adapter: mysql + +postgres: + <<: *common + adapter: postgres + min_messages: warning + +oracle: + <<: *common + adapter: oracle + min_messages: debug + +sqlite: + adapter: sqlite + dbfile: test.db + +sqlite3: + adapter: sqlite3 + dbfile: test.db diff --git a/test/import_test.rb b/test/import_test.rb new file mode 100644 index 0000000..29a6bc7 --- /dev/null +++ b/test/import_test.rb @@ -0,0 +1,114 @@ +require File.expand_path(File.dirname(__FILE__) + '/test_helper') + +describe "#import" do + context "with :validation option" do + let(:columns) { %w(title author_name) } + 
let(:valid_values) { [[ "LDAP", "Jerry Carter"], ["Rails Recipes", "Chad Fowler"]] } + let(:invalid_values) { [[ "The RSpec Book", ""], ["Agile+UX", ""]] } + + context "with validation checks turned off" do + it "should import valid data" do + assert_difference "Topic.count", +2 do + Topic.import columns, valid_values, :validate => false + end + end + + it "should import invalid data" do + assert_difference "Topic.count", +2 do + Topic.import columns, invalid_values, :validate => false + end + end + end + + context "with validation checks turned on" do + it "should import valid data" do + assert_difference "Topic.count", +2 do + Topic.import columns, valid_values, :validate => true + end + end + + it "should not import invalid data" do + assert_no_difference "Topic.count" do + Topic.import columns, invalid_values, :validate => true + end + end + + it "should import valid data when mixed with invalid data" do + assert_difference "Topic.count", +2 do + Topic.import columns, valid_values + invalid_values, :validate => true + end + assert_equal 0, Topic.find_all_by_title(invalid_values.map(&:first)).count + end + end + end +end + # + # context "with an array of model instances" do + # it "should import attributes from those model instances" + # + # it "should import unsaved model instances" + # end + # + # context "ActiveRecord model niceties" do + # context "created_on columns" do + # it "should set the created_on column" + # + # it "should set the created_on column respecting the time zone" + # end + # + # context "created_at columns" do + # it "should set the created_at column" + # + # it "should set the created_at column respecting the time zone" + # end + # + # context "updated_on columns" do + # it "should set the updated_on column" + # + # it "should set the updated_on column respecting the time zone" + # end + # + # context "updated_at columns" do + # it "should set the updated_at column" + # + # it "should set the updated_at column respecting the time zone" + 
# end + # end + # + # context "importing over existing records" do + # it "should not add duplicate records" + # + # it "should not overwrite existing records" + # end + # + # it "should import models with attribute fields that are database reserved words" + # + # it "should return the number of inserts performed" + # end + # + # describe "computing insert value sets" do + # context "when the max allowed bytes is 33 and the base SQL is 26 bytes" do + # it "should return 3 value sets when given 3 value sets of 7 bytes a piece" + # end + # + # context "when the max allowed bytes is 40 and the base SQL is 26 bytes" do + # it "should return 3 value sets when given 3 value sets of 7 bytes a piece" + # end + # + # context "when the max allowed bytes is 41 and the base SQL is 26 bytes" do + # it "should return 3 value sets when given 2 value sets of 7 bytes a piece" + # end + # + # context "when the max allowed bytes is 48 and the base SQL is 26 bytes" do + # it "should return 3 value sets when given 2 value sets of 7 bytes a piece" + # end + # + # context "when the max allowed bytes is 49 and the base SQL is 26 bytes" do + # it "should return 3 value sets when given 1 value sets of 7 bytes a piece" + # end + # + # context "when the max allowed bytes is 999999 and the base SQL is 26 bytes" do + # it "should return 3 value sets when given 1 value sets of 7 bytes a piece" + # end + # end +# end \ No newline at end of file diff --git a/test/models/topic.rb b/test/models/topic.rb new file mode 100644 index 0000000..c0d916f --- /dev/null +++ b/test/models/topic.rb @@ -0,0 +1,6 @@ +class Topic < ActiveRecord::Base + validates_presence_of :author_name + has_many :books + + composed_of :description, :mapping => [ %w(title title), %w(author_name author_name)], :allow_nil => true, :class_name => "TopicDescription" +end diff --git a/test/schema/generic_schema.rb b/test/schema/generic_schema.rb new file mode 100644 index 0000000..5f22442 --- /dev/null +++ 
b/test/schema/generic_schema.rb @@ -0,0 +1,96 @@ +ActiveRecord::Schema.define do + + create_table :schema_info, :force=>true do |t| + t.column :version, :integer, :unique=>true + end + SchemaInfo.create :version=>SchemaInfo::VERSION + + create_table :group, :force => true do |t| + t.column :order, :string + t.timestamps + end + + create_table :topics, :force=>true do |t| + t.column :title, :string, :null=>false + t.column :author_name, :string + t.column :author_email_address, :string + t.column :written_on, :datetime + t.column :bonus_time, :time + t.column :last_read, :datetime + t.column :content, :text + t.column :approved, :boolean, :default=>'1' + t.column :replies_count, :integer + t.column :parent_id, :integer + t.column :type, :string + t.column :created_at, :datetime + t.column :updated_at, :datetime + end + + create_table :projects, :force=>true do |t| + t.column :name, :string + t.column :type, :string + end + + create_table :developers, :force=>true do |t| + t.column :name, :string + t.column :salary, :integer, :default=>'70000' + t.column :created_at, :datetime + t.column :team_id, :integer + t.column :updated_at, :datetime + end + + create_table :addresses, :force=>true do |t| + t.column :address, :string + t.column :city, :string + t.column :state, :string + t.column :zip, :string + t.column :developer_id, :integer + end + + create_table :teams, :force=>true do |t| + t.column :name, :string + end + + create_table :books, :force=>true do |t| + t.column :title, :string, :null=>false + t.column :publisher, :string, :null=>false, :default => 'Default Publisher' + t.column :author_name, :string, :null=>false + t.column :created_at, :datetime + t.column :created_on, :datetime + t.column :updated_at, :datetime + t.column :updated_on, :datetime + t.column :publish_date, :date + t.column :topic_id, :integer + t.column :for_sale, :boolean, :default => true + end + + create_table :languages, :force=>true do |t| + t.column :name, :string + t.column 
:developer_id, :integer + end + + create_table :shopping_carts, :force=>true do |t| + t.column :name, :string, :null => true + t.column :created_at, :datetime + t.column :updated_at, :datetime + end + + create_table :cart_items, :force => true do |t| + t.column :shopping_cart_id, :string, :null => false + t.column :book_id, :string, :null => false + t.column :copies, :integer, :default => 1 + t.column :created_at, :datetime + t.column :updated_at, :datetime + end + + add_index :cart_items, [:shopping_cart_id, :book_id], :unique => true, :name => 'uk_shopping_cart_books' + + create_table :animals, :force => true do |t| + t.column :name, :string, :null => false + t.column :size, :string, :default => nil + t.column :created_at, :datetime + t.column :updated_at, :datetime + end + + add_index :animals, [:name], :unique => true, :name => 'uk_animals' +end diff --git a/test/schema/version.rb b/test/schema/version.rb new file mode 100644 index 0000000..39bbad0 --- /dev/null +++ b/test/schema/version.rb @@ -0,0 +1,4 @@ +class SchemaInfo < ActiveRecord::Base + set_table_name 'schema_info' + VERSION = 12 +end diff --git a/test/test_helper.rb b/test/test_helper.rb new file mode 100644 index 0000000..405d5d7 --- /dev/null +++ b/test/test_helper.rb @@ -0,0 +1,63 @@ +ENV["RAILS_ENV"] = "test" +require 'pathname' + +this_dir = Pathname.new File.dirname(__FILE__) +$LOAD_PATH << this_dir.join("../lib") + +gem "rails", "3.0.0.beta" +require "rails" +require "rails/test_help" +require "active_record/fixtures" + +require "ar-extensions" +require "logger" + +require "ruby-debug" + +class ActiveSupport::TestCase + include ActiveRecord::TestFixtures + self.use_transactional_fixtures = true + + class << self + def describe(description, toplevel=nil, &blk) + text = toplevel ? 
description : "#{name} #{description}" + klass = Class.new(self) + klass.class_eval <<-RUBY_EVAL + def self.name + "#{text}" + end + RUBY_EVAL + klass.instance_eval &blk + end + alias_method :context, :describe + + def let(name, &blk) + define_method(name, &blk) + end + + def it(description, &blk) + define_method("test: #{name} #{description}", &blk) + end + end + +end + +def describe(description, &blk) + ActiveSupport::TestCase.describe(description, true, &blk) +end + +adapter = "sqlite3" + +ActiveRecord::Base.logger = Logger.new("foo.log") +ActiveRecord::Base.configurations["test"] = YAML.load(this_dir.join("database.yml").open)[adapter] +ActiveRecord::Base.establish_connection "test" + +# Load base/generic schema +require this_dir.join("schema/version") +require this_dir.join("schema/generic_schema") + +# Load adapter specific schema if one exists +adapter_schema = this_dir.join("schema/#{adapter}_schema") +require adapter_schema if File.exists?(adapter_schema) + +Dir[File.dirname(__FILE__) + "/models/*.rb"].each{ |file| require file } \ No newline at end of file