word count smaller chunks

This commit is contained in:
Chris Anderson 2008-06-01 10:08:38 -07:00
parent f9c95bfce6
commit 32b8090ddb

View file

@@ -2,10 +2,10 @@ require File.dirname(__FILE__) + '/../../couchrest'
couch = CouchRest.new("http://localhost:5984") couch = CouchRest.new("http://localhost:5984")
db = couch.database('word-count-example') db = couch.database('word-count-example')
db.delete! rescue nil # db.delete! rescue nil
db = couch.create_db('word-count-example') # db = couch.create_db('word-count-example')
['da-vinci.txt', 'outline-of-science.txt', 'ulysses.txt'].each do |book| %w{america.txt da-vinci.txt outline-of-science.txt ulysses.txt}.each do |book|
title = book.split('.')[0] title = book.split('.')[0]
puts title puts title
File.open(File.join(File.dirname(__FILE__),book),'r') do |file| File.open(File.join(File.dirname(__FILE__),book),'r') do |file|
@@ -13,7 +13,7 @@ db = couch.create_db('word-count-example')
chunk = 0 chunk = 0
while line = file.gets while line = file.gets
lines << line lines << line
if lines.length > 100 if lines.length > 10
db.save({ db.save({
:title => title, :title => title,
:chunk => chunk, :chunk => chunk,
@@ -26,27 +26,27 @@ db = couch.create_db('word-count-example')
end end
end end
word_count = { # word_count = {
:map => 'function(doc){ # :map => 'function(doc){
var words = doc.text.split(/\W/); # var words = doc.text.split(/\W/);
words.forEach(function(word){ # words.forEach(function(word){
if (word.length > 0) emit([word,doc.title],1); # if (word.length > 0) emit([word,doc.title],1);
}); # });
}', # }',
:reduce => 'function(key,combine){ # :reduce => 'function(key,combine){
return sum(combine); # return sum(combine);
}' # }'
} # }
#
db.delete db.get("_design/word_count") rescue nil # db.delete db.get("_design/word_count") rescue nil
#
db.save({ # db.save({
"_id" => "_design/word_count", # "_id" => "_design/word_count",
:views => { # :views => {
:count => word_count, # :count => word_count,
:words => {:map => word_count[:map]} # :words => {:map => word_count[:map]}
} # }
}) # })
puts "The books have been stored in your CouchDB. To initiate the MapReduce process, visit http://localhost:5984/_utils/ in your browser and click 'word-count-example', then select view 'words' or 'count'. The process could take about 15 minutes on an average MacBook." puts "The books have been stored in your CouchDB. To initiate the MapReduce process, visit http://localhost:5984/_utils/ in your browser and click 'word-count-example', then select view 'words' or 'count'. The process could take about 15 minutes on an average MacBook."