2008-05-25 20:49:37 +02:00
require File . dirname ( __FILE__ ) + '/../../couchrest'
# Connect to the CouchDB server on localhost and start from a fresh
# 'word-count-example' database: any existing one is deleted, then the
# database is created again.
# The URL must not carry surrounding whitespace.
couch = CouchRest.new("http://localhost:5984")
db = couch.database('word-count-example')
begin
  db.delete!
rescue StandardError
  # Best effort: a failed delete is deliberately ignored (presumably the
  # database does not exist yet on a first run -- TODO confirm).
  nil
end
db = couch.create_db('word-count-example')
2008-05-25 20:49:37 +02:00
2008-06-01 19:08:38 +02:00
# Store every book in the database as a series of text chunks.
#
# Each book file is read line by line from the directory containing this
# script. Lines are buffered and saved as one document per chunk with:
#   :title - the file name up to the first '.'
#   :chunk - zero-based chunk index within the book
#   :text  - the buffered lines joined together
%w{ america.txt da-vinci.txt outline-of-science.txt ulysses.txt }.each do |book|
  # %w{}.each do |book|
  title = book.split('.')[0]
  puts title
  File.open(File.join(File.dirname(__FILE__), book), 'r') do |file|
    lines = []
    chunk = 0
    while line = file.gets
      # Progress output: prints the current chunk index for every line read.
      puts chunk
      lines << line
      # A chunk is flushed once the buffer holds more than 10 lines.
      if lines.length > 10
        db.save({
          :title => title,
          :chunk => chunk,
          :text => lines.join('')
        })
        chunk += 1
        lines = []
      end
    end
    # Flush whatever is left in the buffer at end of file; without this the
    # final lines of each book would never be stored.
    unless lines.empty?
      db.save({
        :title => title,
        :chunk => chunk,
        :text => lines.join('')
      })
    end
  end
end
2008-06-01 19:08:38 +02:00
# word_count = {
# :map => 'function(doc){
# var words = doc.text.split(/\W/);
# words.forEach(function(word){
# if (word.length > 0) emit([word,doc.title],1);
# });
# }',
# :reduce => 'function(key,combine){
# return sum(combine);
# }'
# }
#
# db.delete db.get("_design/word_count") rescue nil
#
# db.save({
# "_id" => "_design/word_count",
# :views => {
# :count => word_count,
# :words => {:map => word_count[:map]}
# }
# })
2008-05-25 20:49:37 +02:00
2008-05-26 07:35:04 +02:00
# Final notice telling the user how to run the views from the CouchDB web UI.
# NOTE(review): the design document that defines the 'words' and 'count'
# views is commented out earlier in this script -- confirm the views exist.
completion_notice = " The books have been stored in your CouchDB. To initiate the MapReduce process, visit http://localhost:5984/_utils/ in your browser and click 'word-count-example', then select view 'words' or 'count'. The process could take about 15 minutes on an average MacBook. "
puts completion_notice
2008-05-25 20:49:37 +02:00