couchrest_model/examples/word_count/word_count.rb

54 lines
1.4 KiB
Ruby
Raw Normal View History

2008-05-25 20:49:37 +02:00
require File.dirname(__FILE__) + '/../../couchrest'
# Connect to the CouchDB server on localhost and start from a fresh
# 'word-count-example' database: try to delete the existing one, then create it.
database_name = 'word-count-example'
couch = CouchRest.new("http://localhost:5984")
stale_db = couch.database(database_name)
begin
  stale_db.delete!
rescue StandardError
  # Best effort: any StandardError from the delete is ignored
  # (presumably the database simply does not exist yet on a first run).
  nil
end
db = couch.create_db(database_name)
# Store each book that sits next to this script in CouchDB as a series of
# documents, one per chunk of consecutive lines. Every document carries:
#   :title - the book's file name up to the first '.'
#   :chunk - zero-based index of the chunk within the book
#   :text  - the chunk's lines joined back together
lines_per_chunk = 101
['da-vinci.txt', 'outline-of-science.txt', 'ulysses.txt'].each do |book|
  title = book.split('.').first
  puts title
  File.open(File.join(File.dirname(__FILE__), book), 'r') do |file|
    # each_slice also yields the final, shorter group of lines, so the end of
    # every book is stored as well; a "save only when the buffer is full" loop
    # would silently drop up to 100 trailing lines per book.
    file.each_line.each_slice(lines_per_chunk).each_with_index do |lines, chunk|
      db.save({
        :title => title,
        :chunk => chunk,
        :text => lines.join('')
      })
    end
  end
end
# JavaScript map function: splits doc.text on non-word characters and emits
# the key [word, doc.title] with value 1 for every non-empty word.
map_function = 'function(doc){
var words = doc.text.split(/\W/);
words.forEach(function(word){
if (word.length > 0) emit([word,doc.title],1);
});
}'
# JavaScript reduce function: returns sum() of the values passed in.
reduce_function = 'function(key,combine){
return sum(combine);
}'
# View definition combining the map and reduce sources above.
word_count = {
  :map => map_function,
  :reduce => reduce_function
}
# Install the design document that exposes two views:
#   count - the word_count map together with its reduce
#   words - the same map function without a reduce
design_doc_id = "_design/word_count"
begin
  db.delete(db.get(design_doc_id))
rescue StandardError
  # Best effort: any StandardError from fetching or deleting a previous
  # design document is ignored.
  nil
end
views = {
  :count => word_count,
  :words => { :map => word_count[:map] }
}
db.save({
  "_id" => design_doc_id,
  :views => views
})
2008-05-26 07:35:04 +02:00
# Tell the user where to trigger and inspect the views once loading is done.
puts "The books have been stored in your CouchDB. " \
     "To initiate the MapReduce process, visit http://localhost:5984/_utils/ in your browser " \
     "and click 'word-count-example', then select view 'words' or 'count'. " \
     "The process could take about 15 minutes on an average MacBook."
2008-05-25 20:49:37 +02:00