word count examples

This commit is contained in:
Chris Anderson 2008-05-25 11:49:37 -07:00
parent 4cf4b22a75
commit 92873935fc
5 changed files with 77724 additions and 0 deletions

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,53 @@
require File.dirname(__FILE__) + '/../../couchrest'
# Loads three plain-text books into a freshly created CouchDB database, one
# document per chunk of lines, then installs the "word_count" design document
# containing the map/reduce views used by the word-count example.
couch = CouchRest.new("http://localhost:5984")
db = couch.database('word-count-example')
# Best-effort cleanup: drop a database left over from a previous run, and
# ignore the failure when there is nothing to delete.
db.delete! rescue nil
db = couch.create_db('word-count-example')

# Number of lines stored per document. The original condition was
# `lines.length > 100`, i.e. a flush on the 101st line; keep that boundary.
lines_per_chunk = 101

['da-vinci.txt', 'outline-of-science.txt', 'ulysses.txt'].each do |book|
  title = book.split('.')[0]
  puts title
  File.open(File.join(File.dirname(__FILE__), book), 'r') do |file|
    lines = []
    chunk = 0

    # Stores the buffered lines as one document, then resets the buffer and
    # advances the chunk counter.
    save_chunk = lambda do
      db.save({
        :title => title,
        :chunk => chunk,
        :text => lines.join('')
      })
      chunk += 1
      lines = []
    end

    while (line = file.gets)
      lines << line
      save_chunk.call if lines.length >= lines_per_chunk
    end

    # Flush whatever is left after the last full chunk. Without this the
    # tail of every book (up to 100 lines) would never reach the database.
    save_chunk.call unless lines.empty?
  end
end

# JavaScript view functions, sent to CouchDB verbatim. The map function emits
# [word, title] => 1 for every non-empty token; the reduce function sums them.
word_count = {
  :map => 'function(doc){
var words = doc.text.split(/\W/);
words.forEach(function(word){
if (word.length > 0) emit([word,doc.title],1);
});
}',
  :reduce => 'function(key,combine){
return sum(combine);
}'
}

# Best-effort: remove a previous copy of the design document if one exists.
db.delete db.get("_design/word_count") rescue nil
db.save({
  "_id" => "_design/word_count",
  :views => {
    # :count is the map/reduce view; :words reuses the map function only.
    :count => word_count,
    :words => {:map => word_count[:map]}
  }
})
puts "The books have been stored in your CouchDB. To initiate the MapReduce process, visit http://localhost:5984/_utils/ in your browser and click 'word-count-example', then select view 'words' or 'count'. The process could take around an hour on an average MacBook."

View file

@ -0,0 +1,34 @@
require File.dirname(__FILE__) + '/../../couchrest'
# Demonstrates querying the "word_count/count" view created by the loader
# script: the grand total, the total for one word across all books, and the
# total for one word within a single book.
couch = CouchRest.new("http://localhost:5984")
db = couch.database('word-count-example')

puts "Now that we've parsed all those books into CouchDB, the queries we can run are incredibly flexible."

puts "\nThe simplest query we can run is the total word count for all words in all documents:"
puts db.view('word_count/count').inspect

puts "\nWe can also narrow the query down to just one word, across all documents. Here is the count for 'flight':"
word = 'flight'
# View keys are [word, title]. The range [word] .. [word, {}] selects every
# key whose first element is `word`: in CouchDB's collation an object sorts
# after any string, so {} is an upper bound for every possible title. The
# previous bound 'Z' would have missed titles that collate after "Z".
params = {
  :startkey => [word],
  :endkey => [word, {}]
}
puts db.view('word_count/count',params).inspect

puts "\nWe scope the query using startkey and endkey params to take advantage of CouchDB's collation ordering. Here are the params for the last query:"
puts params.inspect

puts "\nWe can also count words on a per-title basis."
title = 'da-vinci'
# An exact key match restricts the reduce to a single [word, title] pair.
params = {
  :key => [word, title]
}
puts db.view('word_count/count',params).inspect

puts "\nHere are the params for 'flight' in the da-vinci book:"
puts params.inspect
puts
puts 'The url looks like this:'
puts 'http://localhost:5984/word-count-example/_view/word_count/count?key=["flight","da-vinci"]'
puts "\nTry dropping that in your browser..."