word count examples
This commit is contained in:
parent
4cf4b22a75
commit
92873935fc
5 changed files with 77724 additions and 0 deletions
32118
examples/word_count/da-vinci.txt
Normal file
32118
examples/word_count/da-vinci.txt
Normal file
File diff suppressed because it is too large
Load diff
12761
examples/word_count/outline-of-science.txt
Normal file
12761
examples/word_count/outline-of-science.txt
Normal file
File diff suppressed because it is too large
Load diff
32758
examples/word_count/ulysses.txt
Normal file
32758
examples/word_count/ulysses.txt
Normal file
File diff suppressed because it is too large
Load diff
53
examples/word_count/word_count.rb
Normal file
53
examples/word_count/word_count.rb
Normal file
|
@ -0,0 +1,53 @@
|
|||
require File.dirname(__FILE__) + '/../../couchrest'

# Word-count example, step 1: load the sample books into CouchDB and install
# the design document whose views perform the counting.
#
# The script (re)creates the 'word-count-example' database on the CouchDB
# server at localhost:5984, stores each book as a series of chunk documents
# ({:title, :chunk, :text}), and then saves the '_design/word_count' design
# document with two views: 'count' (map + reduce) and 'words' (map only).

couch = CouchRest.new("http://localhost:5984")
db = couch.database('word-count-example')
# Best-effort removal of a database left over from a previous run; any
# failure here is deliberately ignored.
db.delete! rescue nil
db = couch.create_db('word-count-example')

# The original loop flushed a chunk once it held more than 100 lines, i.e.
# chunks of 101 lines. The same size is kept so chunk boundaries are unchanged.
lines_per_chunk = 101

['da-vinci.txt', 'outline-of-science.txt', 'ulysses.txt'].each do |book|
  title = book.split('.')[0]
  puts title
  File.open(File.join(File.dirname(__FILE__),book),'r') do |file|
    # each_slice also yields the final, shorter slice, so the tail of the
    # book is stored too (previously the trailing lines were never saved).
    file.each_slice(lines_per_chunk).each_with_index do |lines, chunk|
      db.save({
        :title => title,
        :chunk => chunk,
        :text => lines.join('')
      })
    end
  end
end

# JavaScript source for the views. The map function emits [word, title] => 1
# for every non-empty token of a chunk's text; the reduce function sums them.
word_count = {
  :map => 'function(doc){
var words = doc.text.split(/\W/);
words.forEach(function(word){
if (word.length > 0) emit([word,doc.title],1);
});
}',
  :reduce => 'function(key,combine){
return sum(combine);
}'
}

# Best-effort removal of an existing design document before saving a new one.
db.delete db.get("_design/word_count") rescue nil

db.save({
  "_id" => "_design/word_count",
  :views => {
    :count => word_count,
    :words => {:map => word_count[:map]}
  }
})

puts "The books have been stored in your CouchDB. To initiate the MapReduce process, visit http://localhost:5984/_utils/ in your browser and click 'word-count-example', then select view 'words' or 'count'. The process could take around an hour on an average MacBook."
|
||||
|
||||
|
34
examples/word_count/word_count_query.rb
Normal file
34
examples/word_count/word_count_query.rb
Normal file
|
@ -0,0 +1,34 @@
|
|||
require File.dirname(__FILE__) + '/../../couchrest'

# Word-count example, step 2: run a few queries against the
# 'word_count/count' view of the 'word-count-example' database and print
# each result together with the parameters that produced it.

server = CouchRest.new("http://localhost:5984")
books = server.database('word-count-example')
count_view = 'word_count/count'

puts "Now that we've parsed all those books into CouchDB, the queries we can run are incredibly flexible."

# Query 1: no parameters at all.
puts "\nThe simplest query we can run is the total word count for all words in all documents:"
overall = books.view(count_view)
puts overall.inspect

# Query 2: a key range covering every [word, title] pair for a single word.
puts "\nWe can also narrow the query down to just one word, across all documents. Here is the count for 'flight':"
word = 'flight'
word_range = { :startkey => [word], :endkey => [word,'Z'] }

per_word = books.view(count_view, word_range)
puts per_word.inspect

puts "\nWe scope the query using startkey and endkey params to take advantage of CouchDB's collation ordering. Here are the params for the last query:"
puts word_range.inspect

# Query 3: an exact key, i.e. one word within one title.
puts "\nWe can also count words on a per-title basis."
title = 'da-vinci'
exact_key = { :key => [word, title] }
per_title = books.view(count_view, exact_key)
puts per_title.inspect
puts "\nHere are the params for 'flight' in the da-vinci book:"
puts exact_key.inspect
puts
puts 'The url looks like this:'
puts 'http://localhost:5984/word-count-example/_view/word_count/count?key=["flight","da-vinci"]'
puts "\nTry dropping that in your browser..."
|
Loading…
Reference in a new issue