word count examples
This commit is contained in:
parent
4cf4b22a75
commit
92873935fc
32118
examples/word_count/da-vinci.txt
Normal file
32118
examples/word_count/da-vinci.txt
Normal file
File diff suppressed because it is too large
Load diff
12761
examples/word_count/outline-of-science.txt
Normal file
12761
examples/word_count/outline-of-science.txt
Normal file
File diff suppressed because it is too large
Load diff
32758
examples/word_count/ulysses.txt
Normal file
32758
examples/word_count/ulysses.txt
Normal file
File diff suppressed because it is too large
Load diff
53
examples/word_count/word_count.rb
Normal file
53
examples/word_count/word_count.rb
Normal file
|
@ -0,0 +1,53 @@
|
||||||
|
require File.dirname(__FILE__) + '/../../couchrest'
|
||||||
|
|
||||||
|
# Connect to the CouchDB server on localhost and start the example from a
# freshly created database: any existing copy is dropped first, and an error
# raised by that drop is deliberately ignored.
db_name = 'word-count-example'
couch = CouchRest.new("http://localhost:5984")
db = couch.database(db_name)
begin
  db.delete!
rescue StandardError
  nil
end
db = couch.create_db(db_name)
|
||||||
|
|
||||||
|
# Read each book (located next to this script) and store it through db.save as
# a series of documents carrying :title (file name up to the first '.'),
# :chunk (0-based sequence number within the book) and :text (the chunk's
# lines joined back together).
#
# A chunk is written once the buffer holds more than 100 lines. The buffer is
# also flushed when the end of the file is reached; without that, the final
# partial chunk of every book would never be saved and its words would be
# missing from the counts.
['da-vinci.txt', 'outline-of-science.txt', 'ulysses.txt'].each do |book|
  title = book.split('.')[0]
  puts title
  File.open(File.join(File.dirname(__FILE__), book), 'r') do |file|
    lines = []
    chunk = 0
    while line = file.gets
      lines << line
      # Keep buffering until the chunk is full or there is nothing left to read.
      next unless lines.length > 100 || file.eof?
      db.save({
        :title => title,
        :chunk => chunk,
        :text => lines.join('')
      })
      chunk += 1
      lines = []
    end
  end
end
|
||||||
|
|
||||||
|
# View definition for the word count example, held as JavaScript source
# strings.
#   :map    - splits doc.text on non-word characters (/\W/) and, for every
#             non-empty word, emits the key [word, doc.title] with value 1.
#   :reduce - returns sum(combine), i.e. adds up the emitted values for a key
#             (sum is presumably supplied by the CouchDB view server).
word_count = {
|
||||||
|
:map => 'function(doc){
|
||||||
|
var words = doc.text.split(/\W/);
|
||||||
|
words.forEach(function(word){
|
||||||
|
if (word.length > 0) emit([word,doc.title],1);
|
||||||
|
});
|
||||||
|
}',
|
||||||
|
:reduce => 'function(key,combine){
|
||||||
|
return sum(combine);
|
||||||
|
}'
|
||||||
|
}
|
||||||
|
|
||||||
|
# Remove a previously stored "_design/word_count" document, if one can be
# fetched; any error from the fetch or the delete is ignored.
begin
  stale_design = db.get("_design/word_count")
  db.delete stale_design
rescue StandardError
  nil
end

# Publish the design document with two views: 'count' (map + reduce) and
# 'words' (the same map function without a reduce step).
views = {
  :count => word_count,
  :words => {:map => word_count[:map]}
}
db.save({"_id" => "_design/word_count", :views => views})

puts "The books have been stored in your CouchDB. To initiate the MapReduce process, visit http://localhost:5984/_utils/ in your browser and click 'word-count-example', then select view 'words' or 'count'. The process could take around an hour on an average MacBook."
|
||||||
|
|
||||||
|
|
34
examples/word_count/word_count_query.rb
Normal file
34
examples/word_count/word_count_query.rb
Normal file
|
@ -0,0 +1,34 @@
|
||||||
|
require File.dirname(__FILE__) + '/../../couchrest'
|
||||||
|
|
||||||
|
# Open the database that word_count.rb populated.
couch = CouchRest.new("http://localhost:5984")
db = couch.database('word-count-example')

puts "Now that we've parsed all those books into CouchDB, the queries we can run are incredibly flexible."

# Query the 'count' view with no parameters and print the raw result.
puts "\nThe simplest query we can run is the total word count for all words in all documents:"
total = db.view('word_count/count')
puts total.inspect
|
||||||
|
|
||||||
|
# Count a single word across every title by restricting the view to the key
# range [word] .. [word, 'Z'], then show the parameters that were used.
puts "\nWe can also narrow the query down to just one word, across all documents. Here is the count for 'flight':"
word = 'flight'
params = {:startkey => [word], :endkey => [word,'Z']}

word_result = db.view('word_count/count',params)
puts word_result.inspect

puts "\nWe scope the query using startkey and endkey params to take advantage of CouchDB's collation ordering. Here are the params for the last query:"
puts params.inspect
|
||||||
|
|
||||||
|
# Count one word within one title by querying the exact key [word, title],
# then print the parameters and the matching view URL.
puts "\nWe can also count words on a per-title basis."
title = 'da-vinci'
params = {:key => [word, title]}

title_result = db.view('word_count/count',params)
puts title_result.inspect

puts "\nHere are the params for 'flight' in the da-vinci book:"
puts params.inspect
puts
puts 'The url looks like this:',
     'http://localhost:5984/word-count-example/_view/word_count/count?key=["flight","da-vinci"]'
puts "\nTry dropping that in your browser..."
|
Loading…
Reference in a new issue