couchrest_model/examples/word_count/word_count.rb

47 lines
1.3 KiB
Ruby
Raw Normal View History

2009-01-16 00:05:55 +01:00
require 'rubygems'
require 'couchrest'
2008-05-25 20:49:37 +02:00
2008-12-14 12:05:02 +01:00
# Connect to the local CouchDB server and start from an empty
# 'word-count-example' database.
couch = CouchRest.new("http://127.0.0.1:5984")
db_name = 'word-count-example'

# Drop whatever a previous run left behind. Deleting is best-effort: any
# StandardError raised by the delete (e.g. the database not existing yet)
# is deliberately ignored.
stale_db = couch.database(db_name)
begin
  stale_db.delete!
rescue StandardError
  nil
end

db = couch.create_db(db_name)
2008-05-25 20:49:37 +02:00
2008-06-20 22:36:36 +02:00
# Local file name => Project Gutenberg URL of each book to index.
books = {
  'outline-of-science.txt' => 'http://www.gutenberg.org/files/20417/20417.txt',
  'ulysses.txt' => 'http://www.gutenberg.org/dirs/etext03/ulyss12.txt',
  'america.txt' => 'http://www.gutenberg.org/files/16960/16960.txt',
  'da-vinci.txt' => 'http://www.gutenberg.org/dirs/etext04/7ldv110.txt'
}

# Download every book that is not already present next to this script.
books.each do |file, url|
  pathfile = File.join(File.dirname(__FILE__), file)
  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  next if File.exist?(pathfile)

  # Pass the arguments as a list so no shell is involved: a script directory
  # containing spaces or shell metacharacters can no longer break the command.
  downloaded = system('curl', '-o', pathfile, url)
  warn "Could not download #{url} to #{pathfile}" unless downloaded
end
# Store every book in CouchDB as a sequence of documents, each holding a
# chunk of consecutive lines:
#   :title - the book's file name up to the first '.'
#   :chunk - zero-based position of the chunk within the book
#   :text  - the chunk's lines joined back together
lines_per_chunk = 11 # same chunk size the original `lines.length > 10` check produced

books.keys.each do |book|
  title = book.split('.')[0]
  puts title
  File.open(File.join(File.dirname(__FILE__), book), 'r') do |file|
    # each_slice also yields the final, shorter chunk, so the trailing lines
    # of a book are saved instead of being silently dropped.
    file.each_line.each_slice(lines_per_chunk).with_index do |lines, chunk|
      db.save({
        :title => title,
        :chunk => chunk,
        :text => lines.join('')
      })
      # Progress output: number of chunks saved so far for this book.
      puts chunk + 1
    end
  end
end
2008-12-14 12:05:02 +01:00
# puts "The books have been stored in your CouchDB. To initiate the MapReduce process, visit http://127.0.0.1:5984/_utils/ in your browser and click 'word-count-example', then select view 'words' or 'count'. The process could take about 15 minutes on an average MacBook."
2009-01-16 00:05:55 +01:00
2008-05-25 20:49:37 +02:00