markov chain generation is pretty decent

This commit is contained in:
Chris Anderson 2008-06-01 15:07:56 -07:00
parent 4be3c5f12c
commit 8ff959a6f7
20 changed files with 25066 additions and 62 deletions

File diff suppressed because it is too large Load diff

34
examples/word_count/markov Executable file
View file

@ -0,0 +1,34 @@
#!/usr/bin/env ruby
# Prints a randomly generated sentence by walking the 'markov/chain-reduce'
# view of the 'word-count-example' database.
#
# Usage: markov WORD
#
# Starting from WORD (ARGV[0]), the view is queried for the rows keyed by
# [word, follower]. One of the (at most) five highest-valued followers is
# chosen at random and becomes the next word. The walk ends as soon as the
# current word has no usable follower, and the sentence is closed with '.'.

# Resolve the library relative to this file so the script can be started
# from any working directory.
require File.dirname(__FILE__) + '/../../couchrest'

cr = CouchRest.new("http://localhost:5984")
db = cr.database('word-count-example')

word = ARGV[0]
words = []   # every word printed so far, in order
wprobs = {}  # view responses cached per word, so a repeated word is not re-queried

while word
  # Separate words with a single space; nothing precedes the first word.
  $stdout.print ' ' unless words.empty?
  $stdout.print word
  $stdout.flush
  words << word

  wprobs[word] ||= db.view('markov/chain-reduce',
                           :startkey => [word, nil],
                           :endkey => [word, {}],
                           :group_level => 2)

  # Drop rows whose follower is the empty string, then rank by ascending value.
  rows = wprobs[word]['rows'].select { |r| r['key'][1] != '' }.sort_by { |r| r['value'] }
  # Array#last(5) returns fewer elements when fewer exist, and [] for no rows,
  # in which case row is nil and the loop terminates.
  row = rows.last(5).sort_by { rand }.first
  word = row ? row['key'][1] : nil
end

$stdout.print '.'
$stdout.flush
puts
# `say #{words.join(' ')}`

View file

@ -0,0 +1,3 @@
function(doc) {
doc.title && doc.chunk && emit([doc.title, doc.chunk],null);
}

View file

@ -0,0 +1 @@
function(doc){if(doc.text && doc.text.match(/united/)) emit([doc.title, doc.chunk],null)}

View file

@ -0,0 +1,6 @@
function(doc){
var words = doc.text.split(/\W/).filter(function(w) {return w.length > 0}).map(function(w){return w.toLowerCase()});
for (var i = 0, l = words.length; i < l; i++) {
emit(words.slice(i,4),doc.title);
}
}

View file

@ -0,0 +1,7 @@
function(key,vs,c){
if (c) {
return sum(vs);
} else {
return vs.length;
}
}

View file

@ -0,0 +1,6 @@
function(doc){
var words = doc.text.split(/\W/).map(function(w){return w.toLowerCase()});
words.forEach(function(word){
if (word.length > 0) emit([word,doc.title],1);
});
}

View file

@ -0,0 +1,3 @@
function(key,combine){
return sum(combine);
}

View file

@ -2,16 +2,18 @@ require File.dirname(__FILE__) + '/../../couchrest'
couch = CouchRest.new("http://localhost:5984")
db = couch.database('word-count-example')
# db.delete! rescue nil
# db = couch.create_db('word-count-example')
db.delete! rescue nil
db = couch.create_db('word-count-example')
%w{america.txt da-vinci.txt outline-of-science.txt ulysses.txt}.each do |book|
# %w{}.each do |book|
title = book.split('.')[0]
puts title
File.open(File.join(File.dirname(__FILE__),book),'r') do |file|
lines = []
chunk = 0
while line = file.gets
puts chunk
lines << line
if lines.length > 10
db.save({