From b36552e710172f907746cada920afcb068aba60f Mon Sep 17 00:00:00 2001 From: Chris Anderson Date: Mon, 2 Jun 2008 09:34:04 -0700 Subject: [PATCH] refactored markov to be clearer --- examples/word_count/markov | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/examples/word_count/markov b/examples/word_count/markov index a6b848f..af7465e 100755 --- a/examples/word_count/markov +++ b/examples/word_count/markov @@ -1,28 +1,32 @@ #!/usr/bin/env ruby -require '../../couchrest' +require File.expand_path(File.dirname(__FILE__)) + '/../../couchrest' cr = CouchRest.new("http://localhost:5984") -db = cr.database('word-count-example') +@db = cr.database('word-count-example') +@word_memoizer = {} + +def probable_follower_for(word) + @word_memoizer[word] ||= @db.view('markov/chain-reduce', :startkey => [word,nil], :endkey => [word,{}],:group_level => 2) + + # puts + # puts "search #{word} #{wprobs[word]['rows'].length}" + # @word_memoizer[word]['rows'].sort_by{|r|r['value']}.each{|r|puts [r['value'],r['key']].inspect} + + rows = @word_memoizer[word]['rows'].select{|r|(r['key'][1]!='')}.sort_by{|r|r['value']} + row = rows[(-1*[rows.length,5].min)..-1].sort_by{rand}[0] + row ? row['key'][1] : nil +end + word = ARGV[0] words = [word] -wprobs = {} while word $stdout.print ' ' if words.length > 1 $stdout.print word $stdout.flush - - wprobs[word] ||= db.view('markov/chain-reduce', :startkey => [word,nil], :endkey => [word,{}],:group_level => 2) - - # puts - # puts "search #{word} #{wprobs[word]['rows'].length}" - # wprobs[word]['rows'].sort_by{|r|r['value']}.each{|r|puts [r['value'],r['key']].inspect} - - rows = wprobs[word]['rows'].select{|r|(r['key'][1]!='')}.sort_by{|r|r['value']} - row = rows[(-1*[rows.length,5].min)..-1].sort_by{rand}[0] - word = row ? row['key'][1] : nil + word = probable_follower_for(word) words << word end