markov chain generation is pretty decent
This commit is contained in:
parent
4be3c5f12c
commit
8ff959a6f7
20 changed files with 25066 additions and 62 deletions
24398
examples/word_count/america.txt
Normal file
24398
examples/word_count/america.txt
Normal file
File diff suppressed because it is too large
Load diff
34
examples/word_count/markov
Executable file
34
examples/word_count/markov
Executable file
|
@ -0,0 +1,34 @@
|
|||
#!/usr/bin/env ruby

# Prints a Markov-chain sentence built from the 'word-count-example' database.
#
# Usage: ./markov <seed-word>
#
# Starting from the seed word, the script repeatedly queries the
# 'markov/chain-reduce' view (grouped at level 2, i.e. by [word, next_word])
# for the rows whose key starts with the current word, keeps the five rows
# with the highest reduced value, and picks one of them at random as the next
# word. The walk ends when no row is available or the chosen row has no
# second key element.

# Resolve the library relative to this file (the loader script in this example
# directory does the same) so the script runs from any working directory.
require File.dirname(__FILE__) + '/../../couchrest'

cr = CouchRest.new("http://localhost:5984")
db = cr.database('word-count-example')

word = ARGV[0]
words = []   # words printed so far, in order (never contains nil)
wprobs = {}  # view responses cached per word, so a repeated word costs no extra request

while word
  # Separate words with a single space, but do not lead with one.
  $stdout.print ' ' unless words.empty?
  $stdout.print word
  $stdout.flush
  words << word

  wprobs[word] ||= db.view('markov/chain-reduce', :startkey => [word,nil], :endkey => [word,{}],:group_level => 2)

  # puts
  # puts "search #{word} #{wprobs[word]['rows'].length}"
  # wprobs[word]['rows'].sort_by{|r|r['value']}.each{|r|puts [r['value'],r['key']].inspect}

  # Drop rows whose follower is the empty string, then order by ascending
  # value so the strongest candidates sit at the end of the list.
  rows = wprobs[word]['rows'].reject { |r| r['key'][1] == '' }.sort_by { |r| r['value'] }

  # Choose uniformly among (at most) the five highest-valued rows;
  # `sample` returns nil for an empty list, which ends the walk.
  row = rows.last(5).sample
  word = row && row['key'][1]
end

$stdout.print '.'
$stdout.flush
puts

# `say #{words.join(' ')}`
|
||||
|
3
examples/word_count/views/books/chunked-map.js
Normal file
3
examples/word_count/views/books/chunked-map.js
Normal file
|
@ -0,0 +1,3 @@
|
|||
function(doc) {
|
||||
doc.title && doc.chunk && emit([doc.title, doc.chunk],null);
|
||||
}
|
1
examples/word_count/views/books/united-map.js
Normal file
1
examples/word_count/views/books/united-map.js
Normal file
|
@ -0,0 +1 @@
|
|||
function(doc){if(doc.text && doc.text.match(/united/)) emit([doc.title, doc.chunk],null)}
|
6
examples/word_count/views/markov/chain-map.js
Normal file
6
examples/word_count/views/markov/chain-map.js
Normal file
|
@ -0,0 +1,6 @@
|
|||
function(doc){
|
||||
var words = doc.text.split(/\W/).filter(function(w) {return w.length > 0}).map(function(w){return w.toLowerCase()});
|
||||
for (var i = 0, l = words.length; i < l; i++) {
|
||||
emit(words.slice(i,4),doc.title);
|
||||
}
|
||||
}
|
7
examples/word_count/views/markov/chain-reduce.js
Normal file
7
examples/word_count/views/markov/chain-reduce.js
Normal file
|
@ -0,0 +1,7 @@
|
|||
function(key,vs,c){
|
||||
if (c) {
|
||||
return sum(vs);
|
||||
} else {
|
||||
return vs.length;
|
||||
}
|
||||
}
|
6
examples/word_count/views/word_count/count-map.js
Normal file
6
examples/word_count/views/word_count/count-map.js
Normal file
|
@ -0,0 +1,6 @@
|
|||
function(doc){
|
||||
var words = doc.text.split(/\W/).map(function(w){return w.toLowerCase()});
|
||||
words.forEach(function(word){
|
||||
if (word.length > 0) emit([word,doc.title],1);
|
||||
});
|
||||
}
|
3
examples/word_count/views/word_count/count-reduce.js
Normal file
3
examples/word_count/views/word_count/count-reduce.js
Normal file
|
@ -0,0 +1,3 @@
|
|||
function(key,combine){
|
||||
return sum(combine);
|
||||
}
|
|
@ -2,16 +2,18 @@ require File.dirname(__FILE__) + '/../../couchrest'
|
|||
|
||||
couch = CouchRest.new("http://localhost:5984")
|
||||
db = couch.database('word-count-example')
|
||||
# db.delete! rescue nil
|
||||
# db = couch.create_db('word-count-example')
|
||||
db.delete! rescue nil
|
||||
db = couch.create_db('word-count-example')
|
||||
|
||||
%w{america.txt da-vinci.txt outline-of-science.txt ulysses.txt}.each do |book|
|
||||
# %w{}.each do |book|
|
||||
title = book.split('.')[0]
|
||||
puts title
|
||||
File.open(File.join(File.dirname(__FILE__),book),'r') do |file|
|
||||
lines = []
|
||||
chunk = 0
|
||||
while line = file.gets
|
||||
puts chunk
|
||||
lines << line
|
||||
if lines.length > 10
|
||||
db.save({
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue