3 # Author: Giuseppe Bilotta <giuseppe.bilotta@gmail.com>
4 # New Markov chain plugin
19 # Word or nonword regexp:
20 # can be used to scan a string splitting it into
25 # mkv[i] holds the chains of order i
32 # Each chain is in the form
33 # [:array, :of, :symbols] => {
34 # :word => [chance before, chance after]
35 # :word => [chance before, chance after]
37 # except for order 0, which is just a hash of
42 s = sym.to_sym rescue nil
43 if @mkv[0].has_key?(s)
50 def add_before(array, prev)
51 raise "Not enough words in new data" if array.empty?
52 raise "Too many words in new data" if array.size > MAX_ORDER
54 if @mkv[size].has_key?(array)
62 @mkv[size][array.dup] = { prev => [1, 0] }
66 def add_after(array, nxt)
67 raise "Not enough words in new data" if array.empty?
68 raise "Too many words in new data" if array.size > MAX_ORDER
70 if @mkv[size].has_key?(array)
78 @mkv[size][array.dup] = { nxt => [0, 1] }
83 raise "Too many words in new data" if array.size > MAX_ORDER + 1
84 add_before(array.butfirst, array.first)
85 add_after(array.butlast, array.last)
97 syms = text.scan(WNW).map { |w| w.intern }
101 syms.size.times { |i|
102 [MAX_ORDER, syms.size-i].min.times { |ord|
104 # puts "Learning #{v.inspect}"
109 pp @mkv if defined? pp
114 mkv = MarkovChainer.new
116 mkv.learn("This is a test, this is a nice little test.")