#! /usr/bin/ruby -w
# frozen_string_literal: true
# vim: set sw=2 et:
# Author: Giuseppe Bilotta
# New markov chain plugin
#
# Provenance: this is mark2.rb (new file, mode 100755, blob 731374b) as
# created by commit 76866e8e7dcd89ab5e43ba53492c4a730a3e4638
# ("Initial checkout", Giuseppe Bilotta, Sat, 28 Jul 2007 23:20:51 +0200;
# patch trailer 2.32.0.93.g670b81a890).

# Small slicing helpers used to split an n-gram into a context and the word
# adjacent to it.
class Array
  # @return [Array] every element except the last; [] for an empty array
  def butlast
    self[0...-1]
  end

  # @return [Array] every element except the first; [] for an empty array
  def butfirst
    drop(1)
  end
end

# Collects word-adjacency statistics from text.
#
# Storage layout (@mkv): @mkv[i] holds the chains of order i.
# Each chain of order >= 1 is in the form
#   [:array, :of, :symbols] => {
#     :word => [chance before, chance after]
#     :word => [chance before, chance after]
#   }
# except for order 0, which is just a hash of
#   {:word => chance}
# A nil "word" is used by #learn to mark the start and the end of a text.
class MarkovChainer
  # Maximum depth
  MAX_ORDER = 5

  # Word or nonword regexp:
  # can be used to scan a string splitting it into
  # words and nonwords.
  WNW = /\w+|\W/u

  def initialize
    # One table per order 0..MAX_ORDER. The size guards in #add_multi and
    # in the context check admit contexts of exactly MAX_ORDER words, so
    # index MAX_ORDER has to exist (allocating only MAX_ORDER tables made
    # such a call fail on a nil table).
    @mkv = Array.new(MAX_ORDER + 1) { {} }
  end

  # Count one occurrence of a single word in the order-0 table.
  #
  # @param sym [#to_sym, nil] the word; anything that cannot be turned into
  #   a symbol (notably the nil text boundary) is counted under the nil key
  # @return [Integer] the updated count
  def add_one(sym)
    key = sym.respond_to?(:to_sym) ? sym.to_sym : nil
    @mkv[0][key] = @mkv[0].fetch(key, 0) + 1
  end

  # Record that +prev+ was seen immediately before the words in +array+.
  #
  # @param array [Array] context of 1..MAX_ORDER words
  # @param prev [Object] the word preceding the context
  # @raise [RuntimeError] if the context is empty or longer than MAX_ORDER
  def add_before(array, prev)
    bump(array, prev, 0)
  end

  # Record that +nxt+ was seen immediately after the words in +array+.
  #
  # @param array [Array] context of 1..MAX_ORDER words
  # @param nxt [Object] the word following the context
  # @raise [RuntimeError] if the context is empty or longer than MAX_ORDER
  def add_after(array, nxt)
    bump(array, nxt, 1)
  end

  # Record an n-gram in both directions: its first word as the predecessor
  # of the remaining words, and its last word as the successor of the
  # leading words.
  #
  # @param array [Array] 2..MAX_ORDER+1 words
  # @raise [RuntimeError] if there are too many or too few words
  def add_multi(array)
    raise "Too many words in new data" if array.size > MAX_ORDER + 1

    add_before(array.butfirst, array.first)
    add_after(array.butlast, array.last)
  end

  # Record the given words: a single word goes to the order-0 counts,
  # several words are recorded as one n-gram.
  def add(*data)
    if data.size == 1
      add_one(data.first)
    else
      add_multi(data)
    end
  end

  # Split +text+ into words and nonwords and record every run of
  # consecutive tokens, with a nil marker before the first and after the
  # last token.
  #
  # NOTE(review): runs contain at most MAX_ORDER tokens, so this method only
  # fills the tables of order 0..MAX_ORDER-1; confirm whether runs of
  # MAX_ORDER+1 tokens (order MAX_ORDER) were meant to be learned as well.
  #
  # @param text [String] the text to learn from
  def learn(text)
    syms = [nil, *text.scan(WNW).map(&:intern), nil]

    syms.each_index do |start|
      longest = [MAX_ORDER, syms.size - start].min
      1.upto(longest) { |len| add(*syms[start, len]) }
    end
    # Debug dump of the learned tables.
    pp @mkv if defined? pp
  end

  private

  # Increment one of the two counters kept for +word+ next to +context+.
  # +slot+ is 0 for "seen before the context" and 1 for "seen after it".
  def bump(context, word, slot)
    raise "Not enough words in new data" if context.empty?
    raise "Too many words in new data" if context.size > MAX_ORDER

    table = @mkv[context.size]
    # Store a copy as the key so later changes to the caller's array
    # cannot alter a stored chain.
    edges = table.fetch(context) { table[context.dup] = {} }
    counts = (edges[word] ||= [0, 0])
    counts[slot] += 1
  end
end

mkv = MarkovChainer.new

mkv.learn("This is a test, this is a nice little test.")