From 5eb8fe009f9cfa431f426c4628948e69ed8dafbd Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Mon, 30 Jul 2007 02:05:52 +0200 Subject: [PATCH] Alternate implementation --- mark2.rb | 158 ++++++++++++++++++++++++++++--------------------------- test.rb | 9 +++- 2 files changed, 89 insertions(+), 78 deletions(-) diff --git a/mark2.rb b/mark2.rb index 1190597..a737971 100644 --- a/mark2.rb +++ b/mark2.rb @@ -27,34 +27,59 @@ class Array end end +class ChanceHashValue + attr_accessor :count, :key, :parent + def initialize(parent=nil, key=nil) + @count = 0 + @prev = nil + @next = nil + @parent = parent + @key = key + end + + def prev + @prev = ChanceHash.new(self, nil) unless @prev + @prev + end + + def next + @next = ChanceHash.new(nil, self) unless @next + @next + end + +end + class ChanceHash - def initialize - @hash = Hash.new(0) + attr_reader :next_of, :prev_of + def initialize(prev_of=nil, next_of=nil) + @hash = Hash.new { |hash, key| + hash[key] = ChanceHashValue.new(hash, key) + } @picker = [] @total = 0 @valid_pick = false + @prev_of = prev_of + @next_of = next_of end def size @hash.size end + def [](key) + @hash[key] + end + def increase(el) - if @hash.key?(el) - @hash[el] += 1 - else - @hash[el] = 1 - end + @hash[el].count += 1 @valid_pick = false return @hash[el] end def decrease(el) - if @hash.key?(el) - @hash[el] -= 1 - @hash.delete(el) if @hash[el] == 0 - end + @hash[el].count -= 1 + @hash.delete(el) if @hash[el].count == 0 @valid_pick = false return @hash[el] end @@ -62,14 +87,21 @@ class ChanceHash def make_picker @picker.clear total = 0 - @hash.each { |el, ch| - total += ch - @picker << [total, el] + @hash.each { |key, val| + if val.count > 0 + total += val.count + @picker << [total, key] + end } @total = total @valid_pick = true end + def picker + make_picker unless @valid_pick + @picker.dup + end + def random case @hash.size when 0 @@ -96,50 +128,31 @@ class MarkovChainer def initialize(ord=5) # mkv[i] holds the chains of order i @max_order = ord - @mkv = Array.new(@max_order+1) { |i| - if i==0 - ChanceHash.new - else - Hash.new { |hash, key| - hash[key] = {:prev => ChanceHash.new, :next => ChanceHash.new} - } - end - } + @mkv = ChanceHash.new end - def add_one(sym) - s = sym.to_sym rescue nil - @mkv[0].increase(s) + def add_forward(array, base=@mkv) + return if array.compact.empty? + sym = array.shift + s = sym and sym.to_sym + base.increase(s) if s or base != @mkv + return if array.empty? + add_forward(array, base[s].next) if s or base == @mkv end - def add_before(array, prev) - raise "Not enough words in new data" if array.empty? - raise "Too many words in new data" if array.size > @max_order - size = array.size - h = @mkv[size][array.dup] - h[:prev].increase(prev) + def add_backwards(array, base=@mkv) + return if array.compact.empty? + sym = array.pop + s = sym and sym.to_sym + base.increase(s) if s or base != @mkv + return if array.empty? + add_backwards(array, base[s].prev) if s or base == @mkv end - def add_after(array, nxt) - raise "Not enough words in new data" if array.empty? - raise "Too many words in new data" if array.size > @max_order - size = array.size - h = @mkv[size][array.dup] - h[:next].increase(nxt) - end - - def add_multi(array) + def add(array) raise "Too many words in new data" if array.size > @max_order + 1 - add_before(array.butfirst, array.first) - add_after(array.butlast, array.last) - end - - def add(*data) - if data.size == 1 - add_one(data.first) - else - add_multi(data) - end + add_forward(array.dup) + add_backwards(array.dup) end def simple_learn(text) @@ -148,13 +161,12 @@ class MarkovChainer syms.push(nil) syms.size.times { |i| - ([@max_order, syms.size-i].min+1).times { |ord| - v = syms[i, ord+1] - # puts "Learning #{v.inspect}" - add(*v) - # pp @mkv - } + ord = [@max_order, syms.size-i].min + v = syms[i, ord+1] + # puts "Learning #{v.inspect}" + add(v) } + # pp @mkv end def learn(text, o={}) @@ -167,22 +179,18 @@ class MarkovChainer simple_learn(text.downcase) end - pp @mkv if defined? pp + # pp @mkv if defined? pp end def raw_next(syms, o={}) max_order = o.fetch(:max_order, @max_order) - ar = syms.last([max_order, syms.size].min) + ar = syms.last([max_order, syms.size].min).dup ord = ar.size - if ord == 0 - @mkv[0].random - else - if @mkv[ord].key?(ar) - @mkv[ord][ar][:next].random - else - raw_next(ar.last(ord-1), o) - end + base = @mkv + until ar.empty? + base = base[ar.shift].next end + base.random end def next(text, o={}) @@ -192,17 +200,13 @@ class MarkovChainer def raw_prev(syms, o={}) max_order = o.fetch(:max_order, @max_order) - ar = syms.first([max_order, syms.size].min) + ar = syms.first([max_order, syms.size].min).dup ord = ar.size - if ord == 0 - @mkv[0].random - else - if @mkv[ord].key?(ar) - @mkv[ord][ar][:prev].random - else - raw_prev(ar.first(ord-1), o) - end + base = @mkv + until ar.empty? + base = base[ar.pop].prev end + base.random end def prev(text, o={}) @@ -233,7 +237,7 @@ class MarkovChainer def complete(text, o={}) txt = text while txt.empty? do - txt = @mkv[0].random.to_s + txt = @mkv.random.to_s end syms = txt.scan(WNW).map { |w| w.intern } prev = raw_prev(syms, o) diff --git a/test.rb b/test.rb index 7dd392c..33141aa 100755 --- a/test.rb +++ b/test.rb @@ -17,6 +17,12 @@ size = File.size?(fname) return unless size +# mkv.learn("This is a test, this is a test indeed.") +# +# pp mkv.instance_variable_get(:@mkv) if defined? pp +# +# mkv.complete("") + old_ratio = 0 File.open(fname) { |file| file.each { |line| @@ -25,7 +31,8 @@ File.open(fname) { |file| if new_ratio > old_ratio old_ratio = new_ratio puts "\n\n\nLearned #{new_ratio}%" - puts "%u words known" % mkv.instance_variable_get(:@mkv)[0].size + # pp mkv.instance_variable_get(:@mkv) if defined? pp + puts "%u words known" % mkv.instance_variable_get(:@mkv).size min_ord.upto(max_ord) { |ord| puts "\nOrder #{ord}::" puts mkv.complete("", :max_order=>ord) -- 2.32.0.93.g670b81a890