git.oblomov.eu Git - rbot-mark/blob - mark2.rb

   1 #! /usr/bin/ruby -w
   2 # vim: set sw=2 et:
   3 # Author: Giuseppe Bilotta <giuseppe.bilotta@gmail.com>
   4 # New markov chain plugin
   5
   6 class Array
   7   def butlast
   8     self[0,self.size-1]
   9   end
  10   def butfirst
  11     self[1,self.size]
  12   end
  13 end
  14
  15 class ChanceHash
  16
  17   def initialize
  18     @hash = Hash.new(0)
  19     @picker = {}
  20     @total = 0
  21     @valid_pick = false
  22   end
  23
  24   def increase(el)
  25     if @hash.key?(el)
  26       @hash[el] += 1
  27     else
  28       @hash[el] = 1
  29     end
  30     @valid_pick = false
  31     return @hash[el]
  32   end
  33
  34   def decrease(el)
  35     if @hash.key?(el)
  36       @hash[el] -= 1
  37       @hash.delete(el) if @hash[el] == 0
  38     end
  39     @valid_pick = false
  40     return @hash[el]
  41   end
  42
  43   def make_picker
  44     @picker.clear
  45     total = 0
  46     @hash.each { |el, ch|
  47       total += ch
  48       @picker[total] = el
  49     }
  50     @total = total
  51     @valid_pick = true
  52   end
  53
  54   def random
  55     make_picker unless @valid_pick
  56     pick = rand(@total)
  57     @picker.each { |ch, el|
  58       return el if pick < ch
  59     }
  60   end
  61 end
  62
  63 class MarkovChainer
  64   # Maximum depth
  65   MAX_ORDER = 5
  66
  67   # Word or nonword regexp:
  68   # can be used to scan a string splitting it into
  69   # words and nonwords.
  70   WNW = /\w+|\W/u
  71
  72   def initialize
  73     # mkv[i] holds the chains of order i
  74     @mkv = Array.new
  75
  76     # Each chain is in the form
  77     # [:array, :of, :symbols] => {
  78     #   :prev => ChanceHash,
  79     #   :next => ChanceHash
  80     # }
  81     # except for order 0, which is a simple ChanceHash
  82     # itself
  83     MAX_ORDER.times { |i|
  84       if i == 0
  85         @mkv[0] = ChanceHash.new
  86       else
  87         @mkv[i] = Hash.new { |hash, key|
  88           hash[key] = {:prev => ChanceHash.new, :next => ChanceHash.new}
  89         }
  90       end
  91     }
  92
  93   end
  94
  95   def add_one(sym)
  96     s = sym.to_sym rescue nil
  97     @mkv[0].increase(s)
  98   end
  99
 100   def add_before(array, prev)
 101     raise "Not enough words in new data" if array.empty?
 102     raise "Too many words in new data" if array.size > MAX_ORDER
 103     size = array.size
 104     h = @mkv[size][array.dup]
 105     h[:prev].increase(prev)
 106   end
 107
 108   def add_after(array, nxt)
 109     raise "Not enough words in new data" if array.empty?
 110     raise "Too many words in new data" if array.size > MAX_ORDER
 111     size = array.size
 112     h = @mkv[size][array.dup]
 113     h[:next].increase(nxt)
 114   end
 115
 116   def add_multi(array)
 117     raise "Too many words in new data" if array.size > MAX_ORDER + 1
 118     add_before(array.butfirst, array.first)
 119     add_after(array.butlast, array.last)
 120   end
 121
 122   def add(*data)
 123     if data.size == 1
 124       add_one(data.first)
 125     else
 126       add_multi(data)
 127     end
 128   end
 129
 130   def simple_learn(text)
 131     syms = text.scan(WNW).map { |w| w.intern }
 132     syms.unshift(nil)
 133     syms.push(nil)
 134
 135     syms.size.times { |i|
 136       [MAX_ORDER, syms.size-i].min.times { |ord|
 137         v = syms[i, ord+1]
 138         # puts "Learning #{v.inspect}"
 139         add(*v)
 140         # pp @mkv
 141       }
 142     }
 143   end
 144
 145   def learn(text, o={})
 146     opts = {:lowercase => true}.merge o
 147
 148     lc = opts[:lowercase]
 149
 150     simple_learn(text)
 151     if lc
 152       simple_learn(text.downcase)
 153     end
 154
 155     pp @mkv if defined? pp
 156   end
 157
 158 end
 159
 160 mkv = MarkovChainer.new
 161
 162 mkv.learn("This is a test, a nice little test indeed.")
 163