# Author: Giuseppe Bilotta <giuseppe.bilotta@gmail.com>
# New markov chain plugin
+$swig_runtime_data_type_pointer2 = nil
+require 'sqlite3'
+
class Array
+ def sql_group # Render the elements as a parenthesised, comma-separated list, e.g. "(a,b,c)"
+ "("+self.join(',')+")"
+ end
+
def butlast
first(self.size-1)
end
end
self[i, count]
end
-end
-
-class ChanceHash
-
- def initialize
- @hash = Hash.new(0)
- @picker = []
- @total = 0
- @valid_pick = false
- end
-
- def size
- @hash.size
- end
-
- def [](key)
- @hash[key]
- end
-
- def keys
- @hash.keys
- end
-
- def increase(el)
- if @hash.key?(el)
- @hash[el] += 1
- else
- @hash[el] = 1
- end
- @valid_pick = false
- return @hash[el]
- end
- def decrease(el)
- if @hash.key?(el)
- @hash[el] -= 1
- @hash.delete(el) if @hash[el] == 0
- end
- @valid_pick = false
- return @hash[el]
+ def pick_with_chance(chance) # Weighted random pick over [element, weight] pairs; callers in this file pass sum(chance) as chance
+ pick = rand(chance)
+ self.each { |el, cch|
+ ch = cch.to_i # weight is coerced with to_i before comparing
+ return el if pick < ch # pick landed inside this element's share
+ pick -= ch # otherwise move past this element's share
+ }
+ nil # no element's share covered the pick
end
+end
- def make_picker
- @picker.clear
- total = 0
- @hash.each { |el, ch|
- total += ch
- @picker << [total, el]
- }
- @total = total
- @valid_pick = true
+class String # Reopened to add SQL literal quoting
+ def quoted # The string escaped by SQLite3::Database.quote and wrapped in single quotes
+ "'" + SQLite3::Database.quote(self)+"'"
end
+end
- def random
- case @hash.size
- when 0
- return nil
- when 1
- return @hash.keys.first
- else
- make_picker unless @valid_pick
- pick = rand(@total)
- @picker.each { |ch, el|
- return el if pick < ch
- }
- end
+class NilClass # Reopened so nil answers quoted just like String does
+ def quoted # nil is rendered as the SQL keyword NULL
+ 'NULL'
end
end
WNW = /\w+|\W/u
attr_reader :max_order
- def initialize(ord=5)
+
+ def order(i=0) # Table name for index i: "order" followed by i (default "order0")
+ "order#{i}"
+ end
+
+ def word(i) # Column name for position i: "word" followed by i
+ "word#{i}"
+ end
+
+ def initialize(db, ord=5) # db is handed to SQLite3::Database.new; ord is stored as @max_order
+ @db = SQLite3::Database.new(db)
+ @db.synchronous=0 # presumably PRAGMA synchronous=OFF (faster, less durable writes) -- confirm against the sqlite3 gem docs
@max_order = ord
- @mkv = Hash.new { |hash, key|
- hash[key] = {:prev => ChanceHash.new, :next => ChanceHash.new}
- }
- @mkv[nil] = ChanceHash.new
+
+ @db.execute("create table if not exists #{order(0).quoted} (#{word(0).quoted} text unique not null primary key, 'chance' integer not null default 0)")
+ 1.upto(@max_order+1) do |i| # one table per index 1..@max_order+1
+ cols = (0..i).map { |j| word(j).quoted} # quoted column names word0..word<i>; reused below for the unique constraint
+
+ cmd = "create table if not exists "
+ cmd << order(i).quoted + " ("
+ cmd << cols.map { |c| c+' text'}.join(',')
+ cmd << ", 'chance' integer not null default 0"
+ cmd << ", unique#{cols.sql_group})"
+ @db.execute(cmd)
+ end
end
def words
- @mkv[nil].keys
+ @db.execute("select word0 from order0") # NOTE(review): execute returns rows, so each entry is presumably a one-element row rather than a bare word -- confirm callers expect that
end
- def add_one(sym)
- # Don't add nil to order 0
- return unless sym
- @mkv[nil].increase(sym.to_sym)
+ def num_words # Row count of order0, converted with to_i
+ @db.get_first_value("select count(*) from order0").to_i
end
- def add_before(array, prev)
- raise "Not enough words in new data" if array.empty?
- raise "Too many words in new data" if array.size > @max_order
- # Don't add prev to chains whose first element is nil
- return unless array.first
- h = @mkv[array.dup]
- h[:prev].increase(prev)
+ def where_selector(words, o={})
+ offset = o[:offset].to_i
+ ar = []
+ words.length.times do |i|
+ if words[i]
+ ar << word(i+offset) + "=" + words[i].quoted
+ else
+ ar << word(i+offset) + " ISNULL"
+ end
+ end
+ "where #{ar.join(' and ')}"
end
- def add_after(array, nxt)
- raise "Not enough words in new data" if array.empty?
- raise "Too many words in new data" if array.size > @max_order
- # Don't add next to chains whose last element is nil
- return unless array.last
- h = @mkv[array.dup]
- h[:next].increase(nxt)
+ def grouped_selector(words, o={})
+ offset= o[:offset].to_i
+ wds = []
+ cols = []
+ words.length.times do |i|
+ cols << word(i)
+ wds << words[i].quoted
+ end
+ if o.key?(:chance)
+ cols << "chance"
+ wds << o[:chance].to_i
+ end
+ return [cols.sql_group, wds.sql_group]
+ end
+
+ def add_one(sym) # Count one occurrence of sym in order0: bump chance when the row exists, otherwise insert it with chance 1
+ # Don't add nil to order 0
+ return unless sym
+ @db.transaction do |db| # the lookup and the write run inside one transaction block
+ if db.get_first_value("select chance from order0 where word0=?1", sym)
+ db.execute("update order0 set chance=chance+1 where word0=?1", sym)
+ else
+ db.execute("insert into order0 (word0, chance) values (?1, 1)", sym)
+ end
+ end
+ # puts @db.execute("select * from order0 where word0=?1", sym).inspect
end
def add_multi(array)
raise "Too many words in new data" if array.size > @max_order + 1
- add_before(array.butfirst, array.first)
- add_after(array.butlast, array.last)
+ table = order(array.length-1).quoted # a sequence of n tokens is stored in table order<n-1>
+ cols, wds = grouped_selector(array, :chance => 1) # column and value lists for a fresh row with chance 1
+ where = where_selector(array) # matches the row holding exactly this sequence
+ @db.transaction do |db| # bump the existing row's chance, or insert the row if it is missing
+ if db.get_first_value("select chance from #{table} " + where)
+ db.execute("update #{table} set chance=chance+1 " + where)
+ else
+ db.execute("insert into #{table} #{cols} values #{wds}")
+ end
+ end
+ # puts @db.execute("select * from #{table} " + where).inspect
end
def add(*data)
+ # puts "adding #{data.inspect}"
if data.size == 1
add_one(data.first)
else
end
def simple_learn(text)
- syms = text.scan(WNW).map { |w| w.intern }
+ return if text.empty? # nothing to learn from an empty string
+ syms = text.scan(WNW) # tokens are kept as the strings scan returns
syms.unshift(nil)
syms.push(nil)
- syms.size.times { |i|
- ([@max_order, syms.size-i].min+1).times { |ord|
- v = syms[i, ord+1]
+ syms.each_index do |i|
+ max_len = [@max_order+1, syms.size - i].min # longest slice starting at i, capped at @max_order+1 tokens
+ 1.upto max_len do |len| # every slice length from 1 up to max_len is passed to add
+ v = syms[i, len]
# puts "Learning #{v.inspect}"
add(*v)
# pp @mkv
- }
- }
+ end
+ end
end
def learn(text, o={})
if lc
simple_learn(text.downcase)
end
-
- pp @mkv if defined? pp
end
def raw_next(syms, o={})
max_order = o.fetch(:max_order, @max_order)
- ar = syms.last([max_order, syms.size].min)
- if @mkv.key?(ar)
- @mkv[ar][:next].random
+ if max_order > syms.length # never ask for more context than there are tokens
+ max_order = syms.length
+ end
+ ar = syms.last(max_order) # the trailing tokens serve as context
+ # puts "raw_next #{max_order} #{ar.inspect}"
+
+ table = order(max_order)
+ sel = word(max_order) # the column right after the context columns
+ where = where_selector(ar)
+
+ choices = @db.execute("select #{sel},chance from #{table} #{where}")
+ unless choices.empty?
+ sum = @db.get_first_value("select sum(chance) from #{table} #{where}").to_i
+ return choices.pick_with_chance(sum) # weighted pick among the matching rows
else
raw_next(ar.butfirst, o)
end
end
def next(text, o={})
- syms = text.scan(WNW).map { |w| w.intern }
+ syms = text.scan(WNW) # tokens are kept as the strings scan returns
raw_next(syms, o)
end
def raw_prev(syms, o={})
max_order = o.fetch(:max_order, @max_order)
- ar = syms.first([max_order, syms.size].min)
- if @mkv.key?(ar)
- @mkv[ar][:prev].random
+ if max_order > syms.length # never ask for more context than there are tokens
+ max_order = syms.length
+ end
+ ar = syms.first(max_order) # the leading tokens serve as context
+ # puts "raw_prev #{max_order} #{ar.inspect}"
+
+ table = order(max_order)
+ sel = word(0) # the column right before the context columns
+ where = where_selector(ar,:offset => 1) # context is matched against word1 onwards
+
+ choices = @db.execute("select #{sel}, chance from #{table} #{where}")
+ unless choices.empty?
+ sum = @db.get_first_value("select sum(chance) from #{table} #{where}").to_i
+ return choices.pick_with_chance(sum) # weighted pick among the matching rows
else
raw_prev(ar.butlast, o)
end
end
def prev(text, o={})
- syms = text.scan(WNW).map { |w| w.intern }
+ syms = text.scan(WNW) # tokens are kept as the strings scan returns
raw_prev(syms, o)
end
def complete_prev(text, o={})
- syms = text.scan(WNW).map { |w| w.intern }
+ syms = text.scan(WNW)
prev = raw_prev(syms, o)
while prev do
syms.unshift(prev)
end
def complete_next(text, o={})
- syms = text.scan(WNW).map { |w| w.intern }
+ syms = text.scan(WNW)
nxt = raw_next(syms, o)
while nxt do
syms.push(nxt)
def complete(text, o={})
txt = text
+ choices = @db.execute("select word0,chance from order0")
+ return String.new if choices.empty?
+ sum = @db.get_first_value("select sum(chance) from order0").to_i
while txt.empty? do
- txt = @mkv[nil].random.to_s
+ txt = choices.pick_with_chance(sum)
end
- syms = txt.scan(WNW).map { |w| w.intern }
+ syms = [txt]
prev = raw_prev(syms, o)
nxt = raw_next(syms, o)
while nxt or prev do
+ # puts syms.inspect, nxt.inspect, prev.inspect
# Keep adding only on the side where we
# didn't come across a nil already
if prev