#!/usr/bin/env ruby
# Training driver: repeatedly runs batches ("units") of Trainer games,
# feeds every finished game to the greedy strategy's evaluator, and dumps
# the raw game logs of each unit to disk.

require 'othello'
require 'strategy'
require 'trainer'

BOARD_SIZE = 6

# Runs learning units forever, persisting the number of completed units in
# LOG_FILE so that a restarted process resumes where it stopped.
class Learning
  # Number of games played per learning unit.
  LEARNING_UNIT = 100
  # Number of units over which the greedy-selection probability ramps up.
  LEARNING_UNITS_PLANNING = 100
  # Marshal file holding `[total_learning_unit]`.
  LOG_FILE = 'learning.dat'
  # File-name prefix of the per-unit raw log dumps.
  RAW_DATA_PREFIX = "learning_#{LEARNING_UNIT}"
  # Upper bound of the greedy-selection probability (Greedy vs. Boltzmann).
  STRATEGY_THRESHOLD = 0.9
  # Per-step multiplicative discount applied to rewards.
  DISCOUNT_ALPHA = 0.91
  # Lower bound of a discounted reward.
  MINIMUM_REWARD = 0.1

  # Restores the completed-unit counter from LOG_FILE; starts from 0 when the
  # file is missing or unreadable (deliberate best-effort).
  def initialize
    @total_learning_unit =
      begin
        # Marshal data is binary, so the file is read in binary mode.
        # NOTE: Marshal.load must only ever be given files this program wrote.
        File.open(LOG_FILE, 'rb') { |io| Marshal.load(io.read)[0] }
      rescue StandardError
        0
      end
  end

  # Persists the completed-unit counter to LOG_FILE.
  def dump
    File.open(LOG_FILE, 'wb') { |io| io << Marshal.dump([@total_learning_unit]) }
  end

  # Runs learning units in an endless loop. Each unit plays LEARNING_UNIT
  # games, trains the evaluator on every game, writes the raw logs to
  # "#{RAW_DATA_PREFIX}_NNN.dat" and then updates LOG_FILE. Never returns.
  def execute
    # presumably the evaluator's own persistence file; verify in Evaluator::Bayes
    bayes = Evaluator::Bayes.new('learned_bayes.dat')
    boltzmann = Strategy::Boltzmann.new
    greedy = Strategy::Greedy.new(bayes)
    boltzmann_greedy = Strategy::BiStrategy.new(boltzmann, greedy, 0.3)

    while true
      puts "Unit #{@total_learning_unit} started!"

      threshold = greedy_threshold
      # All strategies are drawn before any game is played, so the sequence of
      # rand calls is independent of whatever the games themselves consume.
      strategies = Array.new(LEARNING_UNIT) do
        pick_strategy(threshold, greedy, boltzmann_greedy, boltzmann)
      end

      logs = []
      strategies.each_with_index do |strategy, game_index|
        Trainer.new(BOARD_SIZE, true, strategy) do |log, result|
          logs << [log, result]
          train(greedy, log, result)
        end
        puts "END: #{game_index}"
        puts
      end

      puts "Raw data dumped, unit number = #{@total_learning_unit}"
      dump_raw_logs(logs)

      @total_learning_unit += 1
      dump
    end
  end

  private

  # Probability of preferring the greedy strategy for the current unit:
  # 1 - (t - 1)^2 with t = completed units / LEARNING_UNITS_PLANNING while
  # t < 1, then 1; the result is capped at STRATEGY_THRESHOLD.
  def greedy_threshold
    raw =
      if @total_learning_unit < LEARNING_UNITS_PLANNING
        1 - ((@total_learning_unit.to_f / LEARNING_UNITS_PLANNING - 1)**2)
      else
        1
      end
    raw > STRATEGY_THRESHOLD ? STRATEGY_THRESHOLD : raw
  end

  # Draws the strategy for one game: boltzmann with probability
  # (1 - threshold); otherwise greedy with probability threshold and
  # boltzmann_greedy for the remainder.
  def pick_strategy(threshold, greedy, boltzmann_greedy, boltzmann)
    return boltzmann unless rand < threshold

    rand < threshold ? greedy : boltzmann_greedy
  end

  # Builds the reward table for a game of `length` logged states: starting at
  # 1, each further entry is the previous one times DISCOUNT_ALPHA, floored at
  # MINIMUM_REWARD; the table is then reversed so it is ascending.
  #
  # NOTE(review): the table has length + 1 entries and callers index it with
  # 0...length, so the last logged state receives DISCOUNT_ALPHA (or the
  # floor), never 1. Kept as in the original; confirm this is intended.
  def discounted_rewards(length)
    rewards = [1]
    length.times do
      discounted = rewards.last * DISCOUNT_ALPHA
      rewards << (discounted < MINIMUM_REWARD ? MINIMUM_REWARD : discounted)
    end
    rewards.reverse
  end

  # Feeds one finished game to the greedy strategy's evaluator.
  # result[0] is compared against result[1]: a larger result[0] counts as a
  # black win, a smaller one as a white win, equality as a draw.
  def train(greedy, log, result)
    rewards = discounted_rewards(log.size)

    if result[0] > result[1]
      # Black wins.
      log.each_with_index do |state, j|
        greedy.evaluator.train_black_win(state, rewards[j])
      end
    elsif result[0] < result[1]
      # White wins.
      log.each_with_index do |state, j|
        greedy.evaluator.train_white_win(state, rewards[j])
      end
    else
      # Draw: credit both sides with half the reward.
      log.each_with_index do |state, j|
        greedy.evaluator.train_black_win(state, rewards[j] * 0.5)
        greedy.evaluator.train_white_win(state, rewards[j] * 0.5)
      end
    end
  end

  # Writes the [log, result] pairs of the current unit as a binary Marshal
  # file named after the zero-padded unit number.
  def dump_raw_logs(logs)
    path = "#{RAW_DATA_PREFIX}_#{format('%03d', @total_learning_unit)}.dat"
    File.open(path, 'wb') { |file| file << Marshal.dump(logs) }
  end
end

Learning.new.execute if $0 == __FILE__