#!/usr/bin/env ruby

require 'othello'
require 'strategy'
require 'trainer'

# Board size handed to every Trainer game as its first constructor argument.
BOARD_SIZE = 6

# Self-play training driver.
#
# Plays games in batches ("units") of LEARNING_UNIT games through Trainer,
# trains the evaluator behind the greedy strategy on every finished game,
# writes the raw game logs of each unit to disk, and records the number of
# completed units in LOG_FILE so that a restarted process resumes counting
# where the previous one stopped.
class Learning

	# Number of games played per unit.
	LEARNING_UNIT = 100
	# Number of units over which the greedy probability is ramped up.
	LEARNING_UNITS_PLANNING = 100

	# Marshal file holding [number of completed units].
	LOG_FILE = 'learning.dat'
	# File name prefix of the per-unit raw game dumps.
	RAW_DATA_PREFIX = "learning_#{LEARNING_UNIT}"

	# Upper bound of the selection threshold (Greedy vs. Boltzmann), so that
	# the pure Boltzmann strategy always keeps some probability.
	STRATEGY_THRESHOLD = 0.9

	# Factor applied to the reward for every step further from the game's end.
	DISCOUNT_ALPHA = 0.91
	# Lower bound of a discounted reward.
	MIN_REWARD = 0.1

	# Restores the count of completed units from LOG_FILE; starts at 0 when
	# the file is missing or unreadable.
	def initialize
		@total_learning_unit = load_total_learning_unit
	end

	# Persists the count of completed units to LOG_FILE.
	def dump
		# Marshal output is binary, hence the 'b' mode (text mode would apply
		# newline translation on Windows and corrupt the data).
		File.open(LOG_FILE, 'wb') { |io| io << Marshal.dump([@total_learning_unit]) }
	end

	# Runs the training loop. Does not return on its own: each iteration plays
	# one unit of games, trains on them, dumps the raw logs, and saves the
	# updated unit counter.
	#
	# NOTE(review): nothing in this class saves the trained evaluator
	# explicitly; presumably Evaluator::Bayes persists itself to
	# 'learned_bayes.dat' - confirm.
	def execute
		bayes = Evaluator::Bayes.new('learned_bayes.dat')

		boltzmann = Strategy::Boltzmann.new
		greedy = Strategy::Greedy.new(bayes)
		# The meaning of the third argument is defined by Strategy::BiStrategy.
		boltzmann_greedy = Strategy::BiStrategy.new(boltzmann, greedy, 0.3)

		loop do
			puts "Unit #{@total_learning_unit} started!"

			# All strategies of the unit are drawn before the first game starts.
			threshold = selection_threshold
			strategies = Array.new(LEARNING_UNIT) do
				pick_strategy(threshold, greedy, boltzmann_greedy, boltzmann)
			end

			logs = []
			strategies.each_with_index do |strategy, game_index|
				Trainer.new(BOARD_SIZE, true, strategy) do |log, result|
					logs << [log, result]
					train_on_game(greedy.evaluator, log, result)
				end
				puts "END: #{game_index}"
				puts
			end

			dump_raw_logs(logs)
			puts "Raw data dumped, unit number = #{@total_learning_unit}"
			@total_learning_unit += 1
			dump
		end
	end

	private

	# Reads the completed-unit count from LOG_FILE.
	# Any StandardError (missing file, truncated data, ...) means "start from 0".
	def load_total_learning_unit
		# Marshal.load must only ever see trusted data; LOG_FILE is written by
		# #dump of this very class.
		File.open(LOG_FILE, 'rb') { |io| Marshal.load(io.read)[0] }
	rescue StandardError
		0
	end

	# Selection threshold for the current unit: rises along
	# 1 - (unit / LEARNING_UNITS_PLANNING - 1)^2 from 0 at unit 0 and is
	# capped at STRATEGY_THRESHOLD.
	def selection_threshold
		if @total_learning_unit < LEARNING_UNITS_PLANNING
			progress = @total_learning_unit.to_f / LEARNING_UNITS_PLANNING
			threshold = 1 - (progress - 1) ** 2
		else
			threshold = 1
		end
		[threshold, STRATEGY_THRESHOLD].min
	end

	# Draws one strategy: with probability threshold^2 the greedy one, with
	# threshold * (1 - threshold) the mixed one, otherwise pure Boltzmann.
	def pick_strategy(threshold, greedy, boltzmann_greedy, boltzmann)
		return boltzmann unless rand < threshold
		rand < threshold ? greedy : boltzmann_greedy
	end

	# Builds the reward table for a game of +length+ logged states.
	# Returns length + 1 values in ascending order: the last one is 1 and each
	# earlier one is the following value times DISCOUNT_ALPHA, never below
	# MIN_REWARD.
	def discounted_rewards(length)
		rewards = [1]
		length.times do
			rewards << [rewards.last * DISCOUNT_ALPHA, MIN_REWARD].max
		end
		rewards.reverse
	end

	# Feeds every state of one finished game to +evaluator+, weighted by its
	# discounted reward. result[0] is black's score, result[1] white's.
	#
	# NOTE(review): state j uses rewards[j], so the last logged state receives
	# DISCOUNT_ALPHA and the table's final entry (1) is never used - confirm
	# this is intended and not an off-by-one.
	def train_on_game(evaluator, log, result)
		rewards = discounted_rewards(log.size)

		if result[0] > result[1]
			# Black won.
			log.each_with_index { |state, j| evaluator.train_black_win(state, rewards[j]) }
		elsif result[0] < result[1]
			# White won.
			log.each_with_index { |state, j| evaluator.train_white_win(state, rewards[j]) }
		else
			# Draw: both outcomes are trained with half of the reward.
			log.each_with_index do |state, j|
				evaluator.train_black_win(state, rewards[j] * 0.5)
				evaluator.train_white_win(state, rewards[j] * 0.5)
			end
		end
	end

	# Writes the [log, result] pairs of the current unit to
	# "<RAW_DATA_PREFIX>_<unit, 3 digits>.dat" in binary mode.
	def dump_raw_logs(logs)
		path = "#{RAW_DATA_PREFIX}_#{sprintf('%03d', @total_learning_unit)}.dat"
		File.open(path, 'wb') { |file| file << Marshal.dump(logs) }
	end
end

# Start the endless training loop only when this file is run as a script,
# not when it is loaded by another file.
Learning.new.execute if $PROGRAM_NAME == __FILE__