This AlphaZero version uses the custom and flexible ValueLayer
to create the value output and the LineFilterLayer
for the policy output.
The model is based on V7, which was trained from scratch on 1,000,000 training examples from the StageThree dataset on a 5x4 board. This version continues training that model on 2,000,000 StageThree samples from a 4x3 board.
import sys
sys.path.append('..')
import numpy as np
import tensorflow as tf
from tensorflow.python import debug as tf_debug
from keras.callbacks import *
from keras.models import *
from keras.layers import *
from keras.optimizers import *
from keras.initializers import *
from keras.utils.np_utils import to_categorical
from keras.utils import plot_model
import keras.backend as K
from keras.regularizers import l2
from keras.engine.topology import Layer
from PIL import Image
from matplotlib.pyplot import imshow
%matplotlib inline
import random
import gc
from LineFilterLayer import LineFilterLayer
from ValueLayer import ValueLayer
# Model file of the previous version (V7) that is loaded as the starting point.
baseModelPath = 'model/alphaZeroV7.h5'
# Where the further-trained model is saved; the training checkpoint file
# uses this path with ".checkpoint" appended.
modelPath = 'model/alphaZeroV8.h5'
# .npz archive with the training data, read by loadPVDataset.
datasetPath = 'StageThree-2000000-4x3-00:15-21_05_2018.npz'
# Show the backend's image data format. The code below indexes the input as
# (sample, row, column, plane), i.e. it relies on channels_last.
print(K.image_data_format())
# expected output: channels_last
def dotsAndBoxesToCategorical(inputData):
    """Turn raw dots-and-boxes pixel data into one-hot category planes.

    On a copy of ``inputData`` the pixel values 255 (line), 65 (box A),
    150 (box B) and 215 (dot) are renumbered to the categories 1 to 4;
    all other values are left untouched. The renumbered array is one-hot
    encoded with ``to_categorical`` and returned with the shape
    ``inputData.shape + (number of categories,)``.
    """
    # Line is mapped first (category 1) so that target data only has two
    # categories.
    pixelToCategory = (
        (255, 1),  # Line
        (65, 2),   # Box A
        (150, 3),  # Box B
        (215, 4),  # Dot
    )
    categories = np.copy(inputData)
    for pixelValue, category in pixelToCategory:
        categories[categories == pixelValue] = category
    oneHot = to_categorical(categories)
    # Make sure the result keeps the input's axes, with the category axis last.
    return oneHot.reshape(categories.shape + (oneHot.shape[-1],))
def imgSizeToBoxes(x):
    """Convert an image edge length in pixels to the number of boxes along it.

    Args:
        x: Width or height of a board image in pixels.

    Returns:
        ``(x - 3) // 2`` as an integer. The callers in this file feed the
        result into ``range`` and index arithmetic, so floor division is
        used; true division would hand them a float on Python 3.
    """
    return (x - 3) // 2
def lineFilterMatrixNP(imgWidth,imgHeight):
    """Build a boolean mask that marks the line pixels of a board image.

    Lines are numbered row by row. Each full row holds ``boxWidth``
    horizontal lines followed by ``boxWidth + 1`` vertical lines; the
    total number of lines is ``2*boxWidth*boxHeight + boxWidth + boxHeight``.
    A horizontal line starting at grid point ``(x1, y1)`` maps to pixel
    ``(2*x1 + 2, 2*y1 + 1)``, a vertical one to ``(2*x1 + 1, 2*y1 + 2)``.

    Args:
        imgWidth: Image width in pixels.
        imgHeight: Image height in pixels.

    Returns:
        Boolean array of shape ``(imgHeight, imgWidth)`` that is True at
        every line pixel and False elsewhere.
    """
    # int() keeps the counts usable for range() and indexing even when the
    # helper returns a float.
    boxWidth = int(imgSizeToBoxes(imgWidth))
    boxHeight = int(imgSizeToBoxes(imgHeight))
    linesCnt = 2 * boxWidth * boxHeight + boxWidth + boxHeight
    linesPerRow = 2 * boxWidth + 1
    # Builtin bool instead of the np.bool alias, which newer NumPy versions
    # no longer provide.
    mat = np.zeros((imgHeight, imgWidth), dtype=bool)
    for idx in range(linesCnt):
        # Integer row / position-in-row; true division would produce a float
        # row index on Python 3.
        y1, col = divmod(idx, linesPerRow)
        if col < boxWidth:
            # horizontal line
            x1 = col
            x2 = x1 + 1
            y2 = y1
        else:
            # vertical line
            x1 = col - boxWidth
            x2 = x1
            y2 = y1 + 1
        px = x2 * 2 + y2 - y1
        py = y2 * 2 + x2 - x1
        mat[py, px] = True
    return mat
def loadPVDataset(datasetPath):
    """Load the policy/value training data from an ``.npz`` archive.

    The archive must contain the arrays ``'input'``, ``'policy'`` and
    ``'value'``.

    Args:
        datasetPath: Path of the ``.npz`` file.

    Returns:
        Tuple ``(x_input, y_policy, y_value)``: the input converted with
        ``dotsAndBoxesToCategorical``, the policy reduced to its line pixels
        (one row of line values per sample) and divided by 255, and the
        value array as stored.
    """
    # For .npz archives np.load returns an object that keeps the file open;
    # the with-block closes it once the three arrays have been read.
    with np.load(datasetPath) as rawDataset:
        x_input = rawDataset['input']
        y_policy = rawDataset['policy']
        y_value = rawDataset['value']
    x_input = dotsAndBoxesToCategorical(x_input)
    # The last two policy axes are (height, width); the mask keeps only the
    # pixels that represent lines.
    lineMask = lineFilterMatrixNP(y_policy.shape[-1], y_policy.shape[-2])
    y_policy = y_policy[:, lineMask]
    # NOTE(review): the in-place division keeps y_policy's dtype. This assumes
    # 'policy' is stored as floats; with an integer dtype it would fail on
    # Python 3 (and floor on Python 2) - confirm against the dataset.
    y_policy /= 255
    return (x_input, y_policy, y_value)
np.set_printoptions(precision=2)

x_input, y_policy, y_value = loadPVDataset(datasetPath)

# Shapes of the three arrays, followed by the first sample of each.
print(x_input.shape)
print(y_policy.shape)
print(y_value.shape)
print("input:")
print(x_input[0, :, :, 1])
print("policy:")
print(y_policy[0])
print('value:')
print(y_value[0])

# x_input is indexed as (sample, row, column, plane).
height, width = x_input.shape[-3:-1]

# LineFilterLayer and ValueLayer have to know the image size before the
# model is loaded.
LineFilterLayer.imgWidth = ValueLayer.imgWidth = width
LineFilterLayer.imgHeight = ValueLayer.imgHeight = height

model = load_model(
    baseModelPath,
    custom_objects={
        'LineFilterLayer': LineFilterLayer,
        'ValueLayer': ValueLayer,
    },
)
model.summary()

# Optional: wrap the backend session in the TensorFlow CLI debugger.
#sess = K.get_session()
#sess = tf_debug.LocalCLIDebugWrapperSession(sess)
#K.set_session(sess)

# Training
checkpoint = ModelCheckpoint(
    filepath=modelPath + ".checkpoint",
    save_weights_only=False,
)
progbar = ProgbarLogger()
# The TensorBoard callback is created but deliberately not registered;
# add it to the list below to enable it.
tensorboard = TensorBoard(
    log_dir='model/log2',
    histogram_freq=1,
    write_graph=True,
    write_grads=True,
    write_images=True,
)
callbacks = [checkpoint, progbar]
#callbacks.append(tensorboard)

model.fit(
    x_input,
    [y_policy, y_value],
    epochs=8,
    batch_size=128,
    validation_split=0.001,
    callbacks=callbacks,
)
model.save(modelPath)
def linesToDotsAndBoxesImage(lines, imgWidth, imgHeight):
    """Scatter a flat vector of line values back into a board image.

    Lines are numbered row by row. Each full row holds ``boxWidth``
    horizontal lines followed by ``boxWidth + 1`` vertical lines. A
    horizontal line starting at grid point ``(x1, y1)`` is written to pixel
    ``(2*x1 + 2, 2*y1 + 1)``, a vertical one to ``(2*x1 + 1, 2*y1 + 2)``.

    Args:
        lines: Array indexed by line number; it must provide at least
            ``2*boxWidth*boxHeight + boxWidth + boxHeight`` entries.
        imgWidth: Image width in pixels.
        imgHeight: Image height in pixels.

    Returns:
        Array of shape ``(imgHeight, imgWidth)`` with the dtype of ``lines``
        that holds each line's value at its pixel and zero elsewhere.
    """
    # int() keeps the counts usable for range() and indexing even when the
    # helper returns a float.
    boxWidth = int(imgSizeToBoxes(imgWidth))
    boxHeight = int(imgSizeToBoxes(imgHeight))
    linesCnt = 2 * boxWidth * boxHeight + boxWidth + boxHeight
    linesPerRow = 2 * boxWidth + 1
    mat = np.zeros((imgHeight, imgWidth), dtype=lines.dtype)
    for idx in range(linesCnt):
        # Integer row / position-in-row; true division would produce a float
        # row index on Python 3.
        y1, col = divmod(idx, linesPerRow)
        if col < boxWidth:
            # horizontal line
            x1 = col
            x2 = x1 + 1
            y2 = y1
        else:
            # vertical line
            x1 = col - boxWidth
            x2 = x1
            y2 = y1 + 1
        px = x2 * 2 + y2 - y1
        py = y2 * 2 + x2 - x1
        mat[py, px] = lines[idx]
    return mat
# Pick a random sample, run the model on it and show input, prediction and
# target side by side.
example = random.randrange(x_input.shape[0])
print("example: "+str(example))

imgWidth = x_input.shape[-2]
imgHeight = x_input.shape[-3]

# Slice (instead of index) so that the sample axis is kept for predict().
input_data = x_input[example:example+1]

# The model has two outputs: the per-line policy and the value.
(prediction_lines, prediction_value) = model.predict(input_data)
prediction_lines_print = prediction_lines * 100
print(prediction_lines_print.astype(np.uint8))
print(np.sum(prediction_lines))
prediction = linesToDotsAndBoxesImage(prediction_lines[0], imgWidth, imgHeight)

# print input data
input_data_print = x_input[example,:,:,1]
input_data_print = input_data_print.astype(np.uint8)
print("input "+str(input_data_print.shape)+": ")
print(input_data_print)

# generate greyscale image data from input data
# (planes is not used by the code visible here; kept in case later cells
# refer to it)
planes = [1,2,3,4]
# Summing every plane except plane 0 gives the combined brightness of all
# non-background categories per pixel.
input_imgdata = np.sum(x_input[example,:,:,1:], axis=-1) * 255
input_imgdata = input_imgdata.astype(np.uint8)

# print prediction
prediction_data_print = prediction * 100
prediction_data_print = prediction_data_print.astype(np.uint8)
print("prediction policy: ")
print(prediction_data_print)
print("prediction value: ")
print(prediction_value)
print("target value: ")
print(y_value[example])

# generate greyscale image data from prediction data
prediction_imgdata = prediction * 255
prediction_imgdata = prediction_imgdata.astype(np.uint8)

# generate greyscale image of target data
target_imgdata = linesToDotsAndBoxesImage(y_policy[example], imgWidth, imgHeight) * 255
# Cast like the other two planes: a non-uint8 plane would make np.stack
# promote the whole array, and Image.fromarray(..., 'RGB') needs one byte
# per channel.
target_imgdata = target_imgdata.astype(np.uint8)

# merge image data in color channels (R = input, G = prediction, B = target)
merged_imgdata = np.stack([input_imgdata, prediction_imgdata, target_imgdata], axis=2)

#create image
img = Image.fromarray(merged_imgdata, 'RGB')
img = img.resize(size=(img.size[0]*10, img.size[1]*10))
img