AlphaZero version 5

This model was trained from scratch on various StageTwo Datasets for two epochs each.

See alphaZeroV5.py and alphaZeroV5.training.log for the correct version. Training did not work in this Jupyter notebook because memory could not be freed properly.

import sys
sys.path.append('..')

import numpy as np
import tensorflow as tf
from tensorflow.python import debug as tf_debug

from keras.callbacks import *
from keras.models import *
from keras.layers import *
from keras.optimizers import *
from keras.utils.np_utils import to_categorical
from keras.utils import plot_model
import keras.backend as K
from keras.regularizers import l2
from keras.engine.topology import Layer

from PIL import Image
from matplotlib.pyplot import imshow
%matplotlib inline
import random
import gc

from LineFilterLayer import LineFilterLayer

# File the model is saved to and reloaded from.
modelPath = 'model/alphaZeroV5.h5'

# Datasets in training order. All but the first file share the same name
# pattern and differ only in the trailing time/date stamp.
datasetList = ['StageTwo-1000-5x4-15:53-09_04_2018.npz'] + [
    'StageTwo-1000000-6x5-' + stamp + '.npz'
    for stamp in (
        '23:21-08_04_2018',
        '23:41-08_04_2018',
        '08:32-09_04_2018',
        '08:51-09_04_2018',
        '09:10-09_04_2018',
        '09:28-09_04_2018',
        '09:47-09_04_2018',
        '10:06-09_04_2018',
        '10:25-09_04_2018',
        '10:44-09_04_2018',
        '11:03-09_04_2018',
        '11:21-09_04_2018',
    )
]

Using TensorFlow backend.

# The convolution layers in this notebook are configured for channels-last
# tensors, so verify the backend setting before building anything.
dataFormat = K.image_data_format()
print(dataFormat)
# expected output: channels_last
assert dataFormat == 'channels_last'

channels_last

def dotsAndBoxesToCategorical(inp):
    """One-hot encode a dots-and-boxes pixel array.

    The grey values 255, 65, 150 and 215 are replaced by the class indices
    1 to 4 and the result is passed through ``to_categorical``. The returned
    array has the shape of ``inp`` plus one trailing class axis.

    Note: ``inp`` is overwritten in place; no copy is made.
    """
    pixelToClass = (
        (255, 1),  # Line - comes first so that target data only has two categories
        (65, 2),   # Box A
        (150, 3),  # Box B
        (215, 4),  # Dot
    )
    for pixelValue, classIdx in pixelToClass:
        inp[inp == pixelValue] = classIdx

    spatialShape = inp.shape
    oneHot = to_categorical(inp)
    # Restore the original leading axes in front of the class axis.
    return oneHot.reshape(spatialShape + (oneHot.shape[-1],))

def imgSizeToBoxes(x):
    """Return the number of boxes along an image axis that is ``x`` pixels long.

    Floor division is spelled out so the result stays an integer on
    Python 3 as well; with ``/`` it would become a float there and break
    ``range()`` and array indexing in the callers.
    """
    return (x - 3) // 2


def lineFilterMatrixNP(imgWidth, imgHeight):
    """Build a boolean mask marking every line pixel of a board image.

    Args:
        imgWidth: image width in pixels.
        imgHeight: image height in pixels.

    Returns:
        A ``(imgHeight, imgWidth)`` boolean array that is True exactly at
        the pixels that represent a line. Lines are enumerated row by row:
        ``boxWidth`` horizontal lines followed by ``boxWidth + 1`` vertical
        lines, ending with a final row of horizontal lines.
    """
    boxWidth = imgSizeToBoxes(imgWidth)
    boxHeight = imgSizeToBoxes(imgHeight)
    linesPerRow = 2 * boxWidth + 1
    linesCnt = 2 * boxWidth * boxHeight + boxWidth + boxHeight
    # Builtin bool instead of the np.bool alias, which newer NumPy removed.
    mat = np.zeros((imgHeight, imgWidth), dtype=bool)
    for idx in range(linesCnt):
        # y1: row of the line's first dot, pos: position inside that row.
        y1, pos = divmod(idx, linesPerRow)
        if pos < boxWidth:
            # horizontal line
            x1 = pos
            x2 = x1 + 1
            y2 = y1
        else:
            # vertical line
            x1 = pos - boxWidth
            x2 = x1
            y2 = y1 + 1
        # Pixel coordinates of the line between dots (x1, y1) and (x2, y2).
        px = x2 * 2 + y2 - y1
        py = y2 * 2 + x2 - x1
        mat[py, px] = True
    return mat

#lineFilterMatrixNP(13,11)

def loadDataset(datasetPath):
    """Load one ``.npz`` dataset and convert it into training arrays.

    Args:
        datasetPath: path of an ``.npz`` archive holding the arrays
            ``x_train`` and ``y_train``.

    Returns:
        ``(x_train, y_train)``: ``x_train`` one-hot encoded by
        ``dotsAndBoxesToCategorical`` and ``y_train`` reduced to its line
        pixels (one column per line) and divided by 255.
    """
    # np.load keeps the archive open for lazy access; the context manager
    # closes the file handle again once both arrays have been read.
    with np.load(datasetPath) as rawDataset:
        x_train = rawDataset['x_train']
        y_train = rawDataset['y_train']

    x_train = dotsAndBoxesToCategorical(x_train)

    # Keep only the pixels that represent lines.
    lineMask = lineFilterMatrixNP(y_train.shape[-1], y_train.shape[-2])
    y_train = y_train[:, lineMask]

    # In-place true division is rejected for integer arrays on Python 3.
    # Floor division gives the same result there as `/=` did on Python 2.
    if np.issubdtype(y_train.dtype, np.integer):
        y_train //= 255
    else:
        y_train /= 255

    return (x_train, y_train)

# Print floating point values with two digits of precision.
np.set_printoptions(precision=2)

# Load the first dataset up front; its shape is printed here.
x_train, y_train = loadDataset(datasetList[0])

print(x_train.shape)
print(y_train.shape)

(1000, 11, 13, 5)
(1000, 49)

# Network hyper-parameters.
kernelSize = (5, 5)   # kernel size of every convolution
filterCnt = 64        # filters per convolution
l2reg = 1e-4          # L2 kernel regularisation factor
resBlockCnt = 4       # number of residual blocks

# x_train is laid out as (samples, height, width, channels).
inputHeight, inputWidth = x_train.shape[-3:-1]

def build_residual_block(x, index):
    """Append one residual block to tensor ``x`` and return its output.

    The block is conv -> batch norm -> ReLU -> conv -> batch norm. The
    result is added to the block input and passed through a final ReLU.
    ``index`` is only used to build the layer names (``res<index>_...``).
    """
    prefix = "res" + str(index)
    shortcut = x

    def conv(layerName):
        # Both convolutions of the block share the same configuration.
        return Conv2D(filters=filterCnt, kernel_size=kernelSize,
                      padding="same", data_format="channels_last",
                      kernel_regularizer=l2(l2reg), name=layerName)

    out = conv(prefix + "_conv1_" + str(filterCnt))(shortcut)
    out = BatchNormalization(name=prefix + "_batchnorm1")(out)
    out = Activation("relu", name=prefix + "_relu1")(out)
    # The second conv name uses a hyphen ("_conv2-"); it is kept as is
    # because layer names are part of saved models.
    out = conv(prefix + "_conv2-" + str(filterCnt))(out)
    out = BatchNormalization(name=prefix + "_batchnorm2")(out)
    out = Add(name=prefix + "_add")([shortcut, out])
    return Activation("relu", name=prefix + "_relu2")(out)


# Build the network: input convolution, a stack of residual blocks, and a
# single-filter output convolution followed by the line filter and softmax.

# Height and width are left open (None); the last axis holds 5 channels.
img_input = Input(shape=(None,None,5,))
x = Conv2D(filterCnt, kernelSize, padding='same', kernel_regularizer=l2(l2reg), name="input_conv")(img_input)
# Note the order here: ReLU is applied before batch normalisation.
x = Activation("relu", name="input_relu")(x)
x = BatchNormalization()(x)

# Residual blocks are numbered starting at 1 (layer names res1_..., res2_...).
for i in range(resBlockCnt):
    x = build_residual_block(x, i+1)

# Handle on the output of the last residual block (not used below).
res_out = x

# Collapse the feature maps into a single channel.
x = Conv2D(1, kernelSize, padding='same', kernel_regularizer=l2(l2reg), name="output_conv")(x)
# NOTE(review): LineFilterLayer is defined in LineFilterLayer.py, not shown
# here; presumably it keeps one value per line - confirm there.
x = LineFilterLayer(inputWidth, inputHeight)(x)
x = Activation("softmax", name="output_softmax")(x)
    
model = Model(inputs=img_input, outputs=x)
model.compile(optimizer='adam', loss='categorical_crossentropy')

model.summary()
# Save the untrained model to disk; the training loop reloads it from modelPath.
model.save(modelPath)

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
==================================================================================================
input_1 (InputLayer)            (None, None, None, 5 0                                            
__________________________________________________________________________________________________
input_conv (Conv2D)             (None, None, None, 6 8064        input_1[0][0]                    
__________________________________________________________________________________________________
input_relu (Activation)         (None, None, None, 6 0           input_conv[0][0]                 
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, None, None, 6 256         input_relu[0][0]                 
__________________________________________________________________________________________________
res1_conv1_64 (Conv2D)          (None, None, None, 6 102464      batch_normalization_1[0][0]      
__________________________________________________________________________________________________
res1_batchnorm1 (BatchNormaliza (None, None, None, 6 256         res1_conv1_64[0][0]              
__________________________________________________________________________________________________
res1_relu1 (Activation)         (None, None, None, 6 0           res1_batchnorm1[0][0]            
__________________________________________________________________________________________________
res1_conv2-64 (Conv2D)          (None, None, None, 6 102464      res1_relu1[0][0]                 
__________________________________________________________________________________________________
res1_batchnorm2 (BatchNormaliza (None, None, None, 6 256         res1_conv2-64[0][0]              
__________________________________________________________________________________________________
res1_add (Add)                  (None, None, None, 6 0           batch_normalization_1[0][0]      
                                                                 res1_batchnorm2[0][0]            
__________________________________________________________________________________________________
res1_relu2 (Activation)         (None, None, None, 6 0           res1_add[0][0]                   
__________________________________________________________________________________________________
res2_conv1_64 (Conv2D)          (None, None, None, 6 102464      res1_relu2[0][0]                 
__________________________________________________________________________________________________
res2_batchnorm1 (BatchNormaliza (None, None, None, 6 256         res2_conv1_64[0][0]              
__________________________________________________________________________________________________
res2_relu1 (Activation)         (None, None, None, 6 0           res2_batchnorm1[0][0]            
__________________________________________________________________________________________________
res2_conv2-64 (Conv2D)          (None, None, None, 6 102464      res2_relu1[0][0]                 
__________________________________________________________________________________________________
res2_batchnorm2 (BatchNormaliza (None, None, None, 6 256         res2_conv2-64[0][0]              
__________________________________________________________________________________________________
res2_add (Add)                  (None, None, None, 6 0           res1_relu2[0][0]                 
                                                                 res2_batchnorm2[0][0]            
__________________________________________________________________________________________________
res2_relu2 (Activation)         (None, None, None, 6 0           res2_add[0][0]                   
__________________________________________________________________________________________________
res3_conv1_64 (Conv2D)          (None, None, None, 6 102464      res2_relu2[0][0]                 
__________________________________________________________________________________________________
res3_batchnorm1 (BatchNormaliza (None, None, None, 6 256         res3_conv1_64[0][0]              
__________________________________________________________________________________________________
res3_relu1 (Activation)         (None, None, None, 6 0           res3_batchnorm1[0][0]            
__________________________________________________________________________________________________
res3_conv2-64 (Conv2D)          (None, None, None, 6 102464      res3_relu1[0][0]                 
__________________________________________________________________________________________________
res3_batchnorm2 (BatchNormaliza (None, None, None, 6 256         res3_conv2-64[0][0]              
__________________________________________________________________________________________________
res3_add (Add)                  (None, None, None, 6 0           res2_relu2[0][0]                 
                                                                 res3_batchnorm2[0][0]            
__________________________________________________________________________________________________
res3_relu2 (Activation)         (None, None, None, 6 0           res3_add[0][0]                   
__________________________________________________________________________________________________
res4_conv1_64 (Conv2D)          (None, None, None, 6 102464      res3_relu2[0][0]                 
__________________________________________________________________________________________________
res4_batchnorm1 (BatchNormaliza (None, None, None, 6 256         res4_conv1_64[0][0]              
__________________________________________________________________________________________________
res4_relu1 (Activation)         (None, None, None, 6 0           res4_batchnorm1[0][0]            
__________________________________________________________________________________________________
res4_conv2-64 (Conv2D)          (None, None, None, 6 102464      res4_relu1[0][0]                 
__________________________________________________________________________________________________
res4_batchnorm2 (BatchNormaliza (None, None, None, 6 256         res4_conv2-64[0][0]              
__________________________________________________________________________________________________
res4_add (Add)                  (None, None, None, 6 0           res3_relu2[0][0]                 
                                                                 res4_batchnorm2[0][0]            
__________________________________________________________________________________________________
res4_relu2 (Activation)         (None, None, None, 6 0           res4_add[0][0]                   
__________________________________________________________________________________________________
output_conv (Conv2D)            (None, None, None, 1 1601        res4_relu2[0][0]                 
__________________________________________________________________________________________________
line_filter_layer_1 (LineFilter (None, None)         0           output_conv[0][0]                
__________________________________________________________________________________________________
output_softmax (Activation)     (None, None)         0           line_filter_layer_1[0][0]        
==================================================================================================
Total params: 831,681
Trainable params: 830,529
Non-trainable params: 1,152
__________________________________________________________________________________________________

#sess = K.get_session()
#sess = tf_debug.LocalCLIDebugWrapperSession(sess)
#K.set_session(sess)

# Train on every dataset in turn, one epoch each. The model is reloaded from
# disk for each dataset after the LineFilterLayer image size has been set to
# that dataset's dimensions, and saved back to disk after training.
for iteration, datasetPath in enumerate(datasetList, 1):
    print("cleaning up dataset")
    # Release the previous arrays before loading the next dataset. Rebinding
    # to None (instead of `del`) also works when the names do not exist yet,
    # e.g. when this cell is re-run after an interruption.
    x_train = y_train = None
    gc.collect()

    print("loading dataset " + datasetPath)
    (x_train, y_train) = loadDataset(datasetPath)

    print(x_train.shape)
    print(y_train.shape)

    # Every load_model() call adds a new model to the backend's global
    # graph; clear it first so memory does not grow with each dataset.
    K.clear_session()

    # update the line filter layer to reflect new board size in dataset
    LineFilterLayer.imgWidth = x_train.shape[-2]
    LineFilterLayer.imgHeight = x_train.shape[-3]
    model = load_model(modelPath, custom_objects={'LineFilterLayer': LineFilterLayer})

    # Training
    # Keep a full checkpoint (not only the weights) per dataset.
    checkpoint = ModelCheckpoint(filepath=modelPath + ".checkpoint." + str(iteration),
                                 save_weights_only=False)
    # No explicit ProgbarLogger: fit() already installs one when verbose, and
    # a second instance prints every progress line twice.
    callbacks = [checkpoint]

    #tensorboard = TensorBoard(log_dir='model/log2', write_grads=True, write_graph=True, write_images=True, histogram_freq=1)
    #callbacks.append(tensorboard)

    model.fit(x_train, y_train, epochs=1, batch_size=64, callbacks=callbacks, validation_split=0.001)

    model.save(modelPath)

cleaning up dataset
loading dataset StageTwo-1000-5x4-15:53-09_04_2018.npz
(1000, 11, 13, 5)
(1000, 49)
LineFilterLayer from_config!
using static image size 13 x 11
Train on 999 samples, validate on 1 samples
Epoch 1/1
Epoch 1/1
999/999 [==============================] - 3s 3ms/step - loss: 4.1161 - val_loss: 3.3467
999/999 [==============================] - 3s 3ms/step - loss: 4.1161 - val_loss: 3.3467
cleaning up dataset
loading dataset StageTwo-1000000-6x5-23:21-08_04_2018.npz
(1000000, 13, 15, 5)
(1000000, 71)
LineFilterLayer from_config!
using static image size 15 x 13
Train on 999000 samples, validate on 1000 samples
Epoch 1/1
Epoch 1/1
999000/999000 [==============================] - 776s 777us/step - loss: 1.0198 - val_loss: 0.9364
999000/999000 [==============================] - 777s 778us/step - loss: 1.0198 - val_loss: 0.9364
cleaning up dataset
loading dataset StageTwo-1000000-6x5-23:41-08_04_2018.npz
(1000000, 13, 15, 5)
(1000000, 71)
LineFilterLayer from_config!
using static image size 15 x 13
Train on 999000 samples, validate on 1000 samples
Epoch 1/1
Epoch 1/1
154112/999000 [===>..........................] - ETA: 11:40 - loss: 0.9172

---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-10-6c1c81c55914> in <module>()
     33     #callbacks.append(tensorboard)
     34 
---> 35     model.fit(x_train, y_train, epochs=1, batch_size=64, callbacks=callbacks, validation_split=0.001)
     36 
     37     model.save(modelPath)

/usr/lib/python2.7/site-packages/keras/engine/training.pyc in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, **kwargs)
   1703                               initial_epoch=initial_epoch,
   1704                               steps_per_epoch=steps_per_epoch,
-> 1705                               validation_steps=validation_steps)
   1706 
   1707     def evaluate(self, x=None, y=None,

/usr/lib/python2.7/site-packages/keras/engine/training.pyc in _fit_loop(self, f, ins, out_labels, batch_size, epochs, verbose, callbacks, val_f, val_ins, shuffle, callback_metrics, initial_epoch, steps_per_epoch, validation_steps)
   1233                         ins_batch[i] = ins_batch[i].toarray()
   1234 
-> 1235                     outs = f(ins_batch)
   1236                     if not isinstance(outs, list):
   1237                         outs = [outs]

/usr/lib/python2.7/site-packages/keras/backend/tensorflow_backend.pyc in __call__(self, inputs)
   2476         session = get_session()
   2477         updated = session.run(fetches=fetches, feed_dict=feed_dict,
-> 2478                               **self.session_kwargs)
   2479         return updated[:len(self.outputs)]
   2480 

/usr/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in run(self, fetches, feed_dict, options, run_metadata)
    903     try:
    904       result = self._run(None, fetches, feed_dict, options_ptr,
--> 905                          run_metadata_ptr)
    906       if run_metadata:
    907         proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

/usr/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _run(self, handle, fetches, feed_dict, options, run_metadata)
   1135     if final_fetches or final_targets or (handle and feed_dict_tensor):
   1136       results = self._do_run(handle, final_targets, final_fetches,
-> 1137                              feed_dict_tensor, options, run_metadata)
   1138     else:
   1139       results = []

/usr/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
   1353     if handle is None:
   1354       return self._do_call(_run_fn, self._session, feeds, fetches, targets,
-> 1355                            options, run_metadata)
   1356     else:
   1357       return self._do_call(_prun_fn, self._session, handle, feeds, fetches)

/usr/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _do_call(self, fn, *args)
   1359   def _do_call(self, fn, *args):
   1360     try:
-> 1361       return fn(*args)
   1362     except errors.OpError as e:
   1363       message = compat.as_text(e.message)

/usr/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _run_fn(session, feed_dict, fetch_list, target_list, options, run_metadata)
   1338         else:
   1339           return tf_session.TF_Run(session, options, feed_dict, fetch_list,
-> 1340                                    target_list, status, run_metadata)
   1341 
   1342     def _prun_fn(session, handle, feed_dict, fetch_list):

KeyboardInterrupt:

def linesToDotsAndBoxesImage(lines, imgWidth, imgHeight):
    """Scatter a flat vector of per-line values into a board image.

    Args:
        lines: array with one value per line, in the row-by-row order
            used throughout this file (horizontal lines of a row first,
            then the vertical lines below them).
        imgWidth: image width in pixels.
        imgHeight: image height in pixels.

    Returns:
        A ``(imgHeight, imgWidth)`` array of ``lines.dtype`` that is zero
        everywhere except at the line pixels, which hold the matching
        entry of ``lines``.
    """
    # int(): the helper's `/` yields a float on Python 3, which would break
    # range() and the array indexing below.
    boxWidth = int(imgSizeToBoxes(imgWidth))
    boxHeight = int(imgSizeToBoxes(imgHeight))
    linesPerRow = 2 * boxWidth + 1
    linesCnt = 2 * boxWidth * boxHeight + boxWidth + boxHeight
    mat = np.zeros((imgHeight, imgWidth), dtype=lines.dtype)
    for idx in range(linesCnt):
        # y1: row of the line's first dot, pos: position inside that row.
        y1, pos = divmod(idx, linesPerRow)
        if pos < boxWidth:
            # horizontal line
            x1 = pos
            x2 = x1 + 1
            y2 = y1
        else:
            # vertical line
            x1 = pos - boxWidth
            x2 = x1
            y2 = y1 + 1
        # Pixel coordinates of the line between dots (x1, y1) and (x2, y2).
        px = x2 * 2 + y2 - y1
        py = y2 * 2 + x2 - x1
        mat[py, px] = lines[idx]
    return mat

# Pick a random sample from the currently loaded dataset and show the
# network's line prediction next to the board position.
example = random.randrange(x_train.shape[0])
print("example: "+str(example))

# loadDataset() already returns x_train one-hot encoded, so the sample can
# be fed to the model directly (there is no separate x_train_cat array).
input_data_cat = x_train[example:example+1]

# Recover the greyscale board from the one-hot classes. The table is the
# inverse of the value -> class mapping in dotsAndBoxesToCategorical().
classToPixel = np.array([0, 255, 65, 150, 215], dtype=np.uint8)
input_data = classToPixel[np.argmax(input_data_cat[0], axis=-1)]

prediction_lines = model.predict(input_data_cat)
prediction_lines_print = prediction_lines * 100
print(prediction_lines_print.astype(np.uint8))
print(np.sum(prediction_lines))
prediction = linesToDotsAndBoxesImage(prediction_lines[0], x_train.shape[2], x_train.shape[1])

# print input data
input_data_print = input_data
print("input "+str(input_data_print.shape)+": ")
print(input_data_print)

# generate greyscale image data from input data
target_imgdata = input_data

# print prediction
prediction_data_print = prediction * 100
prediction_data_print = prediction_data_print.astype(np.uint8)
print("prediction: ")
print(prediction_data_print)

# generate greyscale image data from prediction data
prediction_imgdata = prediction * 255
prediction_imgdata = prediction_imgdata.astype(np.uint8)

# merge image data in color channels (red: board, green: prediction, blue: empty)
tmp = np.zeros(prediction.shape, dtype=np.uint8)
merged_imgdata = np.stack([target_imgdata, prediction_imgdata, tmp], axis=2)

#create image
img = Image.fromarray(merged_imgdata, 'RGB')
img = img.resize(size=(img.size[0]*10, img.size[1]*10))

img

example: 806312
[[ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 99  0  0  0  0  0
   0]]
1.0
input (11, 13): 
[[  0   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0 215 255 215   0 215 255 215 255 215 255 215   0]
 [  0 255  65 255   0   0   0 255   0   0   0 255   0]
 [  0 215 255 215 255 215   0 215   0 215   0 215   0]
 [  0   0   0 255   0   0   0 255   0 255   0 255   0]
 [  0 215   0 215   0 215 255 215   0 215   0 215   0]
 [  0 255   0   0   0 255   0   0   0 255   0   0   0]
 [  0 215 255 215 255 215   0 215 255 215 255 215   0]
 [  0 255 150 255   0   0   0 255   0   0   0   0   0]
 [  0 215 255 215   0 215 255 215   0 215 255 215   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0]]
prediction: 
[[ 0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0 99  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0]]