AlphaZero version 11

This AlphaZero version was trained from scratch on 1,000,000 training examples from the StageFourNoMCTS dataset on various board sizes.

import sys
sys.path.append('..')

import numpy as np
import tensorflow as tf
from tensorflow.python import debug as tf_debug

from keras.callbacks import *
from keras.models import *
from keras.layers import *
from keras.optimizers import *
from keras.initializers import *
from keras.utils.np_utils import to_categorical
from keras.utils import plot_model
import keras.backend as K
from keras.regularizers import l2
from keras.engine.topology import Layer

from PIL import Image
from matplotlib.pyplot import imshow
%matplotlib inline
import random
import gc

from LineFilterLayer import LineFilterLayer
from ValueLayer import ValueLayer

# File the trained Keras model is saved to (the checkpoint callback appends ".checkpoint").
modelPath = 'model/alphaZeroV11.h5'

# NumPy .npz archive with the training examples; read by loadPVDataset().
datasetPath = 'StageFour-AlphaZeroV7-noMCTS-1000000-4x3-23:37-31_05_2018.npz'

Using TensorFlow backend.

# The layers below are declared for channels-last tensors, so show the backend setting.
print(K.image_data_format()) 
# expected output: channels_last

channels_last

def dotsAndBoxesToCategorical(inputData):
    """One-hot encode a greyscale Dots and Boxes image.

    Pixel values are mapped to class indices before encoding:

        255 -> 1 (line), 65 -> 2 (box A), 150 -> 3 (box B), 215 -> 4 (dot)

    All other values are left as they are, so background pixels are
    presumably stored as 0 already (verify against the dataset generator).

    Args:
        inputData: array of pixel values. It is copied, not modified.

    Returns:
        Array of shape ``inputData.shape + (5,)`` with one plane per class.
    """
    # The network input has exactly five channels. Without an explicit class
    # count, to_categorical sizes its output from the largest value present,
    # so the channel count would depend on the data passed in.
    numClasses = 5

    inp = np.copy(inputData)
    inp[inp == 255] = 1 # Line - comes first so that target data only has two categories
    inp[inp == 65] = 2 # Box A
    inp[inp == 150] = 3 # Box B
    inp[inp == 215] = 4 # Dot
    cat = to_categorical(inp, num_classes=numClasses)
    # Reshape explicitly so the result is image-shaped regardless of how the
    # installed Keras version lays out the output for multi-dimensional input.
    return cat.reshape(inp.shape + (numClasses,))

def imgSizeToBoxes(x):
    """Convert an image edge length in pixels to the number of boxes along it.

    This is the inverse of ``pixels = 2 * boxes + 3``.

    Args:
        x: edge length of the board image in pixels.

    Returns:
        The number of boxes along that edge, as an integer for integer input.
    """
    # Floor division keeps the result an int on Python 3 as well, so callers
    # can pass it to range() and use it in index arithmetic.
    return (x - 3) // 2

def lineFilterMatrixNP(imgWidth,imgHeight):
    """Build a boolean mask marking the pixels that represent lines.

    Lines are numbered row by row: for each row of boxes first the
    horizontal lines, then the vertical lines below them. The final row
    consists of horizontal lines only.

    Args:
        imgWidth: width of the board image in pixels.
        imgHeight: height of the board image in pixels.

    Returns:
        Boolean array of shape ``(imgHeight, imgWidth)`` that is True at
        every line pixel and False elsewhere.
    """
    # int() keeps the counts usable in range() even when the helper returns
    # a float (true division on Python 3).
    boxWidth = int(imgSizeToBoxes(imgWidth))
    boxHeight = int(imgSizeToBoxes(imgHeight))
    linesCnt = 2*boxWidth*boxHeight+boxWidth+boxHeight
    # One "row" of line indices: boxWidth horizontal + boxWidth+1 vertical lines.
    rowLen = 2*boxWidth + 1
    # Builtin bool: the np.bool alias no longer exists in recent NumPy releases.
    mat = np.zeros((imgHeight, imgWidth), dtype=bool)
    for idx in range(linesCnt):
        # divmod stays in integers on both Python 2 and Python 3.
        y1, pos = divmod(idx, rowLen)
        if pos < boxWidth:
            # horizontal line
            x1 = pos
            x2 = x1 + 1
            y2 = y1
        else:
            # vertical line
            x1 = pos - boxWidth
            x2 = x1
            y2 = y1 + 1
        # Pixel position of the line between points (x1, y1) and (x2, y2).
        px = x2 * 2 + y2 - y1
        py = y2 * 2 + x2 - x1
        mat[py,px] = True
    return mat

def loadPVDataset(datasetPath):
    """Load a policy/value training set from an .npz archive.

    The archive must contain the arrays ``input``, ``policy`` and ``value``.
    The input images are one-hot encoded, and the policy images are reduced
    to one entry per line and scaled down by 255.

    Args:
        datasetPath: path of the .npz file.

    Returns:
        Tuple ``(x_input, y_policy, y_value)``. ``x_input`` is the one-hot
        encoded input, ``y_policy`` has shape ``(samples, lines)`` and
        ``y_value`` is returned exactly as stored.
    """
    # The archive object holds an open file handle; close it as soon as the
    # arrays have been read into memory.
    with np.load(datasetPath) as rawDataset:
        x_input = rawDataset['input']
        y_policy = rawDataset['policy']
        y_value = rawDataset['value']

    x_input = dotsAndBoxesToCategorical(x_input)

    # Boolean-mask indexing keeps only the line pixels, flattened per sample.
    lineMask = lineFilterMatrixNP(y_policy.shape[-1], y_policy.shape[-2])
    y_policy = y_policy[:, lineMask]

    if np.issubdtype(y_policy.dtype, np.integer):
        # Integer data: floor division, which is what the in-place "/=" did
        # under Python 2. On Python 3 "/=" cannot write its float result back
        # into an integer array and raises.
        y_policy //= 255
    else:
        y_policy /= 255

    return (x_input, y_policy, y_value)

# Print floats with two decimals to keep the array dumps below readable.
np.set_printoptions(precision=2)
# Load the complete training set into memory.
(x_input, y_policy, y_value) = loadPVDataset(datasetPath)

# Shapes of the three arrays, then the first example as a sanity check.
print(x_input.shape)
print(y_policy.shape)
print(y_value.shape)
print("input:")
# Channel 1 is the "line" plane of the one-hot encoding.
print(x_input[0,::,::,1])
print("policy:")
print(y_policy[0])
print('value:')
print(y_value[0])

(1000000, 9, 11, 5)
(1000000, 31)
(1000000, 1)
input:
[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 1. 0. 1. 0. 0.]
 [0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0.]
 [0. 0. 0. 0. 1. 0. 1. 0. 1. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 1. 0. 1. 0. 1. 0. 0.]
 [0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0.]
 [0. 0. 1. 0. 1. 0. 1. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
policy:
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
value:
[-0.17]

# Hyperparameters of the residual network.
kernelSize = (5,5)  # kernel size used by every Conv2D layer
filterCnt = 128  # filters per convolution in the input stem and the residual blocks
l2reg = 1e-4  # L2 regularisation factor for the convolution kernels
resBlockCnt = 8  # number of residual blocks
# Image size taken from the loaded data, which is laid out as
# (samples, height, width, channels).
imgWidth = x_input.shape[-2]
imgHeight = x_input.shape[-3]

def build_residual_block(x, index):
    """Append one residual block to ``x`` and return the block's output.

    Layout: Conv2D -> BatchNormalization -> ReLU -> Conv2D ->
    BatchNormalization. The result is added to the block input and passed
    through a final ReLU. Filter count, kernel size and L2 factor are read
    from the module-level ``filterCnt``, ``kernelSize`` and ``l2reg``.

    Args:
        x: input tensor of the block. It is added to the convolution output,
            so it has to carry ``filterCnt`` channels already.
        index: number used as prefix of the layer names ("res<index>_...").

    Returns:
        The output tensor of the block.
    """
    prefix = "res" + str(index)
    filterTag = str(filterCnt)

    def conv(layerName):
        # Both convolutions of the block share the same configuration.
        return Conv2D(filters=filterCnt, kernel_size=kernelSize, padding="same",
                      data_format="channels_last", kernel_regularizer=l2(l2reg),
                      name=layerName)

    # The layer names (including the differing "_" / "-" separators of the two
    # convolutions) are reproduced exactly; they appear in model.summary().
    out = conv(prefix + "_conv1_" + filterTag)(x)
    out = BatchNormalization(name=prefix + "_batchnorm1")(out)
    out = Activation("relu", name=prefix + "_relu1")(out)
    out = conv(prefix + "_conv2-" + filterTag)(out)
    out = BatchNormalization(name=prefix + "_batchnorm2")(out)
    # Skip connection: add the untouched block input.
    out = Add(name=prefix + "_add")([x, out])
    return Activation("relu", name=prefix + "_relu2")(out)


# Network input: five one-hot planes per pixel. Height and width are left
# unspecified (None) in the input shape.
img_input = Input(shape=(None,None,5,))
# Input stem.
# NOTE(review): the stem applies ReLU before batch normalisation, whereas the
# residual blocks normalise first - confirm this ordering is intended.
x = Conv2D(filterCnt, kernelSize, padding='same', kernel_regularizer=l2(l2reg), name="input_conv")(img_input)
x = Activation("relu", name="input_relu")(x)
x = BatchNormalization()(x)

# Residual tower; the blocks are numbered starting at 1.
for i in range(resBlockCnt):
    x = build_residual_block(x, i+1)

# Output of the tower, shared by the policy head and the value head.
res_out = x

# policy output
# A single-filter convolution produces one plane; LineFilterLayer (project-local)
# presumably keeps only the line pixels - see LineFilterLayer.py. The softmax
# turns the remaining entries into a distribution over lines.
x = Conv2D(1, kernelSize, padding='same', kernel_regularizer=l2(l2reg), name="policy_conv")(x)
x = LineFilterLayer(imgWidth, imgHeight)(x)
x = Activation("softmax", name="policy")(x)
policy_output = x

# value output
# A single-filter convolution on the tower output, reduced to one number by
# ValueLayer (project-local) and squashed to (-1, 1) by tanh. The commented-out
# Flatten/Dense pair is the variant ValueLayer replaced; it would average all pixels.
x = Conv2D(1, kernelSize, padding='same', kernel_regularizer=l2(l2reg), name="value_conv")(res_out)
#x = Flatten()(x)
#x = Dense(1, trainable=False, kernel_initializer=Constant(1.0/(imgWidth*imgHeight)), use_bias=False, name="value_dense")(x)
x = ValueLayer(imgWidth, imgHeight)(x)
x = Activation("tanh", name="value")(x)
value_output = x
    
# Two-headed model; the losses are listed in the same order as the outputs
# (cross-entropy for the policy, mean squared error for the value).
model = Model(inputs=img_input, outputs=[policy_output, value_output])
model.compile(optimizer='adam', loss=['categorical_crossentropy', 'mean_squared_error'])

# Debug helper, disabled. If it is re-enabled, compare the name with "==";
# "is" tests object identity and is not reliable for strings.
#for layer in model.layers:
#    print("{:30}: {}".format(layer.name, layer.output_shape))
#    if layer.name is 'value_dense':
#        print(layer.kernel)
    
model.summary()

LineFilterLayer with image size 11 x 9
ValueLayer with image size 11 x 9
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
==================================================================================================
input_1 (InputLayer)            (None, None, None, 5 0                                            
__________________________________________________________________________________________________
input_conv (Conv2D)             (None, None, None, 1 16128       input_1[0][0]                    
__________________________________________________________________________________________________
input_relu (Activation)         (None, None, None, 1 0           input_conv[0][0]                 
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, None, None, 1 512         input_relu[0][0]                 
__________________________________________________________________________________________________
res1_conv1_128 (Conv2D)         (None, None, None, 1 409728      batch_normalization_1[0][0]      
__________________________________________________________________________________________________
res1_batchnorm1 (BatchNormaliza (None, None, None, 1 512         res1_conv1_128[0][0]             
__________________________________________________________________________________________________
res1_relu1 (Activation)         (None, None, None, 1 0           res1_batchnorm1[0][0]            
__________________________________________________________________________________________________
res1_conv2-128 (Conv2D)         (None, None, None, 1 409728      res1_relu1[0][0]                 
__________________________________________________________________________________________________
res1_batchnorm2 (BatchNormaliza (None, None, None, 1 512         res1_conv2-128[0][0]             
__________________________________________________________________________________________________
res1_add (Add)                  (None, None, None, 1 0           batch_normalization_1[0][0]      
                                                                 res1_batchnorm2[0][0]            
__________________________________________________________________________________________________
res1_relu2 (Activation)         (None, None, None, 1 0           res1_add[0][0]                   
__________________________________________________________________________________________________
res2_conv1_128 (Conv2D)         (None, None, None, 1 409728      res1_relu2[0][0]                 
__________________________________________________________________________________________________
res2_batchnorm1 (BatchNormaliza (None, None, None, 1 512         res2_conv1_128[0][0]             
__________________________________________________________________________________________________
res2_relu1 (Activation)         (None, None, None, 1 0           res2_batchnorm1[0][0]            
__________________________________________________________________________________________________
res2_conv2-128 (Conv2D)         (None, None, None, 1 409728      res2_relu1[0][0]                 
__________________________________________________________________________________________________
res2_batchnorm2 (BatchNormaliza (None, None, None, 1 512         res2_conv2-128[0][0]             
__________________________________________________________________________________________________
res2_add (Add)                  (None, None, None, 1 0           res1_relu2[0][0]                 
                                                                 res2_batchnorm2[0][0]            
__________________________________________________________________________________________________
res2_relu2 (Activation)         (None, None, None, 1 0           res2_add[0][0]                   
__________________________________________________________________________________________________
res3_conv1_128 (Conv2D)         (None, None, None, 1 409728      res2_relu2[0][0]                 
__________________________________________________________________________________________________
res3_batchnorm1 (BatchNormaliza (None, None, None, 1 512         res3_conv1_128[0][0]             
__________________________________________________________________________________________________
res3_relu1 (Activation)         (None, None, None, 1 0           res3_batchnorm1[0][0]            
__________________________________________________________________________________________________
res3_conv2-128 (Conv2D)         (None, None, None, 1 409728      res3_relu1[0][0]                 
__________________________________________________________________________________________________
res3_batchnorm2 (BatchNormaliza (None, None, None, 1 512         res3_conv2-128[0][0]             
__________________________________________________________________________________________________
res3_add (Add)                  (None, None, None, 1 0           res2_relu2[0][0]                 
                                                                 res3_batchnorm2[0][0]            
__________________________________________________________________________________________________
res3_relu2 (Activation)         (None, None, None, 1 0           res3_add[0][0]                   
__________________________________________________________________________________________________
res4_conv1_128 (Conv2D)         (None, None, None, 1 409728      res3_relu2[0][0]                 
__________________________________________________________________________________________________
res4_batchnorm1 (BatchNormaliza (None, None, None, 1 512         res4_conv1_128[0][0]             
__________________________________________________________________________________________________
res4_relu1 (Activation)         (None, None, None, 1 0           res4_batchnorm1[0][0]            
__________________________________________________________________________________________________
res4_conv2-128 (Conv2D)         (None, None, None, 1 409728      res4_relu1[0][0]                 
__________________________________________________________________________________________________
res4_batchnorm2 (BatchNormaliza (None, None, None, 1 512         res4_conv2-128[0][0]             
__________________________________________________________________________________________________
res4_add (Add)                  (None, None, None, 1 0           res3_relu2[0][0]                 
                                                                 res4_batchnorm2[0][0]            
__________________________________________________________________________________________________
res4_relu2 (Activation)         (None, None, None, 1 0           res4_add[0][0]                   
__________________________________________________________________________________________________
res5_conv1_128 (Conv2D)         (None, None, None, 1 409728      res4_relu2[0][0]                 
__________________________________________________________________________________________________
res5_batchnorm1 (BatchNormaliza (None, None, None, 1 512         res5_conv1_128[0][0]             
__________________________________________________________________________________________________
res5_relu1 (Activation)         (None, None, None, 1 0           res5_batchnorm1[0][0]            
__________________________________________________________________________________________________
res5_conv2-128 (Conv2D)         (None, None, None, 1 409728      res5_relu1[0][0]                 
__________________________________________________________________________________________________
res5_batchnorm2 (BatchNormaliza (None, None, None, 1 512         res5_conv2-128[0][0]             
__________________________________________________________________________________________________
res5_add (Add)                  (None, None, None, 1 0           res4_relu2[0][0]                 
                                                                 res5_batchnorm2[0][0]            
__________________________________________________________________________________________________
res5_relu2 (Activation)         (None, None, None, 1 0           res5_add[0][0]                   
__________________________________________________________________________________________________
res6_conv1_128 (Conv2D)         (None, None, None, 1 409728      res5_relu2[0][0]                 
__________________________________________________________________________________________________
res6_batchnorm1 (BatchNormaliza (None, None, None, 1 512         res6_conv1_128[0][0]             
__________________________________________________________________________________________________
res6_relu1 (Activation)         (None, None, None, 1 0           res6_batchnorm1[0][0]            
__________________________________________________________________________________________________
res6_conv2-128 (Conv2D)         (None, None, None, 1 409728      res6_relu1[0][0]                 
__________________________________________________________________________________________________
res6_batchnorm2 (BatchNormaliza (None, None, None, 1 512         res6_conv2-128[0][0]             
__________________________________________________________________________________________________
res6_add (Add)                  (None, None, None, 1 0           res5_relu2[0][0]                 
                                                                 res6_batchnorm2[0][0]            
__________________________________________________________________________________________________
res6_relu2 (Activation)         (None, None, None, 1 0           res6_add[0][0]                   
__________________________________________________________________________________________________
res7_conv1_128 (Conv2D)         (None, None, None, 1 409728      res6_relu2[0][0]                 
__________________________________________________________________________________________________
res7_batchnorm1 (BatchNormaliza (None, None, None, 1 512         res7_conv1_128[0][0]             
__________________________________________________________________________________________________
res7_relu1 (Activation)         (None, None, None, 1 0           res7_batchnorm1[0][0]            
__________________________________________________________________________________________________
res7_conv2-128 (Conv2D)         (None, None, None, 1 409728      res7_relu1[0][0]                 
__________________________________________________________________________________________________
res7_batchnorm2 (BatchNormaliza (None, None, None, 1 512         res7_conv2-128[0][0]             
__________________________________________________________________________________________________
res7_add (Add)                  (None, None, None, 1 0           res6_relu2[0][0]                 
                                                                 res7_batchnorm2[0][0]            
__________________________________________________________________________________________________
res7_relu2 (Activation)         (None, None, None, 1 0           res7_add[0][0]                   
__________________________________________________________________________________________________
res8_conv1_128 (Conv2D)         (None, None, None, 1 409728      res7_relu2[0][0]                 
__________________________________________________________________________________________________
res8_batchnorm1 (BatchNormaliza (None, None, None, 1 512         res8_conv1_128[0][0]             
__________________________________________________________________________________________________
res8_relu1 (Activation)         (None, None, None, 1 0           res8_batchnorm1[0][0]            
__________________________________________________________________________________________________
res8_conv2-128 (Conv2D)         (None, None, None, 1 409728      res8_relu1[0][0]                 
__________________________________________________________________________________________________
res8_batchnorm2 (BatchNormaliza (None, None, None, 1 512         res8_conv2-128[0][0]             
__________________________________________________________________________________________________
res8_add (Add)                  (None, None, None, 1 0           res7_relu2[0][0]                 
                                                                 res8_batchnorm2[0][0]            
__________________________________________________________________________________________________
res8_relu2 (Activation)         (None, None, None, 1 0           res8_add[0][0]                   
__________________________________________________________________________________________________
policy_conv (Conv2D)            (None, None, None, 1 3201        res8_relu2[0][0]                 
__________________________________________________________________________________________________
value_conv (Conv2D)             (None, None, None, 1 3201        res8_relu2[0][0]                 
__________________________________________________________________________________________________
line_filter_layer_1 (LineFilter (None, None)         0           policy_conv[0][0]                
__________________________________________________________________________________________________
value_layer_1 (ValueLayer)      (None, 1)            0           value_conv[0][0]                 
__________________________________________________________________________________________________
policy (Activation)             (None, None)         0           line_filter_layer_1[0][0]        
__________________________________________________________________________________________________
value (Activation)              (None, 1)            0           value_layer_1[0][0]              
==================================================================================================
Total params: 6,586,882
Trainable params: 6,582,530
Non-trainable params: 4,352
__________________________________________________________________________________________________

# Optional: wrap the session in the TensorFlow CLI debugger.
#sess = K.get_session()
#sess = tf_debug.LocalCLIDebugWrapperSession(sess)
#K.set_session(sess)

# Training
callbacks = []

# Writes the full model (not only the weights) to "<modelPath>.checkpoint".
checkpoint = ModelCheckpoint(filepath=modelPath+".checkpoint", save_weights_only=False)
callbacks.append(checkpoint)

# No explicit ProgbarLogger here: fit() installs its own progress bar when
# verbose output is enabled (the default). A second instance in the callback
# list makes every progress line appear twice.

# TensorBoard logging is prepared but disabled; uncomment the append to enable it.
tensorboard = TensorBoard(log_dir='model/log2', write_grads=True, write_graph=True, write_images=True, histogram_freq=1)
#callbacks.append(tensorboard)

try:
    model.fit(x_input, [y_policy, y_value], epochs=2, batch_size=64, callbacks=callbacks, validation_split=0.001)
except KeyboardInterrupt:
    # A manual interrupt would otherwise skip the save below and lose the
    # progress of the running epoch. Save first, then let the interrupt
    # propagate so the notebook still stops.
    model.save(modelPath)
    raise

model.save(modelPath)

Train on 999000 samples, validate on 1000 samples
Epoch 1/2
Epoch 1/2
999000/999000 [==============================] - 4691s 5ms/step - loss: 1.4390 - policy_loss: 0.5548 - value_loss: 0.7304 - val_loss: 0.7435 - val_policy_loss: 0.4760 - val_value_loss: 0.1198
999000/999000 [==============================] - 4693s 5ms/step - loss: 1.4390 - policy_loss: 0.5548 - value_loss: 0.7304 - val_loss: 0.7435 - val_policy_loss: 0.4760 - val_value_loss: 0.1198
Epoch 2/2
Epoch 2/2
107200/999000 [==>...........................] - ETA: 1:13:04 - loss: 0.7889 - policy_loss: 0.5122 - value_loss: 0.1143

---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-9-0875ac397bc6> in <module>()
     15 #callbacks.append(tensorboard)
     16 
---> 17 model.fit(x_input, [y_policy, y_value], epochs=2, batch_size=64, callbacks=callbacks, validation_split=0.001)
     18 
     19 model.save(modelPath)

/usr/lib/python2.7/site-packages/keras/engine/training.pyc in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, **kwargs)
   1703                               initial_epoch=initial_epoch,
   1704                               steps_per_epoch=steps_per_epoch,
-> 1705                               validation_steps=validation_steps)
   1706 
   1707     def evaluate(self, x=None, y=None,

/usr/lib/python2.7/site-packages/keras/engine/training.pyc in _fit_loop(self, f, ins, out_labels, batch_size, epochs, verbose, callbacks, val_f, val_ins, shuffle, callback_metrics, initial_epoch, steps_per_epoch, validation_steps)
   1233                         ins_batch[i] = ins_batch[i].toarray()
   1234 
-> 1235                     outs = f(ins_batch)
   1236                     if not isinstance(outs, list):
   1237                         outs = [outs]

/usr/lib/python2.7/site-packages/keras/backend/tensorflow_backend.pyc in __call__(self, inputs)
   2476         session = get_session()
   2477         updated = session.run(fetches=fetches, feed_dict=feed_dict,
-> 2478                               **self.session_kwargs)
   2479         return updated[:len(self.outputs)]
   2480 

/usr/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in run(self, fetches, feed_dict, options, run_metadata)
    898     try:
    899       result = self._run(None, fetches, feed_dict, options_ptr,
--> 900                          run_metadata_ptr)
    901       if run_metadata:
    902         proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

/usr/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _run(self, handle, fetches, feed_dict, options, run_metadata)
   1133     if final_fetches or final_targets or (handle and feed_dict_tensor):
   1134       results = self._do_run(handle, final_targets, final_fetches,
-> 1135                              feed_dict_tensor, options, run_metadata)
   1136     else:
   1137       results = []

/usr/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
   1314     if handle is None:
   1315       return self._do_call(_run_fn, feeds, fetches, targets, options,
-> 1316                            run_metadata)
   1317     else:
   1318       return self._do_call(_prun_fn, handle, feeds, fetches)

/usr/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _do_call(self, fn, *args)
   1320   def _do_call(self, fn, *args):
   1321     try:
-> 1322       return fn(*args)
   1323     except errors.OpError as e:
   1324       message = compat.as_text(e.message)

/usr/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _run_fn(feed_dict, fetch_list, target_list, options, run_metadata)
   1305       self._extend_graph()
   1306       return self._call_tf_sessionrun(
-> 1307           options, feed_dict, fetch_list, target_list, run_metadata)
   1308 
   1309     def _prun_fn(handle, feed_dict, fetch_list):

/usr/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _call_tf_sessionrun(self, options, feed_dict, fetch_list, target_list, run_metadata)
   1407       return tf_session.TF_SessionRun_wrapper(
   1408           self._session, options, feed_dict, fetch_list, target_list,
-> 1409           run_metadata)
   1410     else:
   1411       with errors.raise_exception_on_not_ok_status() as status:

KeyboardInterrupt:

# Save the model in its current state explicitly, e.g. after training was
# interrupted before the save at the end of the training cell was reached.
model.save(modelPath)

def linesToDotsAndBoxesImage(lines, imgWidth, imgHeight):
    """Place a flat vector of per-line values at the line pixels of an image.

    Lines are numbered row by row: for each row of boxes first the
    horizontal lines, then the vertical lines below them. The final row
    consists of horizontal lines only.

    Args:
        lines: 1-D array with one value per line; its dtype becomes the dtype
            of the result.
        imgWidth: width of the board image in pixels.
        imgHeight: height of the board image in pixels.

    Returns:
        Array of shape ``(imgHeight, imgWidth)`` holding ``lines[idx]`` at the
        pixel of line ``idx`` and zeros everywhere else.
    """
    # int() keeps the counts usable in range() even when the helper returns
    # a float (true division on Python 3).
    boxWidth = int(imgSizeToBoxes(imgWidth))
    boxHeight = int(imgSizeToBoxes(imgHeight))
    linesCnt = 2*boxWidth*boxHeight+boxWidth+boxHeight
    # One "row" of line indices: boxWidth horizontal + boxWidth+1 vertical lines.
    rowLen = 2*boxWidth + 1
    mat = np.zeros((imgHeight, imgWidth), dtype=lines.dtype)
    for idx in range(linesCnt):
        # divmod stays in integers on both Python 2 and Python 3.
        y1, pos = divmod(idx, rowLen)
        if pos < boxWidth:
            # horizontal line
            x1 = pos
            x2 = x1 + 1
            y2 = y1
        else:
            # vertical line
            x1 = pos - boxWidth
            x2 = x1
            y2 = y1 + 1
        # Pixel position of the line between points (x1, y1) and (x2, y2).
        px = x2 * 2 + y2 - y1
        py = y2 * 2 + x2 - x1
        mat[py,px] = lines[idx]
    return mat

# Pick a random training example and compare the network output with its target.
example = random.randrange(x_input.shape[0])
print("example: "+str(example))

# Slice instead of index so the batch dimension is kept for predict().
input_data = x_input[example:example+1]

(prediction_lines, prediction_value) = model.predict(input_data)
# Policy in percent, truncated to integers for a compact printout.
prediction_lines_print = prediction_lines * 100
print(prediction_lines_print.astype(np.uint8))
# Sum of the softmax output, printed as a sanity check.
print(np.sum(prediction_lines))
# Map the per-line probabilities back to their pixel positions.
prediction = linesToDotsAndBoxesImage(prediction_lines[0], imgWidth, imgHeight)

# print input data
# Channel 1 is the "line" plane of the one-hot encoding.
input_data_print = x_input[example,:,:,1] 
input_data_print = input_data_print.astype(np.uint8)
print("input "+str(input_data_print.shape)+": ")
print(input_data_print)

# generate greyscale image data from input data
# NOTE(review): "planes" is not used anywhere in this cell; the sum below
# selects channels 1..4 with the slice 1: instead.
planes = [1,2,3,4]
input_imgdata = np.sum(x_input[example,:,:,1:], axis=-1) * 255
input_imgdata = input_imgdata.astype(np.uint8)

# print prediction
prediction_data_print = prediction * 100 
prediction_data_print = prediction_data_print.astype(np.uint8)
print("prediction policy: ")
print(prediction_data_print)

print("prediction value: ")
print(prediction_value)

print("target value: ")
print(y_value[example])

# generate greyscale image data from prediction data
prediction_imgdata = prediction * 255
prediction_imgdata = prediction_imgdata.astype(np.uint8)

# generate greyscale image of target data
target_imgdata = linesToDotsAndBoxesImage(y_policy[example], imgWidth, imgHeight) * 255

# merge image data in color channels
# Channel order: red = input, green = prediction, blue = target.
merged_imgdata = np.stack([input_imgdata, prediction_imgdata, target_imgdata], axis=2)

#create image
img = Image.fromarray(merged_imgdata, 'RGB')
# Enlarge by a factor of 10 so the individual pixels are visible.
img = img.resize(size=(img.size[0]*10, img.size[1]*10))

# A bare expression as the last statement makes the notebook display the image.
img

example: 408113
[[ 0  0  0 33  0  0  0  0 19  0  0  0 15  0  0 20  9  0  0  0  0  0  0  0
   0  0  0  0  0  0  0]]
1.0
input (9, 11): 
[[0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0]
 [0 1 0 1 0 1 0 1 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 1 0]
 [0 0 1 0 1 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 1 0 1 0]
 [0 0 0 0 0 0 1 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0]]
prediction policy: 
[[ 0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0 33  0  0]
 [ 0  0  0  0  0  0  0  0  0 19  0]
 [ 0  0  0  0  0  0  0  0 15  0  0]
 [ 0  0  0  0  0 20  0  9  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0]]
prediction value: 
[[0.1]]
target value: 
[0.5]