[Computer-go] CNN for winrate and territory

Detlef Schmicker ds2 at physik.de
Sun Feb 8 03:08:27 PST 2015


Exactly the one from the cited paper:


The best network had one convolutional layer with 64 7x7 filters,
two convolutional layers with 64 5x5 filters, two layers with 48
5x5 filters, two layers with 32 5x5 filters, and one fully
connected layer.


I use Caffe, and the definition of the training network is:

name: "LogReg"
layers {
   name: "godata"
   type: DATA
   top: "data_orig"
   top: "label"
   data_param {
     source: "train_result_leveldb/"
     batch_size: 256
   }
   include: { phase: TRAIN }
}
layers {
   name: "godata"
   type: DATA
   top: "data_orig"
   top: "label"
   data_param {
     source: "test_result_leveldb/"
     batch_size: 256
   }
   include: { phase: TEST }
}
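
# both DATA layers read (board planes, score label) batches from
# leveldb; the sources are produced offline from the oakfoam playout
# positions described in the quoted mail below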

# data_orig carries 3 channels: black stones, white stones and the
# territory target; SLICE splits off the first 2 channels as the
# network input and keeps the third as the territory label
layers {
   name: "slice"
   type: SLICE
   bottom: "data_orig"
   top: "data"
   top: "data_territory"
   slice_param {
     slice_dim: 1
     slice_point: 2
   }
}

# this part is the same in the training and the prediction network;
# pad = (kernel_size - 1) / 2 keeps the 19x19 board size through all
# convolution layers
layers {
   name: "conv1_7x7_64"
   type: CONVOLUTION
   blobs_lr: 1.
   blobs_lr: 2.
   bottom: "data"
   top: "conv2"
   convolution_param {
     num_output: 64
     kernel_size: 7
     pad: 3
     weight_filler {
       type: "xavier"
     }
     bias_filler {
       type: "constant"
     }
   }
}

layers {
   name: "tanh2"
   type: TANH
   bottom: "conv2"
   top: "conv2u"
}

layers {
   name: "conv2_5x5_64"
   type: CONVOLUTION
   blobs_lr: 1.
   blobs_lr: 2.
   bottom: "conv2u"
   top: "conv3"
   convolution_param {
     num_output: 64
     kernel_size: 5
     pad: 2
     weight_filler {
       type: "xavier"
     }
     bias_filler {
       type: "constant"
     }
   }
}

layers {
   name: "tanh3"
   type: TANH
   bottom: "conv3"
   top: "conv3u"
}

layers {
   name: "conv3_5x5_64"
   type: CONVOLUTION
   blobs_lr: 1.
   blobs_lr: 2.
   bottom: "conv3u"
   top: "conv4"
   convolution_param {
     num_output: 64
     kernel_size: 5
     pad: 2
     weight_filler {
       type: "xavier"
     }
     bias_filler {
       type: "constant"
     }
   }
}

layers {
   name: "tanh4"
   type: TANH
   bottom: "conv4"
   top: "conv4u"
}

layers {
   name: "conv4_5x5_48"
   type: CONVOLUTION
   blobs_lr: 1.
   blobs_lr: 2.
   bottom: "conv4u"
   top: "conv5"
   convolution_param {
     num_output: 48
     kernel_size: 5
     pad: 2
     weight_filler {
       type: "xavier"
     }
     bias_filler {
       type: "constant"
     }
   }
}

layers {
   name: "tanh5"
   type: TANH
   bottom: "conv5"
   top: "conv5u"
}

layers {
   name: "conv5_5x5_48"
   type: CONVOLUTION
   blobs_lr: 1.
   blobs_lr: 2.
   bottom: "conv5u"
   top: "conv6"
   convolution_param {
     num_output: 48
     kernel_size: 5
     pad: 2
     weight_filler {
       type: "xavier"
     }
     bias_filler {
       type: "constant"
     }
   }
}

layers {
   name: "tanh6"
   type: TANH
   bottom: "conv6"
   top: "conv6u"
}


layers {
   name: "conv6_5x5_32"
   type: CONVOLUTION
   blobs_lr: 1.
   blobs_lr: 2.
   bottom: "conv6u"
   top: "conv7"
   convolution_param {
     num_output: 32
     kernel_size: 5
     pad: 2
     weight_filler {
       type: "xavier"
     }
     bias_filler {
       type: "constant"
     }
   }
}

layers {
   name: "tanh7"
   type: TANH
   bottom: "conv7"
   top: "conv7u"
}


layers {
   name: "conv7_5x5_32"
   type: CONVOLUTION
   blobs_lr: 1.
   blobs_lr: 2.
   bottom: "conv7u"
   top: "conv8"
   convolution_param {
     num_output: 32
     kernel_size: 5
     pad: 2
     weight_filler {
       type: "xavier"
     }
     bias_filler {
       type: "constant"
     }
   }
}

layers {
   name: "tanh8"
   type: TANH
   bottom: "conv8"
   top: "conv8u"
}

layers {
   name: "flat"
   type: FLATTEN
   bottom: "conv8u"
   top: "conv8_flat"
}

# SPLIT duplicates the flattened features for the two output heads
layers {
   name: "split"
   type: SPLIT
   bottom: "conv8_flat"
   top: "conv8_flata"
   top: "conv8_flatb"
}

# head 1: territory prediction, one sigmoid value per point of the
# 19x19 board
layers {
   name: "ip"
   type: INNER_PRODUCT
   bottom: "conv8_flata"
   top: "ip_zw"
   inner_product_param {
     num_output: 361
     weight_filler {
       type: "xavier"
     }
     bias_filler {
       type: "constant"
     }
   }
}

layers {
   name: "sigmoid"
   type: SIGMOID
   bottom: "ip_zw"
   top: "ip_zws"
}

# head 2: final-score classification, one label per score from -60 to
# +60 for black
layers {
   name: "ip2"
   type: INNER_PRODUCT
   bottom: "conv8_flatb"
   top: "ip_label"
   inner_product_param {
     num_output: 121
     weight_filler {
       type: "xavier"
     }
     bias_filler {
       type: "constant"
     }
   }
}


# the following layers exist only in the training network
layers {
   name: "flat_territory"
   type: FLATTEN
   bottom: "data_territory"
   top: "flat_territory"
}
layers {
   name: "loss_territory"
   type: EUCLIDEAN_LOSS
   bottom: "ip_zws"
   bottom: "flat_territory"
   top: "lossa"
}
layers {
   name: "accuracy"
   type: ACCURACY
   bottom: "ip_label"
   bottom: "label"
   top: "accuracy"
}

layers {
   name: "loss_label"
   type: SOFTMAX_LOSS
   bottom: "ip_label"
   bottom: "label"
   top: "lossb"
}
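
To make the komi handling concrete, here is a minimal numpy sketch of
how the 121-way label distribution can be turned into a winrate (see
my mail quoted below). It assumes that a softmax is applied on top of
ip_label at prediction time and that label i stands for black leading
by (i - 60) points; both are assumptions of the sketch, not part of
the prototxt above.

import numpy as np

# score (black's territory lead) associated with each of the 121
# labels, assuming label i stands for a lead of (i - 60) points
SCORES = np.arange(-60, 61)

def winrate_from_label_probs(label_probs, komi=6.5):
    """Sum the probability mass of all scores that beat the komi.

    For komi 6.5 this sums the labels +7 to +60, as described in the
    quoted mail below.
    """
    label_probs = np.asarray(label_probs)
    assert label_probs.shape == (121,)
    return float(label_probs[SCORES > komi].sum())

# example with a dummy uniform distribution in place of the real
# softmax output: 54 of the 121 labels lie above +6.5
probs = np.full(121, 1.0 / 121)
print(winrate_from_label_probs(probs))  # -> 54/121 = 0.446...

And regarding the soft turnover into the MC framework asked about in
the quoted mail: one possible shape for it, sketched here only as a
discussion starter, is to count the CNN winrate as a fixed number of
virtual playouts, so that it dominates fresh nodes and real playouts
gradually take over. The function name and the weight of 1000 are
placeholders, not tuned values.

def blended_winrate(playout_wins, playout_count, cnn_winrate,
                    virtual_playouts=1000):
    """Blend MC playout statistics with a CNN winrate estimate.

    The CNN estimate is counted as virtual_playouts extra playouts:
    nodes with few real playouts follow the CNN, heavily sampled
    nodes follow the MC statistics.
    """
    wins = playout_wins + virtual_playouts * cnn_winrate
    count = playout_count + virtual_playouts
    return wins / count

# example: a fresh node (8 wins in 20 playouts) with CNN winrate 0.60
# stays close to the CNN value while real visits are few
print(blended_winrate(8, 20, 0.60))  # -> 608/1020 = 0.596...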



On 08.02.2015 at 11:43, Álvaro Begué wrote:
> What network architecture did you use? Can you give us some details?
>
>
>
> On Sun, Feb 8, 2015 at 5:22 AM, Detlef Schmicker <ds2 at physik.de> wrote:
>
>     Hi,
>
>     I am working on a CNN for winrate and territory:
>
>     approach:
>      - input: 2 layers for black and white stones
>      - 1st output: 1 layer of territory (0.0 for owned by white, 1.0
>     for owned by black; because I missed TANH in the first place, I
>     used SIGMOID)
>      - 2nd output: a label for the territory lead for black, from
>     -60 to +60
>     the losses of both outputs are trained together
>
>     the idea is that this way I do not have to put komi into the
>     input, and can compute the winrate from the statistics of the
>     trained label:
>
>     e.g. for komi 6.5 I sum the probabilities from +7 to +60 and get
>     something like a winrate
>
>     I trained on 800,000 positions with territory information
>     obtained from 500 playouts of oakfoam, which I symmetrized with
>     the 8 board transformations, leading to >6,000,000 positions.
>     (It is expensive to produce the positions due to the playouts...)
>
>     The layers are the same as in the large network from Christopher
>     Clark and Amos Storkey: http://arxiv.org/abs/1412.3409
>
>
>     I get reasonable territory predictions from this network
>     (compared to 500 playouts of oakfoam); the winrates seem to be
>     overestimated. But anyway, it looks like it is worth doing some
>     more work on it.
>
>     The idea is that, at some point, one call to the CNN can give me
>     the equivalent of, let's say, 1000 playouts for the cost of 2
>     playouts...
>
>
>     Now I am trying to do a soft turnover from conventional playouts
>     to CNN-predicted winrates within the framework of MC.
>
>     I do have some ideas, but I am not happy with them.
>
>     Maybe you have better ones :)
>
>
>     Thanks a lot
>
>     Detlef
>
