annotate baseline/deep_mlp/deepmlp.py @ 596:f6a3b28b002c

nips2010_submission.pdf
author Yoshua Bengio <bengioy@iro.umontreal.ca>
date Thu, 14 Oct 2010 15:52:02 -0400
parents 89a725d332ae
children
rev   line source
21
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
1 #
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
2
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
3 import numpy, cPickle, gzip
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
4
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
5
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
6 import theano
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
7 import theano.tensor as T
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
8
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
9 import time
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
10
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
11 import theano.tensor.nnet
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
12
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
class MLP(object):
    """Multi-Layer Perceptron Class

    A multilayer perceptron is a feedforward artificial neural network model
    that has one or more layers of hidden units and nonlinear activations.
    Intermediate layers usually use tanh or the sigmoid function as their
    activation, while the top layer is a softmax layer.  This implementation
    uses tanh for every hidden layer.
    """

    def __init__(self, input, n_in, n_hidden, n_out):
        """Initialize the parameters for the multilayer perceptron

        :param input: symbolic variable that describes the input of the
        architecture (one minibatch)

        :param n_in: number of input units, the dimension of the space in
        which the datapoints lie

        :param n_hidden: non-empty list giving the number of units of each
        hidden layer; its length is the number of hidden layers

        :param n_out: number of output units, the dimension of the space in
        which the labels lie

        :raises ValueError: if `n_hidden` is empty
        """
        n_hidden = list(n_hidden)
        if not n_hidden:
            raise ValueError(
                'n_hidden must contain at least one hidden layer size')

        # Sizes of all layers, input first and output last; consecutive
        # pairs give the shape of each weight matrix.
        layer_sizes = [n_in] + n_hidden + [n_out]

        # theta = (W, b): W[i] and b[i] are the weight matrix and the bias
        # vector feeding layer i (i == len(n_hidden) is the output layer).
        self.W = []
        self.b = []
        for fan_in, fan_out in zip(layer_sizes[:-1], layer_sizes[1:]):
            # Each W[i] is uniformly sampled between
            # -sqrt(6./(fan_in+fan_out)) and sqrt(6./(fan_in+fan_out));
            # the output of uniform is converted using asarray to dtype
            # theano.config.floatX so that the code is runnable on GPU.
            bound = numpy.sqrt(6. / (fan_in + fan_out))
            W_value = numpy.asarray(
                numpy.random.uniform(low=-bound, high=bound,
                                     size=(fan_in, fan_out)),
                dtype=theano.config.floatX)
            self.W.append(theano.shared(value=W_value))
            # Biases start at zero.
            self.b.append(theano.shared(
                value=numpy.zeros((fan_out,), dtype=theano.config.floatX)))

        # List of the symbolic expressions computing the values of each
        # hidden layer; hidden[i] is fed by hidden[i-1] (or by `input`).
        self.hidden = []
        layer_input = input
        for W, b in zip(self.W[:-1], self.b[:-1]):
            layer_input = T.tanh(T.dot(layer_input, W) + b)
            self.hidden.append(layer_input)

        # symbolic expression computing the values of the top layer
        self.p_y_given_x = T.nnet.softmax(
            T.dot(self.hidden[-1], self.W[-1]) + self.b[-1])

        # compute prediction as class whose probability is maximal in
        # symbolic form
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)

        # L1 norm ; one regularization option is to enforce L1 norm to
        # be small.
        # Square of L2 norm ; one regularization option is to enforce
        # square of L2 norm to be small.
        # Both are accumulated over the weight matrices of every layer
        # (biases are not regularized).
        self.L1 = abs(self.W[0]).sum()
        self.L2_sqr = (self.W[0] ** 2).sum()
        for W in self.W[1:]:
            self.L1 += abs(W).sum()
            self.L2_sqr += (W ** 2).sum()

    def negative_log_likelihood(self, y):
        """Return the mean over the minibatch of the negative log of the
        probability the model assigns to the label given in `y`

        :param y: symbolic vector of integer labels, one per row of
        `self.p_y_given_x`
        """
        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])

    def errors(self, y):
        """Return a float representing the number of errors in the minibatch
        over the total number of examples of the minibatch

        :param y: symbolic variable holding the correct label of each example

        :raises TypeError: if `y` does not have the same number of
        dimensions as `self.y_pred`
        :raises NotImplementedError: if the dtype of `y` is not an integer
        type
        """

        # check if y has same dimension of y_pred
        if y.ndim != self.y_pred.ndim:
            raise TypeError('y should have the same shape as self.y_pred',
                            ('y', y.type, 'y_pred', self.y_pred.type))
        # check if y is of the correct datatype
        if y.dtype.startswith('int'):
            # the T.neq operator returns a vector of 0s and 1s, where 1
            # represents a mistake in prediction
            return T.mean(T.neq(self.y_pred, y))
        else:
            raise NotImplementedError()
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
133 def sgd_optimization_mnist( learning_rate=0.01, L1_reg = 0.00, \
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
134 L2_reg = 0.0001, n_iter=100,n_hidden=[200,100,90,80,70]):
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
135 """
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
136 Demonstrate stochastic gradient descent optimization for a multilayer
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
137 perceptron
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
138
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
139 This is demonstrated on MNIST.
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
140
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
141 :param learning_rate: learning rate used (factor for the stochastic
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
142 gradient)
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
143
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
144 :param L1_reg: L1-norm's weight when added to the cost (see
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
145 regularization)
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
146
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
147 :param L2_reg: L2-norm's weight when added to the cost (see
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
148 regularization)
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
149
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
150 :param n_iter: maximal number of iterations to run the optimizer
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
151
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
152 """
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
153
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
154 # Load the dataset
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
155 f = gzip.open('mnist.pkl.gz','rb')
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
156 train_set, valid_set, test_set = cPickle.load(f)
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
157 f.close()
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
158
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
159 # make minibatches of size 20
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
160 batch_size = 20 # sized of the minibatch
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
161
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
162 # Dealing with the training set
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
163 # get the list of training images (x) and their labels (y)
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
164 (train_set_x, train_set_y) = train_set
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
165
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
166 # initialize the list of training minibatches with empty list
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
167 train_batches = []
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
168 for i in xrange(0, len(train_set_x), batch_size):
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
169 # add to the list of minibatches the minibatch starting at
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
170 # position i, ending at position i+batch_size
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
171 # a minibatch is a pair ; the first element of the pair is a list
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
172 # of datapoints, the second element is the list of corresponding
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
173 # labels
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
174 train_batches = train_batches + \
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
175 [(train_set_x[i:i+batch_size], train_set_y[i:i+batch_size])]
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
176
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
177 # Dealing with the validation set
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
178 (valid_set_x, valid_set_y) = valid_set
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
179 # initialize the list of validation minibatches
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
180 valid_batches = []
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
181 for i in xrange(0, len(valid_set_x), batch_size):
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
182 valid_batches = valid_batches + \
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
183 [(valid_set_x[i:i+batch_size], valid_set_y[i:i+batch_size])]
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
184
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
185 # Dealing with the testing set
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
186 (test_set_x, test_set_y) = test_set
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
187 # initialize the list of testing minibatches
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
188 test_batches = []
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
189 for i in xrange(0, len(test_set_x), batch_size):
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
190 test_batches = test_batches + \
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
191 [(test_set_x[i:i+batch_size], test_set_y[i:i+batch_size])]
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
192
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
193
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
194 ishape = (28,28) # this is the size of MNIST images
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
195
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
196 # allocate symbolic variables for the data
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
197 x = T.fmatrix() # the data is presented as rasterized images
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
198 y = T.lvector() # the labels are presented as 1D vector of
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
199 # [long int] labels
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
200
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
201 # construct the multi-hidden-layer perceptron (MLP) classifier
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
202 classifier = MLP( input=x.reshape((batch_size,28*28)),\
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
203 n_in=28*28, n_hidden=n_hidden, n_out=10)
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
204
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
205 # the cost we minimize during training is the negative log likelihood of
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
206 # the model plus the regularization terms (L1 and L2); cost is expressed
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
207 # here symbolically
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
208 cost = classifier.negative_log_likelihood(y) \
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
209 + L1_reg * classifier.L1 \
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
210 + L2_reg * classifier.L2_sqr
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
211
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
212 # compiling a theano function that computes the mistakes that are made by
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
213 # the model on a minibatch
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
214 test_model = theano.function([x,y], classifier.errors(y))
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
215 g_W=[]
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
216 g_b=[]
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
217 # compute the gradient of cost with respect to each layer's (W[i], b[i])
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
218 for i in range(len(n_hidden)+1):
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
219 g_W.append(T.grad(cost, classifier.W[i]))
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
220 g_b.append(T.grad(cost, classifier.b[i]))
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
221
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
222
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
223 # specify how to update the parameters of the model as a dictionary
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
224 updates={}
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
225 for i in range(len(n_hidden)+1):
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
226 updates[classifier.W[i]]= classifier.W[i] - learning_rate*g_W[i]
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
227 updates[classifier.b[i]]= classifier.b[i] - learning_rate*g_b[i]
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
228 # compiling a theano function `train_model` that returns the cost, but at
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
229 # the same time updates the parameters of the model based on the rules
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
230 # defined in `updates`
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
231 train_model = theano.function([x, y], cost, updates = updates )
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
232 n_minibatches = len(train_batches)
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
233
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
234 # early-stopping parameters
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
235 patience = 10000 # look at this many examples regardless
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
236 patience_increase = 2 # wait this much longer when a new best is
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
237 # found
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
238 improvement_threshold = 0.995 # a relative improvement of this much is
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
239 # considered significant
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
240 validation_frequency = n_minibatches # go through this many
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
241 # minibatches before checking the network
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
242 # on the validation set; in this case we
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
243 # check every epoch
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
244
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
245
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
246 best_params = None
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
247 best_validation_loss = float('inf')
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
248 best_iter = 0
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
249 test_score = 0.
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
250 start_time = time.clock()
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
251 # have a maximum of `n_iter` iterations through the entire dataset
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
252 for iter in xrange(n_iter* n_minibatches):
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
253
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
254 # get epoch and minibatch index
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
255 epoch = iter / n_minibatches
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
256 minibatch_index = iter % n_minibatches
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
257
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
258 # get the minibatches corresponding to `iter` modulo
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
259 # `len(train_batches)`
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
260 x,y = train_batches[ minibatch_index ]
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
261 cost_ij = train_model(x,y)
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
262
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
263 if (iter+1) % validation_frequency == 0:
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
264 # compute zero-one loss on validation set
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
265 this_validation_loss = 0.
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
266 for x,y in valid_batches:
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
267 # sum up the errors for each minibatch
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
268 this_validation_loss += test_model(x,y)
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
269 # get the average by dividing with the number of minibatches
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
270 this_validation_loss /= len(valid_batches)
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
271
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
272 print('epoch %i, minibatch %i/%i, validation error %f %%' % \
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
273 (epoch, minibatch_index+1, n_minibatches, \
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
274 this_validation_loss*100.))
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
275
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
276
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
277 # if we got the best validation score until now
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
278 if this_validation_loss < best_validation_loss:
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
279
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
280 #improve patience if loss improvement is good enough
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
281 if this_validation_loss < best_validation_loss * \
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
282 improvement_threshold :
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
283 patience = max(patience, iter * patience_increase)
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
284
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
285 # save best validation score and iteration number
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
286 best_validation_loss = this_validation_loss
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
287 best_iter = iter
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
288
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
289 # test it on the test set
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
290 test_score = 0.
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
291 for x,y in test_batches:
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
292 test_score += test_model(x,y)
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
293 test_score /= len(test_batches)
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
294 print((' epoch %i, minibatch %i/%i, test error of best '
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
295 'model %f %%') %
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
296 (epoch, minibatch_index+1, n_minibatches,
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
297 test_score*100.))
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
298
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
299 if patience <= iter :
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
300 break
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
301
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
302 end_time = time.clock()
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
303 print(('Optimization complete. Best validation score of %f %% '
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
304 'obtained at iteration %i, with test performance %f %%') %
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
305 (best_validation_loss * 100., best_iter, test_score*100.))
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
306 print ('The code ran for %f minutes' % ((end_time-start_time)/60.))
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
307 #test on NIST (you need pylearn and access to NIST to do that)
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
308 if __name__ == '__main__':
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
309 sgd_optimization_mnist()
afdd41db8152 Initial commit of the multiple hidden layer perceptron
Owner <salahmeister@gmail.com>
parents:
diff changeset
310