Mercurial > ift6266
annotate baseline/deep_mlp/deepmlp.py @ 596:f6a3b28b002c

author | Yoshua Bengio <bengioy@iro.umontreal.ca>
date | Thu, 14 Oct 2010 15:52:02 -0400
parents | 89a725d332ae
children |

Annotated source: all lines come from rev 21:afdd41db8152 ("Initial commit of the multiple hidden layer perceptron", Owner <salahmeister@gmail.com>), except the two L1/L2 initialization lines, which come from rev 22:cb47cbc95a21 ("I fixed a bug in the computation of L1 and L2 regularizations", Razvan Pascanu <r.pascanu@gmail.com>).
#

import numpy, cPickle, gzip


import theano
import theano.tensor as T

import time

import theano.tensor.nnet

class MLP(object):
    """Multi-Layer Perceptron Class

    A multilayer perceptron is a feedforward artificial neural network model
    that has one or more layers of hidden units and nonlinear activations.
    Intermediate layers usually use tanh or the sigmoid function as their
    activation, while the top layer is a softmax layer.
    """

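    # In equation form, the model built below is
    #     hidden[0] = tanh(input * W[0] + b[0])
    #     hidden[i] = tanh(hidden[i-1] * W[i] + b[i])   for i = 1 .. n_layer-1
    #     p_y_given_x = softmax(hidden[n_layer-1] * W[n_layer] + b[n_layer])
    # with n_layer = len(n_hidden); __init__ constructs these expressions
    # symbolically with Theano.
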
    def __init__(self, input, n_in, n_hidden, n_out):
        """Initialize the parameters for the multilayer perceptron

        :param input: symbolic variable that describes the input of the
        architecture (one minibatch)

        :param n_in: number of input units, the dimension of the space in
        which the datapoints lie

        :param n_hidden: List representing the number of units for each
        hidden layer

        :param n_out: number of output units, the dimension of the space in
        which the labels lie

        """

        # initialize the parameters theta = (W,b) ; Here W and b are lists
        # where W[i] and b[i] represent the parameters and the bias vector
        # of the i-th layer.
        n_layer = len(n_hidden)
        W_values = []
        b_values = []
        self.W = []
        self.b = []

        # We first initialize the matrix W[0] and b[0] that represent the
        # parameters from the input to the first hidden layer
        W_values.append(numpy.asarray( numpy.random.uniform( \
              low = -numpy.sqrt(6./(n_in+n_hidden[0])), \
              high = numpy.sqrt(6./(n_in+n_hidden[0])), \
              size = (n_in, n_hidden[0])), dtype = theano.config.floatX))
        self.W.append(theano.shared( value = W_values[0] ))
        self.b.append(theano.shared( value = numpy.zeros((n_hidden[0],),
              dtype = theano.config.floatX)))

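        # The +/- sqrt(6/(fan_in + fan_out)) range used here (and for the other
        # layers below) matches the "normalized" uniform initialization proposed
        # for tanh units by Glorot & Bengio (2010). As a rough worked example,
        # with n_in = 28*28 = 784 and n_hidden[0] = 200 the first-layer weights
        # are drawn from approximately (-0.078, 0.078).
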
        # We initialize the parameters between all consecutive hidden layers
        for i in range(1, n_layer):
            # Each `W[i]` is initialized with `W_values[i]`, which is uniformly
            # sampled between -sqrt(6./(n_hidden[i-1]+n_hidden[i])) and
            # sqrt(6./(n_hidden[i-1]+n_hidden[i]));
            # the output of uniform is converted using asarray to dtype
            # theano.config.floatX so that the code is runnable on GPU
            W_values.append(numpy.asarray( numpy.random.uniform( \
                  low = -numpy.sqrt(6./(n_hidden[i-1]+n_hidden[i])), \
                  high = numpy.sqrt(6./(n_hidden[i-1]+n_hidden[i])), \
                  size = (n_hidden[i-1], n_hidden[i])), dtype = theano.config.floatX))
            self.W.append(theano.shared( value = W_values[i] ))
            self.b.append(theano.shared( value = numpy.zeros((n_hidden[i],),
                  dtype = theano.config.floatX)))

        # We initialize the matrix W[n_layer] and b[n_layer] that represent
        # the parameters from the last hidden layer to the output layer using the
        # same uniform sampling.
        W_values.append(numpy.asarray( numpy.random.uniform(
              low = -numpy.sqrt(6./(n_hidden[n_layer-1]+n_out)), \
              high = numpy.sqrt(6./(n_hidden[n_layer-1]+n_out)), \
              size = (n_hidden[n_layer-1], n_out)), dtype = theano.config.floatX))
        self.W.append(theano.shared( value = W_values[n_layer] ))
        self.b.append(theano.shared( value = numpy.zeros((n_out,),
              dtype = theano.config.floatX)))

        # List of the symbolic expressions computing the values of each hidden layer
        self.hidden = []

        # Symbolic expression of the first hidden layer
        self.hidden.append(T.tanh(T.dot(input, self.W[0]) + self.b[0]))
        for i in range(1, n_layer):
            # Symbolic expression of the i-th hidden layer
            self.hidden.append(T.tanh(T.dot(self.hidden[i-1], self.W[i]) + self.b[i]))

        # symbolic expression computing the values of the top layer
        self.p_y_given_x = T.nnet.softmax(T.dot(self.hidden[n_layer-1], self.W[n_layer]) + self.b[n_layer])

        # compute prediction as the class whose probability is maximal, in
        # symbolic form
        self.y_pred = T.argmax( self.p_y_given_x, axis = 1)

        # L1 norm ; one regularization option is to enforce the L1 norm to
        # be small
        self.L1 = abs(self.W[0]).sum()
        for i in range(1, n_layer+1):
            self.L1 += abs(self.W[i]).sum()

        # square of L2 norm ; one regularization option is to enforce the
        # square of the L2 norm to be small
        self.L2_sqr = (self.W[0]**2).sum()
        for i in range(1, n_layer+1):
            self.L2_sqr += (self.W[i]**2).sum()

    def negative_log_likelihood(self, y):
        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])
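        # For each example n in the minibatch, the indexing above selects
        # log p(y = y[n] | x[n]) from the matrix of class log-probabilities;
        # the mean over the minibatch is therefore the average log-likelihood
        # of the correct labels, negated to obtain a loss to minimize.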

    def errors(self, y):
        """Return a float representing the number of errors in the minibatch
        over the total number of examples of the minibatch
        """

        # check if y has the same dimension as y_pred
        if y.ndim != self.y_pred.ndim:
            raise TypeError('y should have the same shape as self.y_pred',
                ('y', y.type, 'y_pred', self.y_pred.type))
        # check if y is of the correct datatype
        if y.dtype.startswith('int'):
            # the T.neq operator returns a vector of 0s and 1s, where 1
            # represents a mistake in prediction
            return T.mean(T.neq(self.y_pred, y))
        else:
            raise NotImplementedError()


def sgd_optimization_mnist( learning_rate=0.01, L1_reg = 0.00, \
                            L2_reg = 0.0001, n_iter=100, n_hidden=[200,100,90,80,70]):
    """
    Demonstrate stochastic gradient descent optimization for a multilayer
    perceptron

    This is demonstrated on MNIST.

    :param learning_rate: learning rate used (factor for the stochastic
    gradient)

    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization)

    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization)

    :param n_iter: maximal number of iterations to run the optimizer

    """

    # Load the dataset
    f = gzip.open('mnist.pkl.gz','rb')
    train_set, valid_set, test_set = cPickle.load(f)
    f.close()

    # make minibatches of size 20
    batch_size = 20    # size of the minibatch

    # Dealing with the training set
    # get the list of training images (x) and their labels (y)
    (train_set_x, train_set_y) = train_set

    # initialize the list of training minibatches with an empty list
    train_batches = []
    for i in xrange(0, len(train_set_x), batch_size):
        # add to the list of minibatches the minibatch starting at
        # position i, ending at position i+batch_size;
        # a minibatch is a pair ; the first element of the pair is a list
        # of datapoints, the second element is the list of corresponding
        # labels
        train_batches = train_batches + \
            [(train_set_x[i:i+batch_size], train_set_y[i:i+batch_size])]

    # Dealing with the validation set
    (valid_set_x, valid_set_y) = valid_set
    # initialize the list of validation minibatches
    valid_batches = []
    for i in xrange(0, len(valid_set_x), batch_size):
        valid_batches = valid_batches + \
            [(valid_set_x[i:i+batch_size], valid_set_y[i:i+batch_size])]

    # Dealing with the testing set
    (test_set_x, test_set_y) = test_set
    # initialize the list of testing minibatches
    test_batches = []
    for i in xrange(0, len(test_set_x), batch_size):
        test_batches = test_batches + \
            [(test_set_x[i:i+batch_size], test_set_y[i:i+batch_size])]


    ishape = (28,28) # this is the size of MNIST images

    # allocate symbolic variables for the data
    x = T.fmatrix()  # the data is presented as rasterized images
    y = T.lvector()  # the labels are presented as a 1D vector of
                     # [long int] labels

    # construct the MLP classifier
    classifier = MLP( input=x.reshape((batch_size,28*28)),\
                      n_in=28*28, n_hidden=n_hidden, n_out=10)

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = classifier.negative_log_likelihood(y) \
         + L1_reg * classifier.L1 \
         + L2_reg * classifier.L2_sqr

    # compiling a theano function that computes the mistakes that are made by
    # the model on a minibatch
    test_model = theano.function([x,y], classifier.errors(y))
    g_W = []
    g_b = []
    # compute the gradients of the cost with respect to theta = (W,b),
    # i.e. with respect to every W[i] and b[i]
    for i in range(len(n_hidden)+1):
        g_W.append(T.grad(cost, classifier.W[i]))
        g_b.append(T.grad(cost, classifier.b[i]))


    # specify how to update the parameters of the model as a dictionary
    updates = {}
    for i in range(len(n_hidden)+1):
        updates[classifier.W[i]] = classifier.W[i] - learning_rate*g_W[i]
        updates[classifier.b[i]] = classifier.b[i] - learning_rate*g_b[i]
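
    # Each entry of `updates` implements one step of plain minibatch stochastic
    # gradient descent, param := param - learning_rate * d(cost)/d(param);
    # there is no momentum or learning-rate schedule in this baseline.
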
    # compiling a theano function `train_model` that returns the cost, but at
    # the same time updates the parameters of the model based on the rules
    # defined in `updates`
    train_model = theano.function([x, y], cost, updates = updates )
    n_minibatches = len(train_batches)

    # early-stopping parameters
    patience = 10000 # look at this many minibatches regardless
    patience_increase = 2 # wait this much longer when a new best is
                          # found
    improvement_threshold = 0.995 # a relative improvement of this much is
                                  # considered significant
    validation_frequency = n_minibatches # go through this many
                                         # minibatches before checking the network
                                         # on the validation set; in this case we
                                         # check every epoch

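    # The early-stopping rule used below: training runs for at least `patience`
    # minibatch updates, and whenever the validation error improves by more than
    # the relative factor `improvement_threshold`, patience is pushed out to
    # `patience_increase` times the current iteration. For example, a large
    # enough improvement at iteration 8000 extends patience to
    # max(10000, 2 * 8000) = 16000 updates.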

    best_params = None
    best_validation_loss = float('inf')
    best_iter = 0
    test_score = 0.
    start_time = time.clock()
    # have a maximum of `n_iter` iterations through the entire dataset
    for iter in xrange(n_iter * n_minibatches):

        # get epoch and minibatch index
        epoch = iter / n_minibatches
        minibatch_index = iter % n_minibatches

        # get the minibatch corresponding to `iter` modulo
        # `len(train_batches)`
        x,y = train_batches[ minibatch_index ]
        cost_ij = train_model(x,y)

        if (iter+1) % validation_frequency == 0:
            # compute zero-one loss on the validation set
            this_validation_loss = 0.
            for x,y in valid_batches:
                # sum up the errors for each minibatch
                this_validation_loss += test_model(x,y)
            # get the average by dividing by the number of minibatches
            this_validation_loss /= len(valid_batches)

            print('epoch %i, minibatch %i/%i, validation error %f %%' % \
                (epoch, minibatch_index+1, n_minibatches, \
                    this_validation_loss*100.))


            # if we got the best validation score until now
            if this_validation_loss < best_validation_loss:

                # improve patience if loss improvement is good enough
                if this_validation_loss < best_validation_loss * \
                       improvement_threshold :
                    patience = max(patience, iter * patience_increase)

                # save best validation score and iteration number
                best_validation_loss = this_validation_loss
                best_iter = iter

                # test it on the test set
                test_score = 0.
                for x,y in test_batches:
                    test_score += test_model(x,y)
                test_score /= len(test_batches)
                print((' epoch %i, minibatch %i/%i, test error of best '
                       'model %f %%') %
                         (epoch, minibatch_index+1, n_minibatches,
                             test_score*100.))

        if patience <= iter :
            break

    end_time = time.clock()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
             (best_validation_loss * 100., best_iter, test_score*100.))
    print ('The code ran for %f minutes' % ((end_time-start_time)/60.))
    #test on NIST (you need pylearn and access to NIST to do that)


if __name__ == '__main__':
    sgd_optimization_mnist()

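# A hypothetical alternative invocation (not exercised by the original script):
# a single hidden layer of 500 tanh units with a larger learning rate, e.g.
#
#     sgd_optimization_mnist(learning_rate=0.05, n_iter=50, n_hidden=[500])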