Mercurial > pylearn
annotate mlp.py @ 178:4090779e39a9
merged
author | James Bergstra <bergstrj@iro.umontreal.ca> |
---|---|
date | Tue, 13 May 2008 15:12:20 -0400 |
parents | ae5651a3696b |
children | 9911d2cc3c01 |
rev | line source |
---|---|
132
f6505ec32dc3
Updated documentation slightly
Joseph Turian <turian@gmail.com>
parents:
129
diff
changeset
|
1 """ |
f6505ec32dc3
Updated documentation slightly
Joseph Turian <turian@gmail.com>
parents:
129
diff
changeset
|
2 A straightforward classicial feedforward |
f6505ec32dc3
Updated documentation slightly
Joseph Turian <turian@gmail.com>
parents:
129
diff
changeset
|
3 one-hidden-layer neural net, with L2 regularization. |
f6505ec32dc3
Updated documentation slightly
Joseph Turian <turian@gmail.com>
parents:
129
diff
changeset
|
4 This is one of the simplest example of L{Learner}, and illustrates |
f6505ec32dc3
Updated documentation slightly
Joseph Turian <turian@gmail.com>
parents:
129
diff
changeset
|
5 the use of theano. |
f6505ec32dc3
Updated documentation slightly
Joseph Turian <turian@gmail.com>
parents:
129
diff
changeset
|
6 """ |
111
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
7 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
8 from learner import * |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
9 from theano import tensor as t |
118
d0a1bd0378c6
Finished draft of OneHiddenLayerNNetClassifier to debut learner.py
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
111
diff
changeset
|
10 from nnet_ops import * |
133 | 11 import math |
111
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
12 |
def sum_l2_cost(*params):
    """Return the sum of squared elements (symbolic L2 penalty) over all
    given theano tensors.

    Raises IndexError when called with no arguments (same as the original).
    """
    first, rest = params[0], params[1:]
    total = t.sum(first * first)
    for param in rest:
        total = total + t.sum(param * param)
    return total
19 | |
def activation(w, b, v, c, x):
    """Symbolic output activations of a one-hidden-layer net:
    ``(tanh(x.w + b)).v + c``."""
    hidden = t.tanh(t.dot(x, w) + b)
    return t.dot(hidden, v) + c
def nll(w, b, v, c, x, y):
    """Per-example negative log-likelihood of targets ``y`` under the
    softmax of the net's output activations.

    NOTE(review): the original called ``prediction(w, b, v, c, x)``, a name
    not defined in this module; ``activation`` (defined above) has exactly
    the matching signature, so the call is fixed to use it — confirm no
    module imported via ``*`` was expected to provide ``prediction``.
    """
    return crossentropy_softmax_1hot(activation(w, b, v, c, x), y)[0]

def output(w, b, v, c, x, y):
    """Softmax output probabilities (second result of
    ``crossentropy_softmax_1hot``); same fix as ``nll`` above."""
    return crossentropy_softmax_1hot(activation(w, b, v, c, x), y)[1]
26 | |
27 | |
111
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
28 |
129
4c2280edcaf5
Fixed typos in learner.py
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
126
diff
changeset
|
29 class OneHiddenLayerNNetClassifier(OnlineGradientTLearner): |
111
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
30 """ |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
31 Implement a straightforward classicial feedforward |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
32 one-hidden-layer neural net, with L2 regularization. |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
33 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
34 The predictor parameters are obtained by minibatch/online gradient descent. |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
35 Training can proceed sequentially (with multiple calls to update with |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
36 different disjoint subsets of the training sets). |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
37 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
38 Hyper-parameters: |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
39 - L2_regularizer |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
40 - learning_rate |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
41 - n_hidden |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
42 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
43 For each (input_t,output_t) pair in a minibatch,:: |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
44 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
45 output_activations_t = b2+W2*tanh(b1+W1*input_t) |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
46 output_t = softmax(output_activations_t) |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
47 output_class_t = argmax(output_activations_t) |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
48 class_error_t = 1_{output_class_t != target_t} |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
49 nll_t = -log(output_t[target_t]) |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
50 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
51 and the training criterion is:: |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
52 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
53 loss = L2_regularizer*(||W1||^2 + ||W2||^2) + sum_t nll_t |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
54 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
55 The parameters are [b1,W1,b2,W2] and are obtained by minimizing the loss by |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
56 stochastic minibatch gradient descent:: |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
57 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
58 parameters[i] -= learning_rate * dloss/dparameters[i] |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
59 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
60 The fields and attributes expected and produced by use and update are the following: |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
61 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
62 - Input and output fields (example-wise quantities): |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
63 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
64 - 'input' (always expected by use and update) |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
65 - 'target' (optionally expected by use and always by update) |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
66 - 'output' (optionally produced by use) |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
67 - 'output_class' (optionally produced by use) |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
68 - 'class_error' (optionally produced by use) |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
69 - 'nll' (optionally produced by use) |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
70 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
71 - optional attributes (optionally expected as input_dataset attributes) |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
72 (warning, this may be dangerous, the 'use' method will use those provided in the |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
73 input_dataset rather than those learned during 'update'; currently no support |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
74 for providing these to update): |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
75 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
76 - 'L2_regularizer' |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
77 - 'b1' |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
78 - 'W1' |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
79 - 'b2' |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
80 - 'W2' |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
81 - 'parameters' = [b1, W1, b2, W2] |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
82 - 'regularization_term' |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
83 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
84 """ |
134
3f4e5c9bdc5e
Fixes to ApplyFunctionDataSet and other things to make learner and mlp work
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
133
diff
changeset
|
def __init__(self, n_hidden, n_classes, learning_rate, max_n_epochs,
             L2_regularizer=0, init_range=1., n_inputs=None, minibatch_size=None):
    """Build the symbolic theano graph for a one-hidden-layer softmax
    classifier with L2 weight decay, then delegate to the
    OnlineGradientTLearner constructor.

    n_inputs may be None; it is then inferred from the first minibatch
    (see allocate()).
    """
    # Plain (numeric) hyper-parameters.
    self._n_inputs = n_inputs
    self._n_outputs = n_classes
    self._n_hidden = n_hidden
    self._init_range = init_range
    self._max_n_epochs = max_n_epochs
    self._minibatch_size = minibatch_size
    self.learning_rate = learning_rate  # this is the float
    self.L2_regularizer = L2_regularizer
    # Symbolic counterparts and the computation graph.
    self._learning_rate = t.scalar('learning_rate')  # this is the symbol
    self._input = t.matrix('input')  # n_examples x n_inputs
    self._target = t.imatrix('target')  # n_examples x 1
    self._target_vector = self._target[:, 0]
    self._L2_regularizer = t.scalar('L2_regularizer')
    self._W1 = t.matrix('W1')
    self._W2 = t.matrix('W2')
    self._b1 = t.row('b1')
    self._b2 = t.row('b2')
    self._regularization_term = self._L2_regularizer * (
        t.sum(self._W1 * self._W1) + t.sum(self._W2 * self._W2))
    self._output_activations = self._b2 + t.dot(
        t.tanh(self._b1 + t.dot(self._input, self._W1.T)), self._W2.T)
    self._nll, self._output = crossentropy_softmax_1hot(
        self._output_activations, self._target_vector)
    self._output_class = t.argmax(self._output, 1)
    self._class_error = t.neq(self._output_class, self._target_vector)
    # Regularizer is averaged over the minibatch so its weight does not
    # depend on the minibatch size.
    self._minibatch_criterion = self._nll + \
        self._regularization_term / t.shape(self._input)[0]
    OnlineGradientTLearner.__init__(self)
111
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
def attributeNames(self):
    """Names of all exposed attributes.

    Fixed: the original list was ``["parameters","b1","W2","b2","W2",...]``
    — ``"W1"`` was missing and ``"W2"`` appeared twice, contradicting
    parameterAttributes() which returns [b1, W1, b2, W2].
    """
    return ["parameters", "b1", "W1", "b2", "W2",
            "L2_regularizer", "regularization_term"]
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
113 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
def parameterAttributes(self):
    """Names of the learned parameter attributes, in gradient-update order."""
    return ["b1", "W1", "b2", "W2"]
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
116 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
def updateMinibatchInputFields(self):
    """Minibatch fields consumed by each update step."""
    return ["input", "target"]
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
119 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
def updateEndOutputAttributes(self):
    """Attributes recomputed once at the end of an update pass."""
    return ["regularization_term"]
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
122 |
118
d0a1bd0378c6
Finished draft of OneHiddenLayerNNetClassifier to debut learner.py
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
111
diff
changeset
|
def lossAttribute(self):
    """Name of the attribute holding the training criterion to minimize."""
    return "minibatch_criterion"
d0a1bd0378c6
Finished draft of OneHiddenLayerNNetClassifier to debut learner.py
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
111
diff
changeset
|
125 |
111
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
def defaultOutputFields(self, input_fields):
    """Output fields produced by use(): predictions always; error measures
    only when a 'target' field is available in the input."""
    fields = ["output", "output_class"]
    if "target" in input_fields:
        fields.extend(["class_error", "nll"])
    return fields
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
131 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
def allocate(self, minibatch):
    """Lazily size and initialize the parameters from the first minibatch.

    The input dimension is read from minibatch["input"].shape[1]; on the
    first call the bias rows are created and forget() initializes weights.
    """
    observed_n_inputs = minibatch["input"].shape[1]
    if not self._n_inputs:
        # First call: fix the input dimension and allocate bias rows.
        self._n_inputs = observed_n_inputs
        self.b1 = numpy.zeros((1, self._n_hidden))
        self.b2 = numpy.zeros((1, self._n_outputs))
        self.forget()
    elif self._n_inputs != observed_n_inputs:
        # If the input changes dimension on the fly, we resize and forget everything.
        # NOTE(review): _n_inputs is NOT updated here, so forget() will
        # re-create W1 with the OLD input dimension — verify intended.
        self.forget()
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
142 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
def forget(self):
    """Re-initialize the parameters (if dimensions are known) and restart
    the epoch counter.

    Weights are drawn uniformly in [-r, r] with r shrinking as
    init_range / sqrt(fan_in); biases are zeroed in place.
    """
    if self._n_inputs:
        bound = self._init_range / math.sqrt(self._n_inputs)
        self.W1 = numpy.random.uniform(low=-bound, high=bound,
                                       size=(self._n_hidden, self._n_inputs))
        bound = self._init_range / math.sqrt(self._n_hidden)
        self.W2 = numpy.random.uniform(low=-bound, high=bound,
                                       size=(self._n_outputs, self._n_hidden))
        self.b1[:] = 0
        self.b2[:] = 0
    # NOTE(review): the annotate dump loses indentation — this counter reset
    # is placed at method level (runs even when _n_inputs is unset); confirm
    # against the real source.
    self._n_epochs = 0
111
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
154 |
def isLastEpoch(self):
    """Advance the epoch counter and report whether training should stop."""
    self._n_epochs += 1
    return self._n_epochs >= self._max_n_epochs
111
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
158 |
def updateMinibatch(self, minibatch):
    """Run one gradient step on a single minibatch.

    Gathers the current parameter attributes and the minibatch's input
    fields, feeds them to the compiled update function, and stores the
    results back as attributes.

    Fixed: removed leftover debug ``print`` statements and a dead
    ``if 0:`` debugging block from the original.
    """
    # Make sure all required fields are allocated and initialized.
    self.allocate(minibatch)
    input_attributes = self.names2attributes(self.updateMinibatchInputAttributes())
    input_fields = minibatch(*self.updateMinibatchInputFields())
    results = self.update_minibatch_function(*(input_attributes + input_fields))
    self.setAttributes(self.updateMinibatchOutputAttributes(), results)
111
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
178 class MLP(MinibatchUpdatesTLearner): |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
179 """ |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
180 Implement a feedforward multi-layer perceptron, with or without L1 and/or L2 regularization. |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
181 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
182 The predictor parameters are obtained by minibatch/online gradient descent. |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
183 Training can proceed sequentially (with multiple calls to update with |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
184 different disjoint subsets of the training sets). |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
185 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
186 Hyper-parameters: |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
187 - L1_regularizer |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
188 - L2_regularizer |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
189 - neuron_sparsity_regularizer |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
190 - initial_learning_rate |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
191 - learning_rate_decrease_rate |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
192 - n_hidden_per_layer (a list of integers) |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
193 - activation_function ("sigmoid","tanh", or "ratio") |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
194 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
195 The output/task type (classification, regression, etc.) is obtained by specializing MLP. |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
196 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
197 For each (input[t],output[t]) pair in a minibatch,:: |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
198 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
199 activation[0] = input_t |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
200 for k=1 to n_hidden_layers: |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
201 activation[k]=activation_function(b[k]+ W[k]*activation[k-1]) |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
202 output_t = output_activation_function(b[n_hidden_layers+1]+W[n_hidden_layers+1]*activation[n_hidden_layers]) |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
203 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
204 and the b and W are obtained by minimizing the following by stochastic minibatch gradient descent:: |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
205 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
206 L2_regularizer sum_{ijk} W_{kij}^2 + L1_regularizer sum_{kij} |W_{kij}| |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
207 + neuron_sparsity_regularizer sum_{ki} |b_{ki} + infinity| |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
208 - sum_t log P_{output_model}(target_t | output_t) |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
209 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
210 The fields and attributes expected and produced by use and update are the following: |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
211 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
212 - Input and output fields (example-wise quantities): |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
213 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
214 - 'input' (always expected by use and update) |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
215 - 'target' (optionally expected by use and always by update) |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
216 - 'output' (optionally produced by use) |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
217 - error fields produced by sub-class of MLP |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
218 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
219 - optional attributes (optionally expected as input_dataset attributes) |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
220 (warning, this may be dangerous, the 'use' method will use those provided in the |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
221 input_dataset rather than those learned during 'update'; currently no support |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
222 for providing these to update): |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
223 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
224 - 'L1_regularizer' |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
225 - 'L2_regularizer' |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
226 - 'b' |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
227 - 'W' |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
228 - 'parameters' = [b[1], W[1], b[2], W[2], ...] |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
229 - 'regularization_term' |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
230 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
231 """ |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
232 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
def attributeNames(self):
    """All attributes exposed by this learner (parameters plus regularization
    settings and the computed regularization term)."""
    return ["parameters", "b", "W", "L1_regularizer", "L2_regularizer",
            "neuron_sparsity_regularizer", "regularization_term"]
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
235 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
def useInputAttributes(self):
    """Attributes required to run use(): the current parameters."""
    return ["b", "W"]
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
238 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
def useOutputAttributes(self):
    """use() produces no attributes (only output fields)."""
    return []
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
241 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
def updateInputAttributes(self):
    """Attributes consumed by update(): parameters plus regularization
    hyper-parameters."""
    return ["b", "W", "L1_regularizer", "L2_regularizer",
            "neuron_sparsity_regularizer"]
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
244 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
def updateMinibatchInputFields(self):
    """Minibatch fields consumed by each update step."""
    return ["input", "target"]
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
247 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
def updateMinibatchInputAttributes(self):
    """Parameter attributes fed into the per-minibatch update."""
    return ["b", "W"]
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
250 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
def updateMinibatchOutputAttributes(self):
    # NOTE(review): "new_XtX"/"new_XtY" look copied from a linear-regression
    # learner and do not obviously fit an MLP — verify these are the
    # intended minibatch outputs.
    return ["new_XtX", "new_XtY"]
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
253 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
254 def updateEndInputAttributes(self): |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
255 return ["theta","XtX","XtY"] |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
256 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
257 def updateEndOutputAttributes(self): |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
258 return ["new_theta","b","W","regularization_term"] # CHECK: WILL b AND W CONTAIN OLD OR NEW THETA? @todo i.e. order of computation = ? |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
259 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
260 def parameterAttributes(self): |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
261 return ["b","W"] |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
262 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
263 def defaultOutputFields(self, input_fields): |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
264 output_fields = ["output"] |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
265 if "target" in input_fields: |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
266 output_fields.append("squared_error") |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
267 return output_fields |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
268 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
269 def __init__(self): |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
270 self._input = t.matrix('input') # n_examples x n_inputs |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
271 self._target = t.matrix('target') # n_examples x n_outputs |
121 | 272 self._L2_regularizer = t.scalar('L2_regularizer') |
111
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
273 self._theta = t.matrix('theta') |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
274 self._W = self._theta[:,1:] |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
275 self._b = self._theta[:,0] |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
276 self._XtX = t.matrix('XtX') |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
277 self._XtY = t.matrix('XtY') |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
278 self._extended_input = t.prepend_one_to_each_row(self._input) |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
279 self._output = t.dot(self._input,self._W.T) + self._b # (n_examples , n_outputs) matrix |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
280 self._squared_error = t.sum_within_rows(t.sqr(self._output-self._target)) # (n_examples ) vector |
118
d0a1bd0378c6
Finished draft of OneHiddenLayerNNetClassifier to debut learner.py
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
111
diff
changeset
|
281 self._regularizer = self._L2_regularizer * t.dot(self._W,self._W) |
111
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
282 self._new_XtX = add_inplace(self._XtX,t.dot(self._extended_input.T,self._extended_input)) |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
283 self._new_XtY = add_inplace(self._XtY,t.dot(self._extended_input.T,self._target)) |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
284 self._new_theta = t.solve_inplace(self._theta,self._XtX,self._XtY) |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
285 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
286 OneShotTLearner.__init__(self) |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
287 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
288 def allocate(self,minibatch): |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
289 minibatch_n_inputs = minibatch["input"].shape[1] |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
290 minibatch_n_outputs = minibatch["target"].shape[1] |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
291 if not self._n_inputs: |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
292 self._n_inputs = minibatch_n_inputs |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
293 self._n_outputs = minibatch_n_outputs |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
294 self.XtX = numpy.zeros((1+self._n_inputs,1+self._n_inputs)) |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
295 self.XtY = numpy.zeros((1+self._n_inputs,self._n_outputs)) |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
296 self.theta = numpy.zeros((self._n_outputs,1+self._n_inputs)) |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
297 self.forget() |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
298 elif self._n_inputs!=minibatch_n_inputs or self._n_outputs!=minibatch_n_outputs: |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
299 # if the input or target changes dimension on the fly, we resize and forget everything |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
300 self.forget() |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
301 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
302 def forget(self): |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
303 if self._n_inputs and self._n_outputs: |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
304 self.XtX.resize((1+self.n_inputs,1+self.n_inputs)) |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
305 self.XtY.resize((1+self.n_inputs,self.n_outputs)) |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
306 self.XtX.data[:,:]=0 |
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
307 self.XtY.data[:,:]=0 |
118
d0a1bd0378c6
Finished draft of OneHiddenLayerNNetClassifier to debut learner.py
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
111
diff
changeset
|
308 numpy.diag(self.XtX.data)[1:]=self.L2_regularizer |
111
88257dfedf8c
Added another work in progress, for mlp's
bengioy@bengiomac.local
parents:
diff
changeset
|
309 |