diff mlp.py @ 187:ebbb0e749565

added mlp_factory_approach
author:   James Bergstra <bergstrj@iro.umontreal.ca>
date:     Wed, 14 May 2008 11:51:08 -0400
parents:  562f308873f0
children: d1359de1ea13
--- a/mlp.py	Tue May 13 20:10:03 2008 -0400
+++ b/mlp.py	Wed May 14 11:51:08 2008 -0400
@@ -21,29 +21,32 @@
             linker='c&py', 
             hidden_layer=None):
         class Vars:
-            def __init__(self, lr):
+            def __init__(self, lr, l2coef=0.0):
                 lr = t.constant(lr)
+                l2coef = t.constant(l2coef)
                 input = t.matrix('input') # n_examples x n_inputs
                 target = t.ivector('target') # n_examples x 1
                 W2 = t.matrix('W2')
                 b2 = t.vector('b2')
 
                 if hidden_layer:
-                    hidden, hidden_params, hidden_ivals = hidden_layer(input)
+                    hid, hid_params, hid_ivals, hid_regularization = hidden_layer(input)
                 else:
                     W1 = t.matrix('W1')
                     b1 = t.vector('b1')
-                    hidden = t.tanh(b1 + t.dot(input, W1))
-                    hidden_params = [W1, b1]
-                    hidden_ivals = [randshape(ninputs, nhid), randshape(nhid)]
+                    hid = t.tanh(b1 + t.dot(input, W1))
+                    hid_params = [W1, b1]
+                    hid_regularization = l2coef * t.sum(W1*W1)
+                    hid_ivals = [randshape(ninputs, nhid), randshape(nhid)]
 
-                params = [W2, b2] + hidden_params
+                params = [W2, b2] + hid_params
                 ivals = [randshape(nhid, nclass), randshape(nclass)]\
-                        + hidden_ivals
-                nll, predictions = crossentropy_softmax_1hot( b2 + t.dot(hidden, W2), target)
+                        + hid_ivals
+                nll, predictions = crossentropy_softmax_1hot( b2 + t.dot(hid, W2), target)
+                regularization = l2coef * t.sum(W2*W2) + hid_regularization
                 output_class = t.argmax(predictions,1)
                 loss_01 = t.neq(output_class, target)
-                g_params = t.grad(nll, params)
+                g_params = t.grad(nll + regularization, params)
                 new_params = [t.sub_inplace(p, lr * gp) for p,gp in zip(params, g_params)]
                 self.__dict__.update(locals()); del self.self
         self.nhid = nhid
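
What the hunk does: fold an L2 penalty, l2coef * t.sum(W*W) per weight matrix, into the cost before calling t.grad, so each weight gradient picks up a weight-decay term 2*l2coef*W while the biases stay unregularized. Below is a minimal NumPy sketch of the same technique. It is illustrative only, not the repository's Theano code; the layer sizes, the randshape() stand-in, and the use of a mean NLL are assumptions made for the example.

# Minimal NumPy sketch of the technique in the hunk above: add an L2
# penalty l2coef * sum(W*W) to the cross-entropy loss before taking
# gradients.  Illustrative only -- shapes, randshape(), and the mean
# NLL are assumptions, not the repository's code.
import numpy as np

rng = np.random.RandomState(0)

def randshape(*shape):
    # stand-in for the repository's randshape() initializer (assumed)
    return rng.uniform(-0.1, 0.1, size=shape)

ninputs, nhid, nclass, lr, l2coef = 4, 8, 3, 0.01, 1e-4
W1, b1 = randshape(ninputs, nhid), randshape(nhid)
W2, b2 = randshape(nhid, nclass), randshape(nclass)

x = rng.randn(5, ninputs)                # n_examples x n_inputs
target = rng.randint(nclass, size=5)     # integer class labels

# forward pass: tanh hidden layer, softmax output
hid = np.tanh(b1 + x.dot(W1))
logits = b2 + hid.dot(W2)
probs = np.exp(logits - logits.max(axis=1, keepdims=True))
probs /= probs.sum(axis=1, keepdims=True)
nll = -np.log(probs[np.arange(len(target)), target]).mean()

# regularized cost, as in the diff: only the weight matrices are penalized
loss = nll + l2coef * ((W1 * W1).sum() + (W2 * W2).sum())

# backward pass: each weight gradient gains a 2*l2coef*W weight-decay term,
# which is what t.grad(nll + regularization, params) produces symbolically
d_logits = probs.copy()
d_logits[np.arange(len(target)), target] -= 1.0
d_logits /= len(target)
gW2 = hid.T.dot(d_logits) + 2.0 * l2coef * W2
gb2 = d_logits.sum(axis=0)
d_hid = d_logits.dot(W2.T) * (1.0 - hid ** 2)
gW1 = x.T.dot(d_hid) + 2.0 * l2coef * W1
gb1 = d_hid.sum(axis=0)

# in-place SGD step, mirroring t.sub_inplace(p, lr * gp)
for p, gp in zip((W1, b1, W2, b2), (gW1, gb1, gW2, gb2)):
    p -= lr * gp

With l2coef=0.0 the sketch reduces to plain unregularized SGD, matching the pre-patch behaviour of the file.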