comparison mlp_factory_approach.py @ 208:bf320808919f
back to James' version
author | Yoshua Bengio <bengioy@iro.umontreal.ca> |
date | Fri, 16 May 2008 16:39:01 -0400 |
parents | c5a7105fa40b |
children | bd728c83faff |
207:c5a7105fa40b | 208:bf320808919f |
---|---|
1 import dataset | 1 import copy, numpy, sys |
2 import numpy | |
3 | |
2 import theano | 4 import theano |
3 import theano.tensor as t | 5 from theano import tensor as t |
4 import numpy | 6 |
5 import nnet_ops | 7 from tlearn import dataset, nnet_ops, stopper |
6 | 8 |
7 def _randshape(*shape): | 9 def _randshape(*shape): |
8 return (numpy.random.rand(*shape) -0.5) * 0.001 | 10 return (numpy.random.rand(*shape) -0.5) * 0.001 |
9 def _function(inputs, outputs, linker='c&py'): | |
10 return theano.function(inputs, outputs, unpack_single=False,linker=linker) | |
11 | 11 |
12 class NeuralNet(object): | 12 def _cache(d, key, valfn): |
13 #valfn() is only evaluated if key isn't in dictionary d | |
14 if key not in d: | |
15 d[key] = valfn() | |
16 return d[key] | |
13 | 17 |
14 class Model(object): | 18 class _Model(object): |
15 def __init__(self, nnet, params): | 19 def __init__(self, algo, params): |
16 self.nnet = nnet | 20 self.algo = algo |
17 self.params = params | 21 self.params = params |
22 v = algo.v | |
23 self.update_fn = algo._fn([v.input, v.target] + v.params, [v.nll] + v.new_params) | |
24 self._fn_cache = {} | |
18 | 25 |
19 def update(self, trainset, stopper=None): | 26 def __copy__(self): |
20 """Update this model from more training data.""" | 27 return _Model(self.algo, [copy.copy(p) for p in params]) |
21 v = self.nnet.v | |
22 params = self.params | |
23 update_fn = _function([v.input, v.target] + v.params, [v.nll] + v.new_params) | |
24 if stopper is not None: | |
25 raise NotImplementedError() | |
26 else: | |
27 for i in xrange(100): | |
28 for input, target in trainset.minibatches(['input', 'target'], | |
29 minibatch_size=min(32, len(trainset))): | |
30 results = update_fn(input, target[:,0], *params) | |
31 if 0: print results[0] | |
32 # print params['b'] | |
33 | 28 |
34 def __call__(self, testset, | 29 def update(self, input_target): |
35 output_fieldnames=['output_class'], | 30 """Update this model from more training data.""" |
36 test_stats_collector=None, | 31 params = self.params |
37 copy_inputs=False, | 32 #TODO: why should we have to unpack target like this? |
38 put_stats_in_output_dataset=True, | 33 for input, target in input_target: |
39 output_attributes=[]): | 34 self.update_fn(input, target[:,0], *params) |
40 """Apply this model (as a function) to new data""" | |
41 inputs = [self.nnet.v.input, self.nnet.v.target] + self.nnet.v.params | |
42 fn = _function(inputs, [getattr(self.nnet.v, name) for name in output_fieldnames]) | |
43 if 'target' in testset.fieldNames(): | |
44 return dataset.ApplyFunctionDataSet(testset, | |
45 lambda input, target: fn(input, target[:,0], *self.params), | |
46 output_fieldnames) | |
47 else: | |
48 return dataset.ApplyFunctionDataSet(testset, | |
49 lambda input: fn(input, numpy.zeros(1,dtype='int64'), *self.params), | |
50 output_fieldnames) | |
51 | 35 |
52 def __init__(self, ninputs, nhid, nclass, lr, nepochs, | 36 def __call__(self, testset, fieldnames=['output_class']): |
53 l2coef=0.0, | 37 """Apply this model (as a function) to new data""" |
54 linker='c&yp', | 38 #TODO: cache fn between calls |
55 hidden_layer=None): | 39 assert 'input' == testset.fieldNames()[0] |
56 if not hidden_layer: | 40 assert len(testset.fieldNames()) <= 2 |
57 hidden_layer = AffineSigmoidLayer("hidden",ninputs,nhid,l2coef) | 41 v = self.algo.v |
58 class Vars: | 42 outputs = [getattr(v, name) for name in fieldnames] |
59 def __init__(self, lr, l2coef): | 43 inputs = [v.input] + ([v.target] if 'target' in testset else []) |
44 inputs.extend(v.params) | |
45 theano_fn = _cache(self._fn_cache, (tuple(inputs), tuple(outputs)), | |
46 lambda: self.algo._fn(inputs, outputs)) | |
47 lambda_fn = lambda *args: theano_fn(*(list(args) + self.params)) | |
48 return dataset.ApplyFunctionDataSet(testset, lambda_fn, fieldnames) | |
49 | |
50 class AutonameVars(object): | |
51 def __init__(self, dct): | |
52 for key, val in dct.items(): | |
53 if type(key) is str and hasattr(val, 'name'): | |
54 val.name = key | |
55 self.__dict__.update(dct) | |
56 | |
57 class MultiLayerPerceptron(object): | |
58 | |
59 def __init__(self, ninputs, nhid, nclass, lr, | |
60 l2coef=0.0, | |
61 linker='c&py', | |
62 hidden_layer=None, | |
63 early_stopper=None, | |
64 validation_portion=0.2, | |
65 V_extern=None): | |
66 class V_intern(AutonameVars): | |
67 def __init__(v_self, lr, l2coef, **kwargs): | |
60 lr = t.constant(lr) | 68 lr = t.constant(lr) |
61 l2coef = t.constant(l2coef) | 69 l2coef = t.constant(l2coef) |
62 input = t.matrix('input') # n_examples x n_inputs | 70 input = t.matrix() # n_examples x n_inputs |
63 target = t.ivector('target') # n_examples x 1 | 71 target = t.ivector() # len: n_examples |
64 W2 = t.matrix('W2') | 72 W2, b2 = t.matrix(), t.vector() |
65 b2 = t.vector('b2') | |
66 | 73 |
67 hid = hidden_layer(input) | 74 if hidden_layer: |
68 hid_params = hidden_layer.params() | 75 hid, hid_params, hid_ivals, hid_regularization = hidden_layer(input) |
69 hid_params_init_vals = hidden_layer.params_ivals() | 76 else: |
70 hid_regularization = hidden_layer.regularization() | 77 W1, b1 = t.matrix(), t.vector() |
71 | 78 hid = t.tanh(b1 + t.dot(input, W1)) |
79 hid_params = [W1, b1] | |
80 hid_regularization = l2coef * t.sum(W1*W1) | |
81 hid_ivals = lambda : [_randshape(ninputs, nhid), _randshape(nhid)] | |
82 | |
72 params = [W2, b2] + hid_params | 83 params = [W2, b2] + hid_params |
73 nll, predictions = nnet_ops.crossentropy_softmax_1hot( b2 + t.dot(hid, W2), target) | 84 activations = b2 + t.dot(hid, W2) |
85 nll, predictions = nnet_ops.crossentropy_softmax_1hot(activations, target) | |
74 regularization = l2coef * t.sum(W2*W2) + hid_regularization | 86 regularization = l2coef * t.sum(W2*W2) + hid_regularization |
75 output_class = t.argmax(predictions,1) | 87 output_class = t.argmax(activations,1) |
76 loss_01 = t.neq(output_class, target) | 88 loss_01 = t.neq(output_class, target) |
77 g_params = t.grad(nll + regularization, params) | 89 g_params = t.grad(nll + regularization, params) |
78 new_params = [t.sub_inplace(p, lr * gp) for p,gp in zip(params, g_params)] | 90 new_params = [t.sub_inplace(p, lr * gp) for p,gp in zip(params, g_params)] |
79 setattr_and_name(self, locals()) | 91 self.__dict__.update(locals()); del self.self |
92 AutonameVars.__init__(v_self, locals()) | |
80 self.nhid = nhid | 93 self.nhid = nhid |
81 self.nclass = nclass | 94 self.nclass = nclass |
82 self.nepochs = nepochs | 95 self.v = V_intern(**locals()) if V_extern is None else V_extern(**locals()) |
83 self.v = Vars(lr, l2coef) | 96 self.linker = linker |
84 self.params = None | 97 self.early_stopper = early_stopper if early_stopper is not None else lambda: stopper.NStages(10,1) |
98 self.validation_portion = validation_portion | |
99 | |
100 def _fn(self, inputs, outputs): | |
101 # Caching here would hamper multi-threaded apps | |
102 # prefer caching in _Model.__call__ | |
103 return theano.function(inputs, outputs, unpack_single=False, linker=self.linker) | |
85 | 104 |
86 def __call__(self, trainset=None, iparams=None): | 105 def __call__(self, trainset=None, iparams=None): |
106 """Allocate and optionally train a model""" | |
87 if iparams is None: | 107 if iparams is None: |
88 iparams = LookupList(["W","b"],[_randshape(self.nhid, self.nclass), _randshape(self.nclass)]) | 108 iparams = [_randshape(self.nhid, self.nclass), _randshape(self.nclass)]\ |
89 + self.v.hid_params_init_vals() | 109 + self.v.hid_ivals() |
90 rval = NeuralNet.Model(self, iparams) | 110 rval = _Model(self, iparams) |
91 if trainset: | 111 if trainset: |
92 rval.update(trainset) | 112 if len(trainset) == sys.maxint: |
113 raise NotImplementedError('Learning from infinite streams is not supported') | |
114 nval = int(self.validation_portion * len(trainset)) | |
115 nmin = len(trainset) - nval | |
116 assert nmin >= 0 | |
117 minset = trainset[:nmin] #real training set for minimizing loss | |
118 valset = trainset[nmin:] #validation set for early stopping | |
119 best = rval | |
120 for stp in self.early_stopper(): | |
121 rval.update( | |
122 minset.minibatches(['input', 'target'], minibatch_size=min(32, |
123 len(minset)))) |
124 if stp.set_score: | |
125 stp.score = rval(valset, ['loss_01']) | |
126 if (stp.score < stp.best_score): | |
127 best = copy.copy(rval) | |
128 rval = best | |
93 return rval | 129 return rval |
94 | 130 |
95 | 131 |
96 def setattr_and_name(self, dict): | 132 import unittest |
97 """This will do a self.__setattr__ for all elements in the dict | |
98 (except for element self). In addition it will make sure that | |
99 each element's .name (if it exists) is set to the element's key | |
100 in the dictionary. |
101 Typical usage: setattr_and_name(self, locals()) """ | |
102 for varname,var in dict.items(): |
103 if var is not self: | |
104 if hasattr(var,"name") and not var.name: | |
105 var.name=varname | |
106 self.__setattr__(varname,var) | |
107 | 133 |
134 class TestMLP(unittest.TestCase): | |
135 def test0(self): | |
136 | |
137 training_set1 = dataset.ArrayDataSet(numpy.array([[0, 0, 0], | |
138 [0, 1, 1], | |
139 [1, 0, 1], | |
140 [1, 1, 1]]), | |
141 {'input':slice(2),'target':2}) | |
142 training_set2 = dataset.ArrayDataSet(numpy.array([[0, 0, 0], | |
143 [0, 1, 1], | |
144 [1, 0, 0], | |
145 [1, 1, 1]]), | |
146 {'input':slice(2),'target':2}) | |
147 test_data = dataset.ArrayDataSet(numpy.array([[0, 0, 0], | |
148 [0, 1, 1], | |
149 [1, 0, 0], | |
150 [1, 1, 1]]), | |
151 {'input':slice(2)}) | |
152 | |
153 learn_algo = MultiLayerPerceptron(2, 10, 2, .1 | |
154 , linker='c&py' | |
155 , early_stopper = lambda:stopper.NStages(100,1)) | |
156 | |
157 model1 = learn_algo(training_set1) | |
158 | |
159 model2 = learn_algo(training_set2) | |
160 | |
161 n_match = 0 | |
162 for o1, o2 in zip(model1(test_data), model2(test_data)): | |
163 #print o1 | |
164 #print o2 | |
165 n_match += (o1 == o2) | |
166 | |
167 assert n_match == (numpy.sum(training_set1.fields()['target'] == | |
168 training_set2.fields()['target'])) | |
108 | 169 |
109 if __name__ == '__main__': | 170 if __name__ == '__main__': |
110 training_set1 = dataset.ArrayDataSet(numpy.array([[0, 0, 0], | 171 unittest.main() |
111 [0, 1, 1], | |
112 [1, 0, 1], | |
113 [1, 1, 1]]), | |
114 {'input':slice(2),'target':2}) | |
115 training_set2 = dataset.ArrayDataSet(numpy.array([[0, 0, 0], | |
116 [0, 1, 1], | |
117 [1, 0, 0], | |
118 [1, 1, 1]]), | |
119 {'input':slice(2),'target':2}) | |
120 test_data = dataset.ArrayDataSet(numpy.array([[0, 0, 0], | |
121 [0, 1, 1], | |
122 [1, 0, 0], | |
123 [1, 1, 1]]), | |
124 {'input':slice(2)}) | |
125 | 172 |
126 | |
127 learn_algo = NeuralNet(2, 10, 3, .1, 1000) | |
128 | |
129 model = learn_algo() | |
130 | |
131 model1 = learn_algo(training_set1) | |
132 | |
133 model2 = learn_algo(training_set2) | |
134 | |
135 n_match = 0 | |
136 for o1, o2 in zip(model1(test_data), model2(test_data)): | |
137 n_match += (o1 == o2) | |
138 | |
139 print n_match, numpy.sum(training_set1.fields()['target'] == | |
140 training_set2.fields()['target']) | |
141 |
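
Note on the stopper protocol assumed above: the new training loop in MultiLayerPerceptron.__call__ iterates over self.early_stopper() and expects each yielded step to expose set_score, score and best_score, but this file only imports tlearn.stopper. The class below is a minimal sketch of an NStages-like stopper, inferred purely from those call sites; the real tlearn.stopper.NStages may differ.

class NStages(object):
    """Hypothetical sketch: run a fixed number of training stages and
    request a validation score every `frequency` stages."""
    def __init__(self, n_stages, frequency):
        self.n_stages = n_stages
        self.frequency = frequency
        self.best_score = float('inf')   # best validation score seen so far
    def __iter__(self):
        for i in xrange(self.n_stages):
            # tell the caller whether to fill in self.score during this stage
            self.set_score = ((i + 1) % self.frequency == 0)
            self.score = None
            yield self
            # record the best validation score reported by the caller
            if self.set_score and self.score is not None and self.score < self.best_score:
                self.best_score = self.score

With a stopper of this shape, stopper.NStages(100, 1) in the test above would run 100 update passes, scoring the validation set after each pass while the caller keeps a copy of the best model seen.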