comparison mlp_factory_approach.py @ 244:3156a9976183
mlp_factory_approach.py, updated and un-deprecated by popular demand
author | James Bergstra <bergstrj@iro.umontreal.ca> |
---|---|
date | Mon, 02 Jun 2008 17:08:17 -0400 |
parents | c047238e5b3f |
children | a1793a5e9523 |
235:a70f2c973ea5 | 244:3156a9976183 |
---|---|
1 """ | |
2 | |
3 | |
4 | |
5 This file is deprecated. I'm continuing development in hpu/models.py. | |
6 | |
7 Get that project like this: hg clone ssh://user@lgcm/../bergstrj/hpu | |
8 | |
9 | |
10 | |
11 | |
12 | |
13 """ | |
14 import copy, sys | 1 import copy, sys |
15 import numpy | 2 import numpy |
16 | 3 |
17 import theano | 4 import theano |
18 from theano import tensor as t | 5 from theano import tensor as T |
19 | 6 |
20 from pylearn import dataset, nnet_ops, stopper | 7 from pylearn import dataset, nnet_ops, stopper, LookupList |
21 | 8 |
22 | 9 class AbstractFunction(Exception): pass |
23 def _randshape(*shape): | 10 |
24 return (numpy.random.rand(*shape) -0.5) * 0.001 | 11 class AutoName(object): |
25 | 12 """ |
26 def _cache(d, key, valfn): | 13 By inheriting from this class, class variables which have a name attribute |
27 #valfn() is only evaluated if key isn't in dictionary d | 14 will have that name attribute set to the class variable name. |
28 if key not in d: | 15 """ |
29 d[key] = valfn() | 16 class __metaclass__(type): |
30 return d[key] | 17 def __init__(cls, name, bases, dct): |
31 | 18 type.__init__(cls, name, bases, dct) |
32 class _Model(object): | 19 for key, val in dct.items(): |
33 def __init__(self, algo, params): | 20 assert type(key) is str |
34 self.algo = algo | 21 if hasattr(val, 'name'): |
35 self.params = params | 22 val.name = key |
36 v = algo.v | 23 |
37 self.update_fn = algo._fn([v.input, v.target] + v.params, [v.nll] + v.new_params) | 24 class GraphLearner(object): |
38 self._fn_cache = {} | 25 class Model(object): |
39 | 26 def __init__(self, algo, params): |
40 def __copy__(self): | 27 self.algo = algo |
41 return _Model(self.algo, [copy.copy(p) for p in self.params]) | 28 self.params = params |
42 | 29 graph = self.algo.graph |
43 def update(self, input_target): | 30 self.update_fn = algo._fn([graph.input, graph.target] + graph.params, |
44 """Update this model from more training data.""" | 31 [graph.nll] + graph.new_params) |
45 params = self.params | 32 self._fn_cache = {} |
46 #TODO: why should we have to unpack target like this? | 33 |
47 # tbm : creates problem... | 34 def __copy__(self): |
48 for input, target in input_target: | 35 raise Exception('why not called?') |
49 rval = self.update_fn(input, target, *params) | 36 return GraphLearner.Model(self.algo, [copy.copy(p) for p in self.params]) |
50 #print rval[0] | 37 |
51 | 38 def _cache(self, key, valfn): |
52 def __call__(self, testset, fieldnames=['output_class'],input='input',target='target'): | 39 d = self._fn_cache |
53 """Apply this model (as a function) to new data""" | 40 if key not in d: |
54 #TODO: cache fn between calls | 41 d[key] = valfn() |
55 assert input == testset.fieldNames()[0] # why first one??? | 42 return d[key] |
56 assert len(testset.fieldNames()) <= 2 | 43 |
57 v = self.algo.v | 44 def update_minibatch(self, minibatch): |
58 outputs = [getattr(v, name) for name in fieldnames] | 59 @param testset: DataSet, whose fields feed Result terms in self.algo.graph |
59 inputs = [v.input] + ([v.target] if target in testset else []) | 46 self.update_fn(minibatch['input'], minibatch['target'], *self.params) |
60 inputs.extend(v.params) | 47 |
61 theano_fn = _cache(self._fn_cache, (tuple(inputs), tuple(outputs)), | 62 @param fieldnames: names of results in self.algo.graph to compute. |
62 lambda: self.algo._fn(inputs, outputs)) | 49 default_minibatch_size=32): |
63 lambda_fn = lambda *args: theano_fn(*(list(args) + self.params)) | 50 """Update this model from more training data.""" |
64 return dataset.ApplyFunctionDataSet(testset, lambda_fn, fieldnames) | 51 params = self.params |
65 | 52 minibatch_size = min(default_minibatch_size, len(dataset)) |
66 class AutonameVars(object): | 53 for mb in dataset.minibatches(['input', 'target'], minibatch_size=minibatch_size): |
67 def __init__(self, dct): | 54 self.update_minibatch(mb) |
68 for key, val in dct.items(): | 55 |
69 if type(key) is str and hasattr(val, 'name'): | 56 def __call__(self, testset, fieldnames=['output_class']): |
70 val.name = key | 57 """Apply this model (as a function) to new data. |
71 self.__dict__.update(dct) | 58 |
72 | 59 @param testset: DataSet, whose fields feed Result terms in self.algo.g |
73 class MultiLayerPerceptron(object): | 60 @type testset: DataSet |
74 | 61 |
75 def __init__(self, ninputs, nhid, nclass, lr, | 62 @param fieldnames: names of results in self.algo.g to compute. |
76 l2coef=0.0, | 63 @type fieldnames: list of strings |
77 linker='c&py', | 64 |
78 hidden_layer=None, | 65 @return: DataSet with fields from fieldnames, computed from testset by |
79 early_stopper=None, | 66 this model. |
80 validation_portion=0.2, | 67 @rtype: ApplyFunctionDataSet instance |
81 V_extern=None): | 68 |
82 class V_intern(AutonameVars): | 69 """ |
83 def __init__(v_self, lr, l2coef, **kwargs): | 70 graph = self.algo.graph |
84 lr = t.constant(lr) | 71 def getresult(name): |
85 l2coef = t.constant(l2coef) | 72 r = getattr(graph, name) |
86 input = t.matrix() # n_examples x n_inputs | 73 if not isinstance(r, theano.Result): |
87 target = t.ivector() # len: n_examples | 74 raise TypeError('string does not name a theano.Result', (name, r)) |
88 W2, b2 = t.matrix(), t.vector() | 75 return r |
89 | 76 |
90 if hidden_layer: | 77 provided = [getresult(name) for name in testset.fieldNames()] |
91 hid, hid_params, hid_ivals, hid_regularization = hidden_layer(input) | 78 wanted = [getresult(name) for name in fieldnames] |
92 else: | 79 inputs = provided + graph.params |
93 W1, b1 = t.matrix(), t.vector() | 80 |
94 hid = t.tanh(b1 + t.dot(input, W1)) | 81 theano_fn = self._cache((tuple(inputs), tuple(wanted)), |
95 hid_params = [W1, b1] | 82 lambda: self.algo._fn(inputs, wanted)) |
96 hid_regularization = l2coef * t.sum(W1*W1) | 83 lambda_fn = lambda *args: theano_fn(*(list(args) + self.params)) |
97 hid_ivals = lambda : [_randshape(ninputs, nhid), _randshape(nhid)] | 84 return dataset.ApplyFunctionDataSet(testset, lambda_fn, fieldnames) |
98 | 85 |
99 params = [W2, b2] + hid_params | 86 class Graph(object): |
100 activations = b2 + t.dot(hid, W2) | 87 class Opt(object): |
101 nll, predictions = nnet_ops.crossentropy_softmax_1hot(activations, target) | 88 merge = theano.gof.MergeOptimizer() |
102 regularization = l2coef * t.sum(W2*W2) + hid_regularization | 89 gemm_opt_1 = theano.gof.TopoOptimizer(theano.tensor_opt.gemm_pattern_1) |
103 output_class = t.argmax(activations,1) | 90 sqr_opt_0 = theano.gof.TopoOptimizer(theano.gof.PatternSub( |
104 loss_01 = t.neq(output_class, target) | 91 (T.mul,'x', 'x'), |
105 g_params = t.grad(nll + regularization, params) | 92 (T.sqr, 'x'))) |
106 new_params = [t.sub_inplace(p, lr * gp) for p,gp in zip(params, g_params)] | 93 |
107 self.__dict__.update(locals()); del self.self | 94 def __init__(self, do_sqr=True): |
108 AutonameVars.__init__(v_self, locals()) | 95 self.do_sqr = do_sqr |
109 self.nhid = nhid | 96 |
110 self.nclass = nclass | 97 def __call__(self, env): |
111 self.v = V_intern(**locals()) if V_extern is None else V_extern(**locals()) | 98 self.merge(env) |
112 self.linker = linker | 99 self.gemm_opt_1(env) |
113 self.early_stopper = early_stopper if early_stopper is not None else lambda: stopper.NStages(10,1) | 100 if self.do_sqr: |
114 self.validation_portion = validation_portion | 101 self.sqr_opt_0(env) |
102 self.merge(env) | |
103 | |
104 def linker(self): | |
105 return theano.gof.PerformLinker() | |
106 | |
107 def early_stopper(self): | |
108 return stopper.NStages(10,1) | |
109 | |
110 def train_iter(self, trainset): | |
111 raise AbstractFunction | |
112 optimizer = Opt() | |
113 | |
114 def __init__(self, graph): | |
115 self.graph = graph | |
115 | 116 |
116 def _fn(self, inputs, outputs): | 117 def _fn(self, inputs, outputs): |
117 # Caching here would hamper multi-threaded apps | 118 # Caching here would hamper multi-threaded apps |
118 # prefer caching in _Model.__call__ | 119 # prefer caching in Model.__call__ |
119 return theano.function(inputs, outputs, unpack_single=False, linker=self.linker) | 120 return theano.function(inputs, outputs, |
120 | 121 unpack_single=False, |
121 def __call__(self, trainset=None, iparams=None, input='input', target='target'): | 122 optimizer=self.graph.optimizer, |
122 """Allocate and optionally train a model""" | 123 linker=self.graph.linker() if hasattr(self.graph, 'linker') |
123 if iparams is None: | 124 else 'c&py') |
124 iparams = [_randshape(self.nhid, self.nclass), _randshape(self.nclass)]\ | 125 |
125 + self.v.hid_ivals() | 126 def __call__(self, |
126 rval = _Model(self, iparams) | 127 trainset=None, |
127 if trainset: | 128 validset=None, |
128 if len(trainset) == sys.maxint: | 129 iparams=None): |
129 raise NotImplementedError('Learning from infinite streams is not supported') | 130 """Allocate and optionally train a model |
130 nval = int(self.validation_portion * len(trainset)) | 131 |
131 nmin = len(trainset) - nval | 132 @param trainset: Data for minimizing the cost function |
132 assert nmin >= 0 | 133 @type trainset: None or Dataset |
133 minset = trainset[:nmin] #real training set for minimizing loss | 134 |
134 valset = trainset[nmin:] #validation set for early stopping | 135 @param validset: Data for early stopping |
135 best = rval | 136 @type validset: None or Dataset |
136 for stp in self.early_stopper(): | 137 |
137 rval.update( | 138 @param input: name of field to use as input |
138 minset.minibatches([input, target], minibatch_size=min(32, | 139 @type input: string |
139 len(minset)))) | 140 |
140 #print 'mlp.__call__(), we did an update' | 141 @param target: name of field to use as target |
142 @type target: string | |
143 | |
144 @return: model | |
145 @rtype: GraphLearner.Model instance | |
146 | |
147 """ | |
148 iparams = self.graph.iparams() if iparams is None else iparams | |
149 curmodel = GraphLearner.Model(self, iparams) | |
150 best = curmodel | |
151 | |
152 if trainset is not None: | |
153 #do some training by calling Model.update_minibatch() | |
154 stp = self.graph.early_stopper() | |
155 for mb in self.graph.train_iter(trainset): | |
156 curmodel.update_minibatch(mb) | |
141 if stp.set_score: | 157 if stp.set_score: |
142 stp.score = rval(valset, ['loss_01']) | 158 if validset: |
143 if (stp.score < stp.best_score): | 159 stp.score = curmodel(validset, ['validset_score']) |
144 best = copy.copy(rval) | 160 if (stp.score < stp.best_score): |
145 rval = best | 161 best = copy.copy(curmodel) |
146 return rval | 162 else: |
163 stp.score = 0.0 | |
164 stp.next() | |
165 if validset: | |
166 curmodel = best | |
167 return curmodel | |
168 | |
169 def graphMLP(ninputs, nhid, nclass, lr_val, l2coef_val=0.0): | |
170 def wrapper(i, node, thunk): | |
171 if 0: | |
172 print i, node | |
173 print thunk.inputs | |
174 print thunk.outputs | |
175 if node.op == nnet_ops.crossentropy_softmax_1hot_with_bias: | |
176 print 'here is the nll op' | |
177 thunk() #actually compute this piece of the graph | |
178 | |
179 class G(GraphLearner.Graph, AutoName): | |
180 | |
181 lr = T.constant(lr_val) | |
182 assert l2coef_val == 0.0 | |
183 l2coef = T.constant(l2coef_val) | |
184 input = T.matrix() # n_examples x n_inputs | |
185 target = T.ivector() # len: n_examples | |
186 W2, b2 = T.matrix(), T.vector() | |
187 | |
188 W1, b1 = T.matrix(), T.vector() | |
189 hid = T.tanh(b1 + T.dot(input, W1)) | |
190 hid_regularization = l2coef * T.sum(W1*W1) | |
191 | |
192 params = [W1, b1, W2, b2] | |
193 activations = b2 + T.dot(hid, W2) | |
194 nll, predictions = nnet_ops.crossentropy_softmax_1hot(activations, target) | |
195 regularization = l2coef * T.sum(W2*W2) + hid_regularization | |
196 output_class = T.argmax(activations,1) | |
197 loss_01 = T.neq(output_class, target) | |
198 #g_params = T.grad(nll + regularization, params) | |
199 g_params = T.grad(nll, params) | |
200 new_params = [T.sub_inplace(p, lr * gp) for p,gp in zip(params, g_params)] | |
201 | |
202 def iparams(self): | |
203 def randsmall(*shape): | |
204 return (numpy.random.rand(*shape) -0.5) * 0.001 | |
205 return [randsmall(ninputs, nhid) | |
206 , randsmall(nhid) | |
207 , randsmall(nhid, nclass) | |
208 , randsmall(nclass)] | |
209 | |
210 def train_iter(self, trainset): | |
211 return trainset.minibatches(['input', 'target'], | |
212 minibatch_size=min(len(trainset), 32), n_batches=300) | |
213 def early_stopper(self): | |
214 return stopper.NStages(300,1) | |
215 | |
216 return G() | |
147 | 217 |
148 | 218 |
149 import unittest | 219 import unittest |
150 | 220 |
151 class TestMLP(unittest.TestCase): | 221 class TestMLP(unittest.TestCase): |
152 def test0(self): | 222 def blah(self, g): |
153 | |
154 training_set1 = dataset.ArrayDataSet(numpy.array([[0, 0, 0], | 223 training_set1 = dataset.ArrayDataSet(numpy.array([[0, 0, 0], |
155 [0, 1, 1], | 224 [0, 1, 1], |
156 [1, 0, 1], | 225 [1, 0, 1], |
157 [1, 1, 1]]), | 226 [1, 1, 1]]), |
158 {'input':slice(2),'target':2}) | 227 {'input':slice(2),'target':2}) |
165 [0, 1, 1], | 234 [0, 1, 1], |
166 [1, 0, 0], | 235 [1, 0, 0], |
167 [1, 1, 1]]), | 236 [1, 1, 1]]), |
168 {'input':slice(2)}) | 237 {'input':slice(2)}) |
169 | 238 |
170 learn_algo = MultiLayerPerceptron(2, 10, 2, .1 | 239 learn_algo = GraphLearner(g) |
171 , linker='c&py' | |
172 , early_stopper = lambda:stopper.NStages(100,1)) | |
173 | 240 |
174 model1 = learn_algo(training_set1) | 241 model1 = learn_algo(training_set1) |
175 | 242 |
176 model2 = learn_algo(training_set2) | 243 model2 = learn_algo(training_set2) |
177 | 244 |
178 n_match = 0 | 245 omatch = [o1 == o2 for o1, o2 in zip(model1(test_data), |
179 for o1, o2 in zip(model1(test_data), model2(test_data)): | 246 model2(test_data))] |
180 #print o1 | 247 |
181 #print o2 | 248 n_match = sum(omatch) |
182 n_match += (o1 == o2) | 249 |
183 | 250 self.failUnless(n_match == (numpy.sum(training_set1.fields()['target'] == |
184 assert n_match == (numpy.sum(training_set1.fields()['target'] == | 251 training_set2.fields()['target'])), omatch) |
185 training_set2.fields()['target'])) | 252 |
253 def equiv(self, g0, g1): | |
254 training_set1 = dataset.ArrayDataSet(numpy.array([[0, 0, 0], | |
255 [0, 1, 1], | |
256 [1, 0, 1], | |
257 [1, 1, 1]]), | |
258 {'input':slice(2),'target':2}) | |
259 learn_algo_0 = GraphLearner(g0) | |
260 learn_algo_1 = GraphLearner(g1) | |
261 | |
262 model_0 = learn_algo_0(training_set1) | |
263 model_1 = learn_algo_1(training_set1) | |
264 | |
265 print '----' | |
266 for p in zip(model_0.params, model_1.params): | |
267 abs_rel_err = theano.gradient.numeric_grad.abs_rel_err(p[0], p[1]) | |
268 max_abs_rel_err = numpy.max(abs_rel_err) | |
269 if max_abs_rel_err > 1.0e-7: | |
270 print 'p0', p[0] | |
271 print 'p1', p[1] | |
272 #self.failUnless(max_abs_rel_err < 1.0e-7, max_abs_rel_err) | |
273 | |
274 | |
275 def test0(self): self.blah(graphMLP(2, 10, 2, .1)) | |
276 def test1(self): self.blah(graphMLP(2, 3, 2, .1)) | |
186 | 277 |
187 if __name__ == '__main__': | 278 if __name__ == '__main__': |
188 unittest.main() | 279 unittest.main() |
189 | 280 |
281 |
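
The AutoName metaclass introduced above is worth a closer look: when a class body is executed, the metaclass's __init__ receives the class dictionary and stamps every value that already carries a `name` attribute with the name of the class variable holding it, which is how G's theano variables end up named `input`, `target`, `W1`, and so on. A minimal self-contained Python 2 sketch of the mechanism (`Var` is a hypothetical stand-in for a theano variable, not part of the file above):

    class Var(object):
        def __init__(self):
            self.name = None        # filled in by AutoName

    class AutoName(object):
        class __metaclass__(type):
            def __init__(cls, name, bases, dct):
                type.__init__(cls, name, bases, dct)
                for key, val in dct.items():
                    if hasattr(val, 'name'):
                        val.name = key   # class variable name -> .name

    class G(AutoName):
        input = Var()
        target = Var()

    print G.input.name    # prints 'input'
    print G.target.name   # prints 'target'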
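A related detail: theano functions are deliberately compiled per call site rather than inside GraphLearner._fn ("Caching here would hamper multi-threaded apps"), and Model._cache memoizes each compiled function under a (inputs, outputs) key so repeated Model.__call__ invocations with the same fieldnames compile only once. A self-contained sketch of that memoization shape (`build_fn` and `compile_count` are illustrative stand-ins for a theano.function compilation, not part of the file above):

    compile_count = [0]

    def build_fn(inputs, outputs):
        # stands in for an expensive theano.function compilation
        compile_count[0] += 1
        return lambda *args: zip(outputs, args)

    def cache(d, key, valfn):
        # valfn() is evaluated only on a cache miss
        if key not in d:
            d[key] = valfn()
        return d[key]

    fn_cache = {}
    key = (('input',), ('output_class',))
    f1 = cache(fn_cache, key, lambda: build_fn(*key))
    f2 = cache(fn_cache, key, lambda: build_fn(*key))
    assert f1 is f2 and compile_count[0] == 1   # second lookup reused f1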
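Putting the pieces together, the call sequence that TestMLP.blah exercises looks like this (a sketch assuming a 2008-era pylearn checkout with its dataset module and a working theano install; graphMLP and GraphLearner are the definitions from the new file above, and it is not independently tested):

    import numpy
    from pylearn import dataset

    trainset = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
                                                 [0, 1, 1],
                                                 [1, 0, 1],
                                                 [1, 1, 1]]),
                                    {'input': slice(2), 'target': 2})

    g = graphMLP(ninputs=2, nhid=10, nclass=2, lr_val=.1)  # build the theano graph
    learn_algo = GraphLearner(g)        # wrap the graph in the generic learner
    model = learn_algo(trainset)        # allocate iparams and train with early stopping
    outputs = model(trainset, ['output_class'])  # apply the trained model to data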