comparison mlp_factory_approach.py @ 244:3156a9976183

mlp_factory_approach.py, updated and un-deprecated by popular demand
author James Bergstra <bergstrj@iro.umontreal.ca>
date Mon, 02 Jun 2008 17:08:17 -0400
parents c047238e5b3f
children a1793a5e9523
comparison of 235:a70f2c973ea5 with 244:3156a9976183

diff -r a70f2c973ea5 -r 3156a9976183 mlp_factory_approach.py
--- a/mlp_factory_approach.py
+++ b/mlp_factory_approach.py
1 """
2
3
4
5 This file is deprecated. I'm continuing development in hpu/models.py.
6
7 Get that project like this: hg clone ssh://user@lgcm/../bergstrj/hpu
8
9
10
11
12
13 """
14 import copy, sys 1 import copy, sys
15 import numpy 2 import numpy
16 3
17 import theano 4 import theano
18 from theano import tensor as t 5 from theano import tensor as T
19 6
20 from pylearn import dataset, nnet_ops, stopper 7 from pylearn import dataset, nnet_ops, stopper, LookupList
21 8
-
-def _randshape(*shape):
-    return (numpy.random.rand(*shape) -0.5) * 0.001
-
-def _cache(d, key, valfn):
-    #valfn() is only evaluated if key isn't in dictionary d
-    if key not in d:
-        d[key] = valfn()
-    return d[key]
-
-class _Model(object):
-    def __init__(self, algo, params):
-        self.algo = algo
-        self.params = params
-        v = algo.v
-        self.update_fn = algo._fn([v.input, v.target] + v.params, [v.nll] + v.new_params)
-        self._fn_cache = {}
-
-    def __copy__(self):
-        return _Model(self.algo, [copy.copy(p) for p in params])
-
-    def update(self, input_target):
-        """Update this model from more training data."""
-        params = self.params
-        #TODO: why should we have to unpack target like this?
-        # tbm : creates problem...
-        for input, target in input_target:
-            rval= self.update_fn(input, target, *params)
-            #print rval[0]
-
-    def __call__(self, testset, fieldnames=['output_class'],input='input',target='target'):
-        """Apply this model (as a function) to new data"""
-        #TODO: cache fn between calls
-        assert input == testset.fieldNames()[0] # why first one???
-        assert len(testset.fieldNames()) <= 2
-        v = self.algo.v
-        outputs = [getattr(v, name) for name in fieldnames]
-        inputs = [v.input] + ([v.target] if target in testset else [])
-        inputs.extend(v.params)
-        theano_fn = _cache(self._fn_cache, (tuple(inputs), tuple(outputs)),
-                lambda: self.algo._fn(inputs, outputs))
-        lambda_fn = lambda *args: theano_fn(*(list(args) + self.params))
-        return dataset.ApplyFunctionDataSet(testset, lambda_fn, fieldnames)
-
-class AutonameVars(object):
-    def __init__(self, dct):
-        for key, val in dct.items():
-            if type(key) is str and hasattr(val, 'name'):
-                val.name = key
-        self.__dict__.update(dct)
-
-class MultiLayerPerceptron(object):
-
-    def __init__(self, ninputs, nhid, nclass, lr,
-            l2coef=0.0,
-            linker='c&py',
-            hidden_layer=None,
-            early_stopper=None,
-            validation_portion=0.2,
-            V_extern=None):
-        class V_intern(AutonameVars):
-            def __init__(v_self, lr, l2coef, **kwargs):
-                lr = t.constant(lr)
-                l2coef = t.constant(l2coef)
-                input = t.matrix() # n_examples x n_inputs
-                target = t.ivector() # len: n_examples
-                W2, b2 = t.matrix(), t.vector()
-
-                if hidden_layer:
-                    hid, hid_params, hid_ivals, hid_regularization = hidden_layer(input)
-                else:
-                    W1, b1 = t.matrix(), t.vector()
-                    hid = t.tanh(b1 + t.dot(input, W1))
-                    hid_params = [W1, b1]
-                    hid_regularization = l2coef * t.sum(W1*W1)
-                    hid_ivals = lambda : [_randshape(ninputs, nhid), _randshape(nhid)]
-
-                params = [W2, b2] + hid_params
-                activations = b2 + t.dot(hid, W2)
-                nll, predictions = nnet_ops.crossentropy_softmax_1hot(activations, target)
-                regularization = l2coef * t.sum(W2*W2) + hid_regularization
-                output_class = t.argmax(activations,1)
-                loss_01 = t.neq(output_class, target)
-                g_params = t.grad(nll + regularization, params)
-                new_params = [t.sub_inplace(p, lr * gp) for p,gp in zip(params, g_params)]
-                self.__dict__.update(locals()); del self.self
-                AutonameVars.__init__(v_self, locals())
-        self.nhid = nhid
-        self.nclass = nclass
-        self.v = V_intern(**locals()) if V_extern is None else V_extern(**locals())
-        self.linker = linker
-        self.early_stopper = early_stopper if early_stopper is not None else lambda: stopper.NStages(10,1)
-        self.validation_portion = validation_portion
+class AbstractFunction (Exception): pass
+
+class AutoName(object):
+    """
+    By inheriting from this class, class variables which have a name attribute
+    will have that name attribute set to the class variable name.
+    """
+    class __metaclass__(type):
+        def __init__(cls, name, bases, dct):
+            type.__init__(name, bases, dct)
+            for key, val in dct.items():
+                assert type(key) is str
+                if hasattr(val, 'name'):
+                    val.name = key
+
+class GraphLearner(object):
+    class Model(object):
+        def __init__(self, algo, params):
+            self.algo = algo
+            self.params = params
+            graph = self.algo.graph
+            self.update_fn = algo._fn([graph.input, graph.target] + graph.params,
+                    [graph.nll] + graph.new_params)
+            self._fn_cache = {}
+
+        def __copy__(self):
+            raise Exception('why not called?')
+            return GraphLearner.Model(self.algo, [copy.copy(p) for p in params])
+
+        def _cache(self, key, valfn):
+            d = self._fn_cache
+            if key not in d:
+                d[key] = valfn()
+            return d[key]
+
+        def update_minibatch(self, minibatch):
+            assert isinstance(minibatch, LookupList)
+            self.update_fn(minibatch['input'], minibatch['target'], *self.params)
+
+        def update(self, dataset,
+                default_minibatch_size=32):
+            """Update this model from more training data."""
+            params = self.params
+            minibatch_size = min(default_minibatch_size, len(dataset))
+            for mb in dataset.minibatches(['input', 'target'], minibatch_size=minibatch_size):
+                self.update_minibatch(mb)
+
+        def __call__(self, testset, fieldnames=['output_class']):
+            """Apply this model (as a function) to new data.
+
+            @param testset: DataSet, whose fields feed Result terms in self.algo.g
+            @type testset: DataSet
+
+            @param fieldnames: names of results in self.algo.g to compute.
+            @type fieldnames: list of strings
+
+            @return: DataSet with fields from fieldnames, computed from testset by
+                this model.
+            @rtype: ApplyFunctionDataSet instance
+
+            """
+            graph = self.algo.graph
+            def getresult(name):
+                r = getattr(graph, name)
+                if not isinstance(r, theano.Result):
+                    raise TypeError('string does not name a theano.Result', (name, r))
+                return r
+
+            provided = [getresult(name) for name in testset.fieldNames()]
+            wanted = [getresult(name) for name in fieldnames]
+            inputs = provided + graph.params
+
+            theano_fn = self._cache((tuple(inputs), tuple(wanted)),
+                    lambda: self.algo._fn(inputs, wanted))
+            lambda_fn = lambda *args: theano_fn(*(list(args) + self.params))
+            return dataset.ApplyFunctionDataSet(testset, lambda_fn, fieldnames)
+
+    class Graph(object):
+        class Opt(object):
+            merge = theano.gof.MergeOptimizer()
+            gemm_opt_1 = theano.gof.TopoOptimizer(theano.tensor_opt.gemm_pattern_1)
+            sqr_opt_0 = theano.gof.TopoOptimizer(theano.gof.PatternSub(
+                    (T.mul,'x', 'x'),
+                    (T.sqr, 'x')))
+
+            def __init__(self, do_sqr=True):
+                self.do_sqr = do_sqr
+
+            def __call__(self, env):
+                self.merge(env)
+                self.gemm_opt_1(env)
+                if self.do_sqr:
+                    self.sqr_opt_0(env)
+                self.merge(env)
+
+        def linker(self):
+            return theano.gof.PerformLinker()
+
+        def early_stopper(self):
+            stopper.NStages(10,1)
+
+        def train_iter(self, trainset):
+            raise AbstractFunction
+        optimizer = Opt()
+
+    def __init__(self, graph):
+        self.graph = graph
 
     def _fn(self, inputs, outputs):
         # Caching here would hamper multi-threaded apps
-        # prefer caching in _Model.__call__
-        return theano.function(inputs, outputs, unpack_single=False, linker=self.linker)
-
-    def __call__(self, trainset=None, iparams=None, input='input', target='target'):
-        """Allocate and optionally train a model"""
-        if iparams is None:
-            iparams = [_randshape(self.nhid, self.nclass), _randshape(self.nclass)]\
-                    + self.v.hid_ivals()
-        rval = _Model(self, iparams)
-        if trainset:
-            if len(trainset) == sys.maxint:
-                raise NotImplementedError('Learning from infinite streams is not supported')
-            nval = int(self.validation_portion * len(trainset))
-            nmin = len(trainset) - nval
-            assert nmin >= 0
-            minset = trainset[:nmin] #real training set for minimizing loss
-            valset = trainset[nmin:] #validation set for early stopping
-            best = rval
-            for stp in self.early_stopper():
-                rval.update(
-                    minset.minibatches([input, target], minibatch_size=min(32,
-                        len(minset))))
-                #print 'mlp.__call__(), we did an update'
+        # prefer caching in Model.__call__
+        return theano.function(inputs, outputs,
+                unpack_single=False,
+                optimizer=self.graph.optimizer,
+                linker=self.graph.linker() if hasattr(self.graph, 'linker')
+                else 'c&py')
+
+    def __call__(self,
+            trainset=None,
+            validset=None,
+            iparams=None):
+        """Allocate and optionally train a model
+
+        @param trainset: Data for minimizing the cost function
+        @type trainset: None or Dataset
+
+        @param validset: Data for early stopping
+        @type validset: None or Dataset
+
+        @param input: name of field to use as input
+        @type input: string
+
+        @param target: name of field to use as target
+        @type target: string
+
+        @return: model
+        @rtype: GraphLearner.Model instance
+
+        """
+        iparams = self.graph.iparams() if iparams is None else iparams
+        curmodel = GraphLearner.Model(self, iparams)
+        best = curmodel
+
+        if trainset is not None:
+            #do some training by calling Model.update_minibatch()
+            stp = self.graph.early_stopper()
+            for mb in self.graph.train_iter(trainset):
+                curmodel.update_minibatch(mb)
                 if stp.set_score:
-                    stp.score = rval(valset, ['loss_01'])
-                    if (stp.score < stp.best_score):
-                        best = copy.copy(rval)
-            rval = best
-        return rval
+                    if validset:
+                        stp.score = curmodel(validset, ['validset_score'])
+                        if (stp.score < stp.best_score):
+                            best = copy.copy(curmodel)
+                    else:
+                        stp.score = 0.0
+                stp.next()
+            if validset:
+                curmodel = best
+        return curmodel
+
+def graphMLP(ninputs, nhid, nclass, lr_val, l2coef_val=0.0):
+    def wrapper(i, node, thunk):
+        if 0:
+            print i, node
+            print thunk.inputs
+            print thunk.outputs
+            if node.op == nnet_ops.crossentropy_softmax_1hot_with_bias:
+                print 'here is the nll op'
+        thunk() #actually compute this piece of the graph
+
+    class G(GraphLearner.Graph, AutoName):
+
+        lr = T.constant(lr_val)
+        assert l2coef_val == 0.0
+        l2coef = T.constant(l2coef_val)
+        input = T.matrix() # n_examples x n_inputs
+        target = T.ivector() # len: n_examples
+        W2, b2 = T.matrix(), T.vector()
+
+        W1, b1 = T.matrix(), T.vector()
+        hid = T.tanh(b1 + T.dot(input, W1))
+        hid_regularization = l2coef * T.sum(W1*W1)
+
+        params = [W1, b1, W2, b2]
+        activations = b2 + T.dot(hid, W2)
+        nll, predictions = nnet_ops.crossentropy_softmax_1hot(activations, target)
+        regularization = l2coef * T.sum(W2*W2) + hid_regularization
+        output_class = T.argmax(activations,1)
+        loss_01 = T.neq(output_class, target)
+        #g_params = T.grad(nll + regularization, params)
+        g_params = T.grad(nll, params)
+        new_params = [T.sub_inplace(p, lr * gp) for p,gp in zip(params, g_params)]
+
+        def iparams(self):
+            def randsmall(*shape):
+                return (numpy.random.rand(*shape) -0.5) * 0.001
+            return [randsmall(ninputs, nhid)
+                    , randsmall(nhid)
+                    , randsmall(nhid, nclass)
+                    , randsmall(nclass)]
+
+        def train_iter(self, trainset):
+            return trainset.minibatches(['input', 'target'],
+                    minibatch_size=min(len(trainset), 32), n_batches=300)
+        def early_stopper(self):
+            return stopper.NStages(300,1)
+
+    return G()
 
 
 import unittest
 
 class TestMLP(unittest.TestCase):
-    def test0(self):
-
+    def blah(self, g):
         training_set1 = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
                                                           [0, 1, 1],
                                                           [1, 0, 1],
                                                           [1, 1, 1]]),
                                              {'input':slice(2),'target':2})
[... 6 unchanged lines (old 159-164 / new 228-233) omitted by the comparison view ...]
                                                       [0, 1, 1],
                                                       [1, 0, 0],
                                                       [1, 1, 1]]),
                                          {'input':slice(2)})
 
-        learn_algo = MultiLayerPerceptron(2, 10, 2, .1
-                , linker='c&py'
-                , early_stopper = lambda:stopper.NStages(100,1))
+        learn_algo = GraphLearner(g)
 
         model1 = learn_algo(training_set1)
 
         model2 = learn_algo(training_set2)
 
-        n_match = 0
-        for o1, o2 in zip(model1(test_data), model2(test_data)):
-            #print o1
-            #print o2
-            n_match += (o1 == o2)
-
-        assert n_match == (numpy.sum(training_set1.fields()['target'] ==
-                training_set2.fields()['target']))
+        omatch = [o1 == o2 for o1, o2 in zip(model1(test_data),
+                                             model2(test_data))]
+
+        n_match = sum(omatch)
+
+        self.failUnless(n_match == (numpy.sum(training_set1.fields()['target'] ==
+                training_set2.fields()['target'])), omatch)
+
+    def equiv(self, g0, g1):
+        training_set1 = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
+                                                          [0, 1, 1],
+                                                          [1, 0, 1],
+                                                          [1, 1, 1]]),
+                                             {'input':slice(2),'target':2})
+        learn_algo_0 = GraphLearner(g0)
+        learn_algo_1 = GraphLearner(g1)
+
+        model_0 = learn_algo_0(training_set1)
+        model_1 = learn_algo_1(training_set1)
+
+        print '----'
+        for p in zip(model_0.params, model_1.params):
+            abs_rel_err = theano.gradient.numeric_grad.abs_rel_err(p[0], p[1])
+            max_abs_rel_err = numpy.max(abs_rel_err)
+            if max_abs_rel_err > 1.0e-7:
+                print 'p0', p[0]
+                print 'p1', p[1]
+            #self.failUnless(max_abs_rel_err < 1.0e-7, max_abs_rel_err)
+
+
+    def test0(self): self.blah(graphMLP(2, 10, 2, .1))
+    def test1(self): self.blah(graphMLP(2, 3, 2, .1))
 
 if __name__ == '__main__':
     unittest.main()
 
+
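
For orientation, a minimal usage sketch of the API this changeset introduces, assembled from the TestMLP cases above. It is a sketch only: it assumes the file is importable as mlp_factory_approach and that pylearn's dataset.ArrayDataSet behaves as it does in the tests; nothing here is part of the changeset itself.

    # Sketch: build a 2-input / 10-hidden / 2-class MLP graph, train it, apply it.
    import numpy
    from pylearn import dataset
    import mlp_factory_approach as mfa   # assumed import name for this file

    # XOR-like toy data: two input columns, one integer target column.
    data = numpy.array([[0, 0, 0],
                        [0, 1, 1],
                        [1, 0, 1],
                        [1, 1, 1]])
    trainset = dataset.ArrayDataSet(data, {'input': slice(2), 'target': 2})
    testset  = dataset.ArrayDataSet(data, {'input': slice(2)})   # no target field

    # graphMLP() returns an instance of a GraphLearner.Graph subclass whose
    # class-level Theano variables get their .name set by the AutoName metaclass;
    # GraphLearner wraps that graph description.
    learn_algo = mfa.GraphLearner(mfa.graphMLP(2, 10, 2, .1))

    # Calling the learner allocates a Model from Graph.iparams() and trains it
    # using Graph.train_iter() and Graph.early_stopper(); calling the Model
    # applies it to new data and returns an ApplyFunctionDataSet over the
    # requested fields (default ['output_class']).
    model = learn_algo(trainset)
    for output_class in model(testset):
        print output_class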