pylearn (Mercurial): comparison of pylearn/algorithms/regressor.py @ 537:b054271b2504
description: new file structure layout, factories, etc.

author:   James Bergstra <bergstrj@iro.umontreal.ca>
date:     Wed, 12 Nov 2008 21:57:54 -0500
parents:  algorithms/regressor.py@2b0e10ac6929
children: ba65e95d1221
comparing 518:4aa7f74ea93f with 537:b054271b2504; the file content at 537:b054271b2504 follows.
import theano
from theano import tensor as T
from theano.tensor import nnet as NN
import numpy as N

class Regressor(theano.FancyModule):

    def __init__(self, input = None, target = None, regularize = True):
        super(Regressor, self).__init__()

        # MODEL CONFIGURATION
        self.regularize = regularize

        # ACQUIRE/MAKE INPUT AND TARGET
        self.input = theano.External(input) if input else T.matrix('input')
        self.target = theano.External(target) if target else T.matrix('target')

        # HYPER-PARAMETERS
        self.lr = theano.Member(T.scalar())

        # PARAMETERS
        self.w = theano.Member(T.matrix())
        self.b = theano.Member(T.vector())

        # OUTPUT
        self.output_activation = T.dot(self.input, self.w) + self.b
        self.output = self.build_output()

        # REGRESSION COST
        self.regression_cost = self.build_regression_cost()

        # REGULARIZATION COST
        self.regularization = self.build_regularization()

        # TOTAL COST
        self.cost = self.regression_cost
        if self.regularize:
            self.cost = self.cost + self.regularization

        # GRADIENTS AND UPDATES
        self.params = self.w, self.b
        gradients = T.grad(self.cost, self.params)
        updates = dict((p, p - self.lr * g) for p, g in zip(self.params, gradients))

        # INTERFACE METHODS
        self.update = theano.Method([self.input, self.target], self.cost, updates)
        self.get_cost = theano.Method([self.input, self.target], self.cost)
        self.predict = theano.Method(self.input, self.output)

        self.build_extensions()

    def _instance_initialize(self, obj, input_size = None, output_size = None, seed = None, **init):
        if seed is not None:
            R = N.random.RandomState(seed)
        else:
            R = N.random
        if (input_size is None) ^ (output_size is None):
            raise ValueError("Must specify input_size and output_size or neither.")
        super(Regressor, self)._instance_initialize(obj, **init)
        if input_size is not None:
            sz = (input_size, output_size)
            range = 1/N.sqrt(input_size)
            obj.w = R.uniform(size = sz, low = -range, high = range)
            obj.b = N.zeros(output_size)
        obj.__hide__ = ['params']

    def _instance_flops_approx(self, obj):
        return obj.w.size

    def build_extensions(self):
        pass

    def build_output(self):
        raise NotImplementedError('override in subclass')

    def build_regression_cost(self):
        raise NotImplementedError('override in subclass')

    def build_regularization(self):
        return T.zero() # no regularization!


class BinRegressor(Regressor):

    def build_extensions(self):
        self.classes = T.iround(self.output)
        self.classify = theano.Method(self.input, self.classes)

    def build_output(self):
        return NN.sigmoid(self.output_activation)

    def build_regression_cost(self):
        self.regression_cost_matrix = self.target * T.log(self.output) + (1.0 - self.target) * T.log(1.0 - self.output)
        self.regression_costs = -T.sum(self.regression_cost_matrix, axis=1)
        return T.mean(self.regression_costs)

    def build_regularization(self):
        self.l2_coef = theano.Member(T.scalar())
        return self.l2_coef * T.sum(self.w * self.w)

    def _instance_initialize(self, obj, input_size = None, output_size = 1, seed = None, **init):
        init.setdefault('l2_coef', 0)
        super(BinRegressor, self)._instance_initialize(obj, input_size, output_size, seed, **init)