annotate algorithms/regressor.py @ 532:34ee3aff3e8f

Improved embedding word preprocessing.
author Joseph Turian <turian@gmail.com>
date Tue, 18 Nov 2008 02:57:50 -0500
parents 2b0e10ac6929
children
rev   line source
476
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
1
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
2 import theano
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
3 from theano import tensor as T
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
4 from theano.tensor import nnet as NN
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
5 import numpy as N
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
6
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
7 class Regressor(theano.FancyModule):
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
8
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
9 def __init__(self, input = None, target = None, regularize = True):
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
10 super(Regressor, self).__init__()
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
11
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
12 # MODEL CONFIGURATION
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
13 self.regularize = regularize
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
14
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
15 # ACQUIRE/MAKE INPUT AND TARGET
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
16 self.input = theano.External(input) if input else T.matrix('input')
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
17 self.target = theano.External(target) if target else T.matrix('target')
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
18
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
19 # HYPER-PARAMETERS
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
20 self.lr = theano.Member(T.scalar())
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
21
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
22 # PARAMETERS
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
23 self.w = theano.Member(T.matrix())
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
24 self.b = theano.Member(T.vector())
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
25
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
26 # OUTPUT
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
27 self.output_activation = T.dot(self.input, self.w) + self.b
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
28 self.output = self.build_output()
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
29
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
30 # REGRESSION COST
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
31 self.regression_cost = self.build_regression_cost()
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
32
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
33 # REGULARIZATION COST
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
34 self.regularization = self.build_regularization()
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
35
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
36 # TOTAL COST
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
37 self.cost = self.regression_cost
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
38 if self.regularize:
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
39 self.cost = self.cost + self.regularization
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
40
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
41 # GRADIENTS AND UPDATES
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
42 self.params = self.w, self.b
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
43 gradients = T.grad(self.cost, self.params)
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
44 updates = dict((p, p - self.lr * g) for p, g in zip(self.params, gradients))
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
45
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
46 # INTERFACE METHODS
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
47 self.update = theano.Method([self.input, self.target], self.cost, updates)
516
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents: 476
diff changeset
48 self.get_cost = theano.Method([self.input, self.target], self.cost)
476
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
49 self.predict = theano.Method(self.input, self.output)
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
50
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
51 self.build_extensions()
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
52
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
53 def _instance_initialize(self, obj, input_size = None, output_size = None, seed = None, **init):
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
54 if seed is not None:
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
55 R = N.random.RandomState(seed)
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
56 else:
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
57 R = N.random
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
58 if (input_size is None) ^ (output_size is None):
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
59 raise ValueError("Must specify input_size and output_size or neither.")
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
60 super(Regressor, self)._instance_initialize(obj, **init)
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
61 if input_size is not None:
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
62 sz = (input_size, output_size)
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
63 range = 1/N.sqrt(input_size)
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
64 obj.w = R.uniform(size = sz, low = -range, high = range)
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
65 obj.b = N.zeros(output_size)
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
66 obj.__hide__ = ['params']
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
67
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
68 def _instance_flops_approx(self, obj):
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
69 return obj.w.size
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
70
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
71 def build_extensions(self):
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
72 pass
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
73
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
74 def build_output(self):
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
75 raise NotImplementedError('override in subclass')
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
76
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
77 def build_regression_cost(self):
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
78 raise NotImplementedError('override in subclass')
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
79
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
80 def build_regularization(self):
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
81 return T.zero() # no regularization!
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
82
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
83
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
84 class BinRegressor(Regressor):
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
85
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
86 def build_extensions(self):
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
87 self.classes = T.iround(self.output)
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
88 self.classify = theano.Method(self.input, self.classes)
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
89
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
90 def build_output(self):
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
91 return NN.sigmoid(self.output_activation)
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
92
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
93 def build_regression_cost(self):
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
94 self.regression_cost_matrix = self.target * T.log(self.output) + (1.0 - self.target) * T.log(1.0 - self.output)
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
95 self.regression_costs = -T.sum(self.regression_cost_matrix, axis=1)
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
96 return T.mean(self.regression_costs)
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
97
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
98 def build_regularization(self):
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
99 self.l2_coef = theano.Member(T.scalar())
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
100 return self.l2_coef * T.sum(self.w * self.w)
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
101
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
102 def _instance_initialize(self, obj, input_size = None, output_size = 1, seed = None, **init):
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
103 init.setdefault('l2_coef', 0)
8fcd0f3d9a17 added a few algorithms
Olivier Breuleux <breuleuo@iro.umontreal.ca>
parents:
diff changeset
104 super(BinRegressor, self)._instance_initialize(obj, input_size, output_size, seed, **init)