diff algorithms/regressor.py @ 476:8fcd0f3d9a17

added a few algorithms
author Olivier Breuleux <breuleuo@iro.umontreal.ca>
date Mon, 27 Oct 2008 17:26:00 -0400
parents
children 2b0e10ac6929
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/algorithms/regressor.py	Mon Oct 27 17:26:00 2008 -0400
@@ -0,0 +1,116 @@
+
+import theano
+from theano import tensor as T
+from theano.tensor import nnet as NN
+import numpy as N
+
+class Regressor(theano.FancyModule):
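+    """Affine regression module: output = activation(dot(input, w) + b).
+
+    Subclasses define the output nonlinearity (build_output) and the
+    regression cost (build_regression_cost); build_regularization may be
+    overridden to add a penalty term (it is zero in this base class).
+    """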
+
+    def __init__(self, input = None, target = None, regularize = True):
+        super(Regressor, self).__init__()
+
+        # MODEL CONFIGURATION
+        self.regularize = regularize
+
+        # ACQUIRE/MAKE INPUT AND TARGET
+        self.input = theano.External(input) if input is not None else T.matrix('input')
+        self.target = theano.External(target) if target is not None else T.matrix('target')
+
+        # HYPER-PARAMETERS
+        self.lr = theano.Member(T.scalar())
+
+        # PARAMETERS
+        self.w = theano.Member(T.matrix())
+        self.b = theano.Member(T.vector())
+
+        # OUTPUT
+        self.output_activation = T.dot(self.input, self.w) + self.b
+        self.output = self.build_output()
+
+        # REGRESSION COST
+        self.regression_cost = self.build_regression_cost()
+
+        # REGULARIZATION COST
+        self.regularization = self.build_regularization()
+
+        # TOTAL COST
+        self.cost = self.regression_cost
+        if self.regularize:
+            self.cost = self.cost + self.regularization
+
+        # GRADIENTS AND UPDATES
+        self.params = self.w, self.b
+        gradients = T.grad(self.cost, self.params)
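+        # plain gradient-descent updates: p <- p - lr * dcost/dp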
+        updates = dict((p, p - self.lr * g) for p, g in zip(self.params, gradients))
+
+        # INTERFACE METHODS
+        self.update = theano.Method([self.input, self.target], self.cost, updates)
+        self.predict = theano.Method(self.input, self.output)
+
+        self.build_extensions()
+
+    def _instance_initialize(self, obj, input_size = None, output_size = None, seed = None, **init):
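+        """Initialize w uniformly in [-1/sqrt(input_size), 1/sqrt(input_size)] and b to zeros."""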
+        if seed is not None:
+            R = N.random.RandomState(seed)
+        else:
+            R = N.random
+        if (input_size is None) ^ (output_size is None):
+            raise ValueError("Must specify input_size and output_size or neither.")
+        super(Regressor, self)._instance_initialize(obj, **init)
+        if input_size is not None:
+            sz = (input_size, output_size)
+            scale = 1/N.sqrt(input_size)
+            obj.w = R.uniform(size = sz, low = -scale, high = scale)
+            obj.b = N.zeros(output_size)
+        obj.__hide__ = ['params']
+
+    def _instance_flops_approx(self, obj):
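+        # approximate flops per example: one multiply-add per weight in dot(input, w)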
+        return obj.w.size
+
+    def build_extensions(self):
+        pass
+
+    def build_output(self):
+        raise NotImplementedError('override in subclass')
+
+    def build_regression_cost(self):
+        raise NotImplementedError('override in subclass')
+
+    def build_regularization(self):
+        return T.zero() # no regularization!
+
+
+class BinRegressor(Regressor):
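+    """Binary logistic regressor: sigmoid output, cross-entropy cost, L2 weight penalty."""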
+
+    def build_extensions(self):
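+        # threshold the sigmoid output at 0.5 to get 0/1 class labels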
+        self.classes = T.iround(self.output)
+        self.classify = theano.Method(self.input, self.classes)
+
+    def build_output(self):
+        return NN.sigmoid(self.output_activation)
+
+    def build_regression_cost(self):
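+        # negative log-likelihood: cross-entropy summed over output units, averaged over examples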
+        self.regression_cost_matrix = self.target * T.log(self.output) + (1.0 - self.target) * T.log(1.0 - self.output)
+        self.regression_costs = -T.sum(self.regression_cost_matrix, axis=1)
+        return T.mean(self.regression_costs)
+
+    def build_regularization(self):
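+        # L2 weight decay, scaled by the l2_coef hyper-parameter (0 by default at initialization)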
+        self.l2_coef = theano.Member(T.scalar())
+        return self.l2_coef * T.sum(self.w * self.w)
+
+    def _instance_initialize(self, obj, input_size = None, output_size = 1, seed = None, **init):
+        init.setdefault('l2_coef', 0)
+        super(BinRegressor, self)._instance_initialize(obj, input_size, output_size, seed, **init)
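
For context, a rough usage sketch follows; it is not part of the changeset and assumes the old theano Module API, in which calling make() on a module compiles it into an instance and forwards keyword arguments to _instance_initialize. The data, learning rate and l2_coef values are purely illustrative.

    import numpy

    # assumes Module.make() forwards these kwargs to _instance_initialize;
    # all hyper-parameter values here are illustrative
    model = BinRegressor(regularize = True)
    trainer = model.make(input_size = 5, output_size = 1, seed = 42,
                         lr = 0.01, l2_coef = 1e-4)

    # toy data: 20 examples, 5 features, one binary target column
    x = numpy.random.uniform(size = (20, 5))
    y = (x.sum(axis = 1) > 2.5).astype('float64').reshape(20, 1)

    for i in range(100):
        cost = trainer.update(x, y)   # one gradient step; returns the current cost
    labels = trainer.classify(x)      # 0/1 predictions from the sigmoid output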