# HG changeset patch
# User James Bergstra
# Date 1225142943 14400
# Node ID 1babf35fcef53874b233f9f065c1cea1dcfd193c
# Parent  fbfd3932fd0052df3f7e8670c068e3988acbb8a1
# Parent  8fcd0f3d9a177c8c50bf0e726050cf8bedf82ebd
merged

diff -r fbfd3932fd00 -r 1babf35fcef5 algorithms/__init__.py
--- a/algorithms/__init__.py	Mon Oct 27 17:28:48 2008 -0400
+++ b/algorithms/__init__.py	Mon Oct 27 17:29:03 2008 -0400
@@ -0,0 +1,5 @@
+
+from regressor import Regressor, BinRegressor
+from aa import AutoEncoder, SigmoidXEAutoEncoder
+from daa import DenoisingAA, SigmoidXEDenoisingAA
+from stacker import Stacker
diff -r fbfd3932fd00 -r 1babf35fcef5 algorithms/aa.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/algorithms/aa.py	Mon Oct 27 17:29:03 2008 -0400
@@ -0,0 +1,108 @@
+
+import theano
+from theano import tensor as T
+from theano.tensor import nnet as NN
+import numpy as N
+
+class AutoEncoder(theano.FancyModule):
+
+    def __init__(self, input = None, regularize = True, tie_weights = True):
+        super(AutoEncoder, self).__init__()
+
+        # MODEL CONFIGURATION
+        self.regularize = regularize
+        self.tie_weights = tie_weights
+
+        # ACQUIRE/MAKE INPUT
+        if not input:
+            input = T.matrix('input')
+        self.input = theano.External(input)
+
+        # HYPER-PARAMETERS
+        self.lr = theano.Member(T.scalar())
+
+        # PARAMETERS
+        self.w1 = theano.Member(T.matrix())
+        if not tie_weights:
+            self.w2 = theano.Member(T.matrix())
+        else:
+            self.w2 = self.w1.T
+        self.b1 = theano.Member(T.vector())
+        self.b2 = theano.Member(T.vector())
+
+        # HIDDEN LAYER
+        self.hidden_activation = T.dot(self.input, self.w1) + self.b1
+        self.hidden = self.build_hidden()
+
+        # RECONSTRUCTION LAYER
+        self.output_activation = T.dot(self.hidden, self.w2) + self.b2
+        self.output = self.build_output()
+
+        # RECONSTRUCTION COST
+        self.reconstruction_cost = self.build_reconstruction_cost()
+
+        # REGULARIZATION COST
+        self.regularization = self.build_regularization()
+
+        # TOTAL COST
+        self.cost = self.reconstruction_cost
+        if self.regularize:
+            self.cost = self.cost + self.regularization
+
+        # GRADIENTS AND UPDATES
+        if self.tie_weights:
+            self.params = self.w1, self.b1, self.b2
+        else:
+            self.params = self.w1, self.w2, self.b1, self.b2
+        gradients = T.grad(self.cost, self.params)
+        updates = dict((p, p - self.lr * g) for p, g in zip(self.params, gradients))
+
+        # INTERFACE METHODS
+        self.update = theano.Method(self.input, self.cost, updates)
+        self.reconstruction = theano.Method(self.input, self.output)
+        self.representation = theano.Method(self.input, self.hidden)
+
+    def _instance_initialize(self, obj, input_size = None, hidden_size = None, seed = None, **init):
+        if (input_size is None) ^ (hidden_size is None):
+            raise ValueError("Must specify input_size and hidden_size or neither.")
+        super(AutoEncoder, self)._instance_initialize(obj, **init)
+        if seed is not None:
+            R = N.random.RandomState(seed)
+        else:
+            R = N.random
+        if input_size is not None:
+            sz = (input_size, hidden_size)
+            scale = 1 / N.sqrt(input_size)
+            obj.w1 = R.uniform(size = sz, low = -scale, high = scale)
+            if not self.tie_weights:
+                obj.w2 = R.uniform(size = list(reversed(sz)), low = -scale, high = scale)
+            obj.b1 = N.zeros(hidden_size)
+            obj.b2 = N.zeros(input_size)
+
+    def build_regularization(self):
+        return T.zero() # no regularization!
+
+
+class SigmoidXEAutoEncoder(AutoEncoder):
+
+    def build_hidden(self):
+        return NN.sigmoid(self.hidden_activation)
+
+    def build_output(self):
+        return NN.sigmoid(self.output_activation)
+
+    def build_reconstruction_cost(self):
+        self.reconstruction_cost_matrix = self.input * T.log(self.output) + (1.0 - self.input) * T.log(1.0 - self.output)
+        self.reconstruction_costs = -T.sum(self.reconstruction_cost_matrix, axis=1)
+        return T.sum(self.reconstruction_costs)
+
+    def build_regularization(self):
+        self.l2_coef = theano.Member(T.scalar())
+        if self.tie_weights:
+            return self.l2_coef * T.sum(self.w1 * self.w1)
+        else:
+            return self.l2_coef * (T.sum(self.w1 * self.w1) + T.sum(self.w2 * self.w2))
+
+    def _instance_initialize(self, obj, input_size = None, hidden_size = None, **init):
+        init.setdefault('l2_coef', 0)
+        super(SigmoidXEAutoEncoder, self)._instance_initialize(obj, input_size, hidden_size, **init)
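For orientation, a minimal usage sketch of SigmoidXEAutoEncoder, following the make/update calling convention exercised by the tests at the end of this patch; the sizes, learning rate, l2_coef, and data here are made-up illustrations, not part of the changeset:

    import numpy, theano
    import models   # assumed, as in the tests, to re-export this package

    aa = models.SigmoidXEAutoEncoder(regularize = True)
    model = aa.make(lr = 0.01, l2_coef = 1e-4,
                    input_size = 100, hidden_size = 50,
                    seed = 42,
                    mode = theano.Mode('c|py', 'fast_run'))

    x = numpy.random.rand(10, 100)   # 10 examples, 100 visible units
    cost = model.update(x)           # one gradient step; returns the total cost
    code = model.representation(x)   # hidden features, shape (10, 50)
    recon = model.reconstruction(x)  # sigmoid reconstruction of x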
diff -r fbfd3932fd00 -r 1babf35fcef5 algorithms/daa.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/algorithms/daa.py	Mon Oct 27 17:29:03 2008 -0400
@@ -0,0 +1,147 @@
+
+import theano
+from theano import tensor as T
+from theano.tensor import nnet as NN
+import numpy as N
+
+class DenoisingAA(T.RModule):
+
+    def __init__(self, input = None, regularize = True, tie_weights = True):
+        super(DenoisingAA, self).__init__()
+
+        # MODEL CONFIGURATION
+        self.regularize = regularize
+        self.tie_weights = tie_weights
+
+        # ACQUIRE/MAKE INPUT
+        if not input:
+            input = T.matrix('input')
+        self.input = theano.External(input)
+
+        # HYPER-PARAMETERS
+        self.lr = theano.Member(T.scalar())
+
+        # PARAMETERS
+        self.w1 = theano.Member(T.matrix())
+        if not tie_weights:
+            self.w2 = theano.Member(T.matrix())
+        else:
+            self.w2 = self.w1.T
+        self.b1 = theano.Member(T.vector())
+        self.b2 = theano.Member(T.vector())
+
+        # REGULARIZATION COST
+        self.regularization = self.build_regularization()
+
+        ### NOISELESS ###
+
+        # HIDDEN LAYER
+        self.hidden_activation = T.dot(self.input, self.w1) + self.b1
+        self.hidden = self.hid_activation_function(self.hidden_activation)
+
+        # RECONSTRUCTION LAYER
+        self.output_activation = T.dot(self.hidden, self.w2) + self.b2
+        self.output = self.out_activation_function(self.output_activation)
+
+        # RECONSTRUCTION COST
+        self.reconstruction_costs = self.build_reconstruction_costs(self.output)
+        self.reconstruction_cost = T.mean(self.reconstruction_costs)
+
+        # TOTAL COST
+        self.cost = self.reconstruction_cost
+        if self.regularize:
+            self.cost = self.cost + self.regularization
+
+        ### WITH NOISE ###
+        self.corrupted_input = self.build_corrupted_input()
+
+        # HIDDEN LAYER
+        self.nhidden_activation = T.dot(self.corrupted_input, self.w1) + self.b1
+        self.nhidden = self.hid_activation_function(self.nhidden_activation)
+
+        # RECONSTRUCTION LAYER
+        self.noutput_activation = T.dot(self.nhidden, self.w2) + self.b2
+        self.noutput = self.out_activation_function(self.noutput_activation)
+
+        # RECONSTRUCTION COST
+        self.nreconstruction_costs = self.build_reconstruction_costs(self.noutput)
+        self.nreconstruction_cost = T.mean(self.nreconstruction_costs)
+
+        # TOTAL COST
+        self.ncost = self.nreconstruction_cost
+        if self.regularize:
+            self.ncost = self.ncost + self.regularization
+
+        # GRADIENTS AND UPDATES
+        if self.tie_weights:
+            self.params = self.w1, self.b1, self.b2
+        else:
+            self.params = self.w1, self.w2, self.b1, self.b2
+        gradients = T.grad(self.ncost, self.params)
+        updates = dict((p, p - self.lr * g) for p, g in zip(self.params, gradients))
+
+        # INTERFACE METHODS
+        self.update = theano.Method(self.input, self.ncost, updates)
+        self.compute_cost = theano.Method(self.input, self.cost)
+        self.noisify = theano.Method(self.input, self.corrupted_input)
+        self.reconstruction = theano.Method(self.input, self.output)
+        self.representation = theano.Method(self.input, self.hidden)
+        self.reconstruction_through_noise = theano.Method(self.input, [self.corrupted_input, self.noutput])
+
+    def _instance_initialize(self, obj, input_size = None, hidden_size = None, seed = None, **init):
+        if (input_size is None) ^ (hidden_size is None):
+            raise ValueError("Must specify input_size and hidden_size or neither.")
+        super(DenoisingAA, self)._instance_initialize(obj, **init)
+        if seed is not None:
+            R = N.random.RandomState(seed)
+        else:
+            R = N.random
+        if input_size is not None:
+            sz = (input_size, hidden_size)
+            inf = 1 / N.sqrt(input_size)
+            hif = 1 / N.sqrt(hidden_size)
+            obj.w1 = R.uniform(size = sz, low = -inf, high = inf)
+            if not self.tie_weights:
+                obj.w2 = R.uniform(size = list(reversed(sz)), low = -hif, high = hif)
+            obj.b1 = N.zeros(hidden_size)
+            obj.b2 = N.zeros(input_size)
+        if seed is not None:
+            self.seed(seed)
+        obj.__hide__ = ['params']
+
+    def build_regularization(self):
+        return T.zero() # no regularization!
+
+
+class SigmoidXEDenoisingAA(DenoisingAA):
+
+    def build_corrupted_input(self):
+        self.noise_level = theano.Member(T.scalar())
+        return self.random.binomial(T.shape(self.input), 1, 1 - self.noise_level) * self.input
+
+    def hid_activation_function(self, activation):
+        return NN.sigmoid(activation)
+
+    def out_activation_function(self, activation):
+        return NN.sigmoid(activation)
+
+    def build_reconstruction_costs(self, output):
+        reconstruction_cost_matrix = -(self.input * T.log(output) + (1 - self.input) * T.log(1 - output))
+        return T.sum(reconstruction_cost_matrix, axis=1)
+
+    def build_regularization(self):
+        self.l2_coef = theano.Member(T.scalar())
+        if self.tie_weights:
+            return self.l2_coef * T.sum(self.w1 * self.w1)
+        else:
+            return self.l2_coef * (T.sum(self.w1 * self.w1) + T.sum(self.w2 * self.w2))
+
+    def _instance_initialize(self, obj, input_size = None, hidden_size = None, seed = None, **init):
+        init.setdefault('noise_level', 0)
+        init.setdefault('l2_coef', 0)
+        super(SigmoidXEDenoisingAA, self)._instance_initialize(obj, input_size, hidden_size, seed, **init)
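The denoising variant trains on the corrupted path (ncost) but reports cost and reconstructs on the clean one. A sketch with illustrative sizes and noise level, again not part of the changeset:

    import numpy, theano
    import models   # assumed, as in the tests, to re-export this package

    daa = models.SigmoidXEDenoisingAA(regularize = False)
    model = daa.make(lr = 0.01, noise_level = 0.3,
                     input_size = 100, hidden_size = 50,
                     seed = 23,
                     mode = theano.Mode('c|py', 'fast_run'))

    x = numpy.random.rand(10, 100)
    cost = model.update(x)          # gradient step on the noisy cost ncost
    clean = model.compute_cost(x)   # noiseless cost, no parameter update
    noisy, recon = model.reconstruction_through_noise(x)

With noise_level = 0.3, build_corrupted_input's binomial mask zeroes each input entry independently with probability 0.3.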
diff -r fbfd3932fd00 -r 1babf35fcef5 algorithms/logistic_regression.py
--- a/algorithms/logistic_regression.py	Mon Oct 27 17:28:48 2008 -0400
+++ b/algorithms/logistic_regression.py	Mon Oct 27 17:29:03 2008 -0400
@@ -10,7 +10,7 @@
 
 class Module_Nclass(module.FancyModule):
     class InstanceType(module.FancyModuleInstance):
-        def initialize(self, n_in, n_out):
+        def initialize(self, n_in, n_out, rng=N.random):
             #self.component is the LogisticRegressionTemplate instance that built this guy.
 
             self.w = N.zeros((n_in, n_out))
diff -r fbfd3932fd00 -r 1babf35fcef5 algorithms/regressor.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/algorithms/regressor.py	Mon Oct 27 17:29:03 2008 -0400
@@ -0,0 +1,103 @@
+
+import theano
+from theano import tensor as T
+from theano.tensor import nnet as NN
+import numpy as N
+
+class Regressor(theano.FancyModule):
+
+    def __init__(self, input = None, target = None, regularize = True):
+        super(Regressor, self).__init__()
+
+        # MODEL CONFIGURATION
+        self.regularize = regularize
+
+        # ACQUIRE/MAKE INPUT AND TARGET
+        self.input = theano.External(input if input else T.matrix('input'))
+        self.target = theano.External(target if target else T.matrix('target'))
+
+        # HYPER-PARAMETERS
+        self.lr = theano.Member(T.scalar())
+
+        # PARAMETERS
+        self.w = theano.Member(T.matrix())
+        self.b = theano.Member(T.vector())
+
+        # OUTPUT
+        self.output_activation = T.dot(self.input, self.w) + self.b
+        self.output = self.build_output()
+
+        # REGRESSION COST
+        self.regression_cost = self.build_regression_cost()
+
+        # REGULARIZATION COST
+        self.regularization = self.build_regularization()
+
+        # TOTAL COST
+        self.cost = self.regression_cost
+        if self.regularize:
+            self.cost = self.cost + self.regularization
+
+        # GRADIENTS AND UPDATES
+        self.params = self.w, self.b
+        gradients = T.grad(self.cost, self.params)
+        updates = dict((p, p - self.lr * g) for p, g in zip(self.params, gradients))
+
+        # INTERFACE METHODS
+        self.update = theano.Method([self.input, self.target], self.cost, updates)
+        self.predict = theano.Method(self.input, self.output)
+
+        self.build_extensions()
+
+    def _instance_initialize(self, obj, input_size = None, output_size = None, seed = None, **init):
+        if seed is not None:
+            R = N.random.RandomState(seed)
+        else:
+            R = N.random
+        if (input_size is None) ^ (output_size is None):
+            raise ValueError("Must specify input_size and output_size or neither.")
+        super(Regressor, self)._instance_initialize(obj, **init)
+        if input_size is not None:
+            sz = (input_size, output_size)
+            scale = 1 / N.sqrt(input_size)
+            obj.w = R.uniform(size = sz, low = -scale, high = scale)
+            obj.b = N.zeros(output_size)
+        obj.__hide__ = ['params']
+
+    def _instance_flops_approx(self, obj):
+        return obj.w.size
+
+    def build_extensions(self):
+        pass
+
+    def build_output(self):
+        raise NotImplementedError('override in subclass')
+
+    def build_regression_cost(self):
+        raise NotImplementedError('override in subclass')
+
+    def build_regularization(self):
+        return T.zero() # no regularization!
+
+
+class BinRegressor(Regressor):
+
+    def build_extensions(self):
+        self.classes = T.iround(self.output)
+        self.classify = theano.Method(self.input, self.classes)
+
+    def build_output(self):
+        return NN.sigmoid(self.output_activation)
+
+    def build_regression_cost(self):
+        self.regression_cost_matrix = self.target * T.log(self.output) + (1.0 - self.target) * T.log(1.0 - self.output)
+        self.regression_costs = -T.sum(self.regression_cost_matrix, axis=1)
+        return T.mean(self.regression_costs)
+
+    def build_regularization(self):
+        self.l2_coef = theano.Member(T.scalar())
+        return self.l2_coef * T.sum(self.w * self.w)
+
+    def _instance_initialize(self, obj, input_size = None, output_size = 1, seed = None, **init):
+        init.setdefault('l2_coef', 0)
+        super(BinRegressor, self)._instance_initialize(obj, input_size, output_size, seed, **init)
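BinRegressor is the logistic-regression-style special case: one sigmoid output, mean cross-entropy cost. A sketch along the lines of test_regressor.py below, with the predict/classify calls added (data and hyper-parameters are illustrative):

    import numpy, theano
    import models   # assumed, as in the tests, to re-export this package

    reg = models.BinRegressor(regularize = True)
    model = reg.make(lr = 0.01, l2_coef = 1e-4,
                     input_size = 100,   # output_size defaults to 1
                     seed = 10,
                     mode = theano.Mode('c|py', 'fast_run'))

    R = numpy.random.RandomState(0)
    x = R.random_integers(0, 1, size = (10, 100))
    t = x[:, 6].reshape((10, 1))   # toy target: bit 6 of the input
    cost = model.update(x, t)      # one step on the mean cross-entropy
    probs = model.predict(x)       # sigmoid outputs in (0, 1)
    labels = model.classify(x)     # T.iround of the outputs: 0 or 1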
diff -r fbfd3932fd00 -r 1babf35fcef5 algorithms/stacker.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/algorithms/stacker.py	Mon Oct 27 17:29:03 2008 -0400
@@ -0,0 +1,83 @@
+
+import theano
+from theano import tensor as T
+import sys
+import numpy as N
+
+class Stacker(T.RModule):
+
+    def __init__(self, submodules, input = None, regularize = False):
+        super(Stacker, self).__init__()
+
+        current = input
+        layers = []
+        for i, (submodule, outname) in enumerate(submodules):
+            layer = submodule(current, regularize = regularize)
+            layers.append(layer)
+            current = layer[outname]
+        self.layers = layers
+
+        self.input = self.layers[0].input
+        self.output = current
+
+        local_update = []
+        global_update = []
+        to_update = []
+        all_kits = []
+        for layer in layers:
+            u = layer.update
+            u.resolve_all()
+            to_update += u.updates.keys()
+            all_kits += u.kits
+            # the input is the whole deep model's input instead of the layer's
+            # own input (which is previous_layer[outname])
+            inputs = [self.input] + u.inputs[1:]
+            method = theano.Method(inputs, u.outputs, u.updates, u.kits)
+            local_update.append(method)
+            global_update.append(
+                theano.Method(inputs,
+                              u.outputs,
+                              # we update the params of the previous layers too,
+                              # but wrt this layer's cost
+                              dict((param, param - layer.lr * T.grad(layer.cost, param))
+                                   for param in to_update),
+                              list(all_kits)))
+
+        self.local_update = local_update
+        self.global_update = global_update
+        self.update = self.global_update[-1]
+        self.compute = theano.Method(self.input, self.output)
+        ll = self.layers[-1]
+        for name, method in ll.components_map():
+            if isinstance(method, theano.Method) and not hasattr(self, name):
+                m = method.dup()
+                m.resolve_all()
+                m.inputs = [self.input if x is ll.input else x for x in m.inputs]
+                setattr(self, name, m)
+
+    def _instance_initialize(self, obj, nunits = None, lr = 0.01, seed = None, **kwargs):
+        super(Stacker, self)._instance_initialize(obj, **kwargs)
+        if seed is not None:
+            R = N.random.RandomState(seed)
+        else:
+            R = N.random
+        for layer in obj.layers:
+            if layer.lr is None:
+                layer.lr = lr
+        if nunits:
+            if len(nunits) != len(obj.layers) + 1:
+                raise ValueError('nunits must have exactly one more entry than there are layers.')
+            for ni, no, layer in zip(nunits[:-1], nunits[1:], obj.layers):
+                if seed is not None:
+                    layer.initialize(ni, no, seed = R.random_integers(sys.maxint - 1))
+                else:
+                    layer.initialize(ni, no)
+        if seed is not None:
+            obj.seed(seed)
+
+    def _instance_flops_approx(self, obj):
+        rval = 0
+        for layer in obj.layers:
+            rval += layer.flops_approx()
+        return rval
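The local_update/global_update split above is what supports greedy layer-wise pre-training followed by supervised fine-tuning. A sketch, assuming a DenoisingAA layer composes with a regressor the way two regressors do in test_stacker.py below, and assuming the compiled instance exposes the local_update list the same way it exposes update; sizes and data are illustrative:

    import numpy, theano
    import models   # assumed, as in the tests, to re-export this package

    nn = models.Stacker([(models.SigmoidXEDenoisingAA, 'hidden'),
                         (models.BinRegressor, 'output')],
                        regularize = False)
    model = nn.make([100, 50, 1], lr = 0.01, seed = 10,
                    mode = theano.Mode('c|py', 'fast_run'))

    R = numpy.random.RandomState(0)
    for i in xrange(100):
        x = R.random_integers(0, 1, size = (10, 100))
        t = x[:, 6].reshape((10, 1))
        model.local_update[0](x)   # unsupervised step: trains the DAA layer only
        model.update(x, t)         # supervised step: last layer's cost, all params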
diff -r fbfd3932fd00 -r 1babf35fcef5 algorithms/tests/test_aa.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/algorithms/tests/test_aa.py	Mon Oct 27 17:29:03 2008 -0400
@@ -0,0 +1,42 @@
+
+import models
+import theano
+import numpy
+import time
+
+
+def test_train(mode = theano.Mode('c|py', 'fast_run')):
+
+    aa = models.SigmoidXEAutoEncoder(regularize = False)
+#    print aa.update.pretty(mode = theano.Mode('py', 'fast_run').excluding('inplace'))
+
+    model = aa.make(lr = 0.01,
+                    input_size = 100,
+                    hidden_size = 1000,
+                    mode = mode)
+
+    data = [[0, 1, 0, 0, 1, 1, 1, 0, 1, 0]*10]*10
+    #data = numpy.random.rand(10, 100)
+
+    t1 = time.time()
+    for i in xrange(1001):
+        cost = model.update(data)
+        if i % 100 == 0:
+            print i, cost
+    t2 = time.time()
+    return t2 - t1
+
+if __name__ == '__main__':
+    numpy.random.seed(10)
+    print 'optimized:'
+    t1 = test_train(theano.Mode('c|py', 'fast_run'))
+    print 'time:', t1
+    print
+
+    numpy.random.seed(10)
+    print 'not optimized:'
+    t2 = test_train(theano.Mode('c|py', 'fast_compile'))
+    print 'time:', t2
diff -r fbfd3932fd00 -r 1babf35fcef5 algorithms/tests/test_regressor.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/algorithms/tests/test_regressor.py	Mon Oct 27 17:29:03 2008 -0400
@@ -0,0 +1,46 @@
+
+
+import models
+import theano
+import numpy
+import time
+
+
+def test_train(mode = theano.Mode('c|py', 'fast_run')):
+
+    reg = models.BinRegressor(regularize = False)
+
+    model = reg.make(lr = 0.01,
+                     input_size = 100,
+                     mode = mode,
+                     seed = 10)
+
+#    data = [[0, 1, 0, 0, 1, 1, 1, 0, 1, 0]*10]*10
+#    targets = [[1]]*10
+    #data = numpy.random.rand(10, 100)
+
+    R = numpy.random.RandomState(100)
+    t1 = time.time()
+    for i in xrange(1001):
+        data = R.random_integers(0, 1, size = (10, 100))
+        targets = data[:, 6].reshape((10, 1))
+        cost = model.update(data, targets)
+        if i % 100 == 0:
+            print i, '\t', cost, '\t', 1*(targets.T == model.classify(data).T)
+    t2 = time.time()
+    return t2 - t1
+
+if __name__ == '__main__':
+    print 'optimized:'
+    t1 = test_train(theano.Mode('c|py', 'fast_run'))
+    print 'time:', t1
+    print
+
+    print 'not optimized:'
+    t2 = test_train(theano.Mode('c|py', 'fast_compile'))
+    print 'time:', t2
diff -r fbfd3932fd00 -r 1babf35fcef5 algorithms/tests/test_stacker.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/algorithms/tests/test_stacker.py	Mon Oct 27 17:29:03 2008 -0400
@@ -0,0 +1,43 @@
+
+import models
+import theano
+import numpy
+import time
+
+
+def test_train(mode = theano.Mode('c|py', 'fast_run')):
+
+    reg = models.Stacker([(models.BinRegressor, 'output'), (models.BinRegressor, 'output')],
+                         regularize = False)
+    #print reg.global_update[1].pretty(mode = mode.excluding('inplace'))
+
+    model = reg.make([100, 200, 1],
+                     lr = 0.01,
+                     mode = mode,
+                     seed = 10)
+
+    R = numpy.random.RandomState(100)
+    t1 = time.time()
+    for i in xrange(1001):
+        data = R.random_integers(0, 1, size = (10, 100))
+        targets = data[:, 6].reshape((10, 1))
+        cost = model.update(data, targets)
+        if i % 100 == 0:
+            print i, '\t', cost, '\t', 1*(targets.T == model.classify(data).T)
+    t2 = time.time()
+    return t2 - t1
+
+if __name__ == '__main__':
+    print 'optimized:'
+    t1 = test_train(theano.Mode('c|py', 'fast_run'))
+    print 'time:', t1
+    print
+
+    print 'not optimized:'
+    t2 = test_train(theano.Mode('c|py', 'fast_compile'))
+    print 'time:', t2