view pylearn/algorithms/regressor.py @ 818:f4729745bb58

backporting to 2.4
author dumitru@deepnets.mtv.corp.google.com
date Wed, 02 Sep 2009 14:22:02 -0700
parents ba65e95d1221
children 972303bef0bf
line wrap: on
line source


import theano
from theano import tensor as T
from theano.tensor import nnet as NN
import numpy as N

class Regressor(theano.FancyModule):
    """Abstract linear regression module: output = f(dot(input, w) + b).

    Subclasses must supply the output nonlinearity (``build_output``) and
    the regression cost (``build_regression_cost``); an optional penalty
    term comes from ``build_regularization``.
    """

    def __init__(self, input = None, target = None, regularize = True):
        """Build the symbolic graph and the theano Methods.

        :param input: symbolic input matrix; a fresh dmatrix is created when None
        :param target: symbolic target matrix; a fresh dmatrix is created when None
        :param regularize: when True, add build_regularization() to the cost
        """
        super(Regressor, self).__init__()

        # MODEL CONFIGURATION
        self.regularize = regularize

        # ACQUIRE/MAKE INPUT AND TARGET
        # BUG FIX: the fallback branch used to assign self.target here,
        # leaving self.input undefined whenever no input variable was
        # supplied (AttributeError at the T.dot below).  Also compare with
        # `is not None` rather than truth-testing a symbolic variable.
        if input is not None:
            self.input = input
        else:
            self.input = T.dmatrix('input')

        if target is not None:
            self.target = target
        else:
            self.target = T.dmatrix('target')
        #backport
        #self.input = input if input else T.matrix('input')
        #self.target = target if target else T.matrix('target')

        # HYPER-PARAMETERS
        self.lr = T.scalar()   # learning rate

        # PARAMETERS
        self.w = T.matrix()    # weights, shape (input_size, output_size)
        self.b = T.vector()    # biases, shape (output_size,)

        # OUTPUT
        self.output_activation = T.dot(self.input, self.w) + self.b
        self.output = self.build_output()

        # REGRESSION COST
        self.regression_cost = self.build_regression_cost()

        # REGULARIZATION COST
        self.regularization = self.build_regularization()

        # TOTAL COST
        self.cost = self.regression_cost
        if self.regularize:
            self.cost = self.cost + self.regularization

        # GRADIENTS AND UPDATES
        self.params = self.w, self.b
        gradients = T.grad(self.cost, self.params)
        updates = dict((p, p - self.lr * g) for p, g in zip(self.params, gradients))

        # INTERFACE METHODS
        self.update = theano.Method([self.input, self.target], self.cost, updates)
        self.get_cost = theano.Method([self.input, self.target], self.cost)
        self.predict = theano.Method(self.input, self.output)

        self.build_extensions()

    def _instance_initialize(self, obj, input_size = None, output_size = None, seed = None, **init):
        """Initialize parameters on the module instance.

        w is drawn uniformly from [-1/sqrt(input_size), 1/sqrt(input_size)]
        and b is set to zeros.  input_size and output_size must be given
        together or not at all.

        :raises ValueError: if exactly one of input_size/output_size is given
        """
        if seed is not None:
            R = N.random.RandomState(seed)
        else:
            R = N.random
        if (input_size is None) ^ (output_size is None):
            raise ValueError("Must specify input_size and output_size or neither.")
        super(Regressor, self)._instance_initialize(obj, **init)
        if input_size is not None:
            sz = (input_size, output_size)
            # renamed from `range`, which shadowed the builtin
            bound = 1.0 / N.sqrt(input_size)
            obj.w = R.uniform(size = sz, low = -bound, high = bound)
            obj.b = N.zeros(output_size)
        obj.__hide__ = ['params']

    def _instance_flops_approx(self, obj):
        """Rough FLOP estimate per example: one multiply per weight."""
        return obj.w.size

    def build_extensions(self):
        """Hook for subclasses to attach extra Methods (e.g. classify)."""
        pass

    def build_output(self):
        """Return the symbolic model output; must be overridden."""
        raise NotImplementedError('override in subclass')

    def build_regression_cost(self):
        """Return the symbolic regression cost; must be overridden."""
        raise NotImplementedError('override in subclass')

    def build_regularization(self):
        """Return a symbolic regularization penalty (zero by default)."""
        return T.zero() # no regularization!


class BinRegressor(Regressor):
    """Binary regressor: sigmoid output trained with cross-entropy."""

    def build_extensions(self):
        # Threshold the sigmoid output to {0, 1} and expose a classify Method.
        self.classes = T.iround(self.output)
        self.classify = theano.Method(self.input, self.classes)

    def build_output(self):
        # Squash the linear activation into (0, 1).
        return NN.sigmoid(self.output_activation)

    def build_regression_cost(self):
        # Element-wise binary cross-entropy, summed over output units,
        # then averaged over the minibatch.
        t = self.target
        y = self.output
        self.regression_cost_matrix = t * T.log(y) + (1.0 - t) * T.log(1.0 - y)
        self.regression_costs = -T.sum(self.regression_cost_matrix, axis=1)
        return T.mean(self.regression_costs)

    def build_regularization(self):
        # L2 weight penalty; the coefficient is supplied at instance init.
        self.l2_coef = T.scalar()
        return self.l2_coef * T.sum(self.w * self.w)

    def _instance_initialize(self, obj, input_size = None, output_size = 1, seed = None, **init):
        # Regularization is off by default (zero coefficient).
        init.setdefault('l2_coef', 0)
        super(BinRegressor, self)._instance_initialize(obj, input_size, output_size, seed, **init)