Mercurial > pylearn
view pylearn/algorithms/regressor.py @ 818:f4729745bb58
backporting to 2.4
author | dumitru@deepnets.mtv.corp.google.com |
---|---|
date | Wed, 02 Sep 2009 14:22:02 -0700 |
parents | ba65e95d1221 |
children | 972303bef0bf |
line wrap: on
line source
import theano
from theano import tensor as T
from theano.tensor import nnet as NN
import numpy as N


class Regressor(theano.FancyModule):
    """Base module for a linear regressor trained by gradient descent.

    The symbolic graph computes ``dot(input, w) + b`` and hands it to hooks
    that subclasses must override (``build_output`` and
    ``build_regression_cost``).  ``build_regularization`` and
    ``build_extensions`` are optional hooks.
    """

    def __init__(self, input = None, target = None, regularize = True):
        """Build the symbolic graph and the compiled-method interface.

        :param input: symbolic input variable; a new matrix named 'input'
            is created when None.
        :param target: symbolic target variable; a new dmatrix named
            'target' is created when None.
        :param regularize: when true, the regularization term is added to
            the regression cost to form ``self.cost``.
        """
        super(Regressor, self).__init__()

        # MODEL CONFIGURATION
        self.regularize = regularize

        # ACQUIRE/MAKE INPUT AND TARGET
        # Written as if/else statements (not conditional expressions) so the
        # module stays importable on Python 2.4.  Compare against None
        # explicitly instead of relying on the truthiness of a symbolic
        # variable.
        if input is not None:
            self.input = input
        else:
            # Fixed: this branch used to assign self.target, which left
            # self.input undefined whenever no input was supplied.
            self.input = T.matrix('input')

        if target is not None:
            self.target = target
        else:
            self.target = T.dmatrix('target')

        # HYPER-PARAMETERS
        self.lr = T.scalar()

        # PARAMETERS
        self.w = T.matrix()
        self.b = T.vector()

        # OUTPUT
        self.output_activation = T.dot(self.input, self.w) + self.b
        self.output = self.build_output()

        # REGRESSION COST
        self.regression_cost = self.build_regression_cost()

        # REGULARIZATION COST
        self.regularization = self.build_regularization()

        # TOTAL COST
        self.cost = self.regression_cost
        if self.regularize:
            self.cost = self.cost + self.regularization

        # GRADIENTS AND UPDATES
        self.params = self.w, self.b
        gradients = T.grad(self.cost, self.params)
        # One gradient-descent step per parameter: p <- p - lr * dcost/dp
        updates = dict((p, p - self.lr * g)
                       for p, g in zip(self.params, gradients))

        # INTERFACE METHODS
        self.update = theano.Method([self.input, self.target], self.cost, updates)
        self.get_cost = theano.Method([self.input, self.target], self.cost)
        self.predict = theano.Method(self.input, self.output)

        # Let subclasses attach extra variables/methods.
        self.build_extensions()

    def _instance_initialize(self, obj, input_size = None, output_size = None, seed = None, **init):
        """Initialize the parameters of the instance `obj`.

        :param obj: the module instance being initialized.
        :param input_size: number of input units (rows of ``w``).
        :param output_size: number of output units (columns of ``w``,
            length of ``b``).
        :param seed: seed for a private ``RandomState``; the global numpy
            generator is used when None.
        :param init: extra keyword arguments forwarded to the parent
            ``_instance_initialize``.
        :raises ValueError: if exactly one of `input_size` and
            `output_size` is given.
        """
        if seed is not None:
            rng = N.random.RandomState(seed)
        else:
            rng = N.random

        # Sizes must be given together or not at all.
        if (input_size is None) ^ (output_size is None):
            raise ValueError("Must specify input_size and output_size or neither.")

        super(Regressor, self)._instance_initialize(obj, **init)

        if input_size is not None:
            shape = (input_size, output_size)
            # Uniform init in [-1/sqrt(input_size), 1/sqrt(input_size)).
            bound = 1 / N.sqrt(input_size)
            obj.w = rng.uniform(size = shape, low = -bound, high = bound)
            obj.b = N.zeros(output_size)
        obj.__hide__ = ['params']

    def _instance_flops_approx(self, obj):
        """Return the number of elements of ``obj.w`` as a flop estimate."""
        return obj.w.size

    def build_extensions(self):
        """Hook called at the end of ``__init__``; does nothing by default."""
        pass

    def build_output(self):
        """Return the symbolic output; must be overridden."""
        raise NotImplementedError('override in subclass')

    def build_regression_cost(self):
        """Return the symbolic regression cost; must be overridden."""
        raise NotImplementedError('override in subclass')

    def build_regularization(self):
        """Return the symbolic regularization term (none by default)."""
        return T.zero() # no regularization!


class BinRegressor(Regressor):
    """Regressor with a sigmoid output, a cross-entropy cost and an L2
    penalty on the weights."""

    def build_extensions(self):
        """Add ``classes`` (rounded output) and the ``classify`` method."""
        self.classes = T.iround(self.output)
        self.classify = theano.Method(self.input, self.classes)

    def build_output(self):
        """Return the sigmoid of the linear activation."""
        return NN.sigmoid(self.output_activation)

    def build_regression_cost(self):
        """Return the mean over examples of the summed cross-entropy.

        Also stores the intermediate ``regression_cost_matrix`` (per-element
        log-likelihood) and ``regression_costs`` (negated sum over axis 1)
        on ``self``.
        """
        self.regression_cost_matrix = (
            self.target * T.log(self.output)
            + (1.0 - self.target) * T.log(1.0 - self.output))
        self.regression_costs = -T.sum(self.regression_cost_matrix, axis=1)
        return T.mean(self.regression_costs)

    def build_regularization(self):
        """Return ``l2_coef * sum(w ** 2)``; creates the ``l2_coef`` scalar."""
        self.l2_coef = T.scalar()
        return self.l2_coef * T.sum(self.w * self.w)

    def _instance_initialize(self, obj, input_size = None, output_size = 1, seed = None, **init):
        """Initialize `obj` as in ``Regressor``, with `output_size`
        defaulting to 1 and ``l2_coef`` defaulting to 0."""
        init.setdefault('l2_coef', 0)
        super(BinRegressor, self)._instance_initialize(obj, input_size, output_size, seed, **init)