Mercurial > pylearn
view pylearn/algorithms/logistic_regression.py @ 1524:9d21919e2332
autopep8
author | Frederic Bastien <nouiz@nouiz.org> |
---|---|
date | Fri, 02 Nov 2012 13:02:18 -0400 |
parents | bd7d540db70d |
children |
line wrap: on
line source
import sys, copy import theano from theano import tensor as T from theano.tensor import nnet from theano.compile import module from theano import printing, pprint from theano import compile import numpy as N class LogRegN(module.FancyModule): """ A symbolic module for performing N-class logistic regression. Notable variables ----------------- self.input self.target self.softmax self.argmax self.regularized_cost self.unregularized_cost """ def __init__(self, n_in=None, n_out=None, input=None, target=None, w=None, b=None, l2=None, l1=None): super(LogRegN, self).__init__() #boilerplate self.n_in = n_in self.n_out = n_out if input is not None: self.input = input else: self.input = T.matrix() if target is not None: self.target = target else: self.target = T.lvector() #backport #self.input = input if input is not None else T.matrix() #self.target = target if target is not None else T.lvector() if w is not None: self.w = w else: self.w = (T.dmatrix()) if b is not None: self.b = b else: self.b = (T.dvector()) #backport #self.w = w if w is not None else (T.dmatrix()) #self.b = b if b is not None else (T.dvector()) self.params = [] for p in [self.w, self.b]: if p.owner is None: self.params += [p] #backport #the params of the model are the ones we fit to the data #self.params = [p for p in [self.w, self.b] if p.owner is None] if l2 is not None: self.l2 = l2 else: self.l2 = (T.dscalar()) if l1 is not None: self.l1 = l1 else: self.l1 = (T.dscalar()) #backport #the hyper-parameters of the model are not fit to the data #self.l2 = l2 if l2 is not None else (T.dscalar()) #self.l1 = l1 if l1 is not None else (T.dscalar()) #here we actually build the model self.linear_output = T.dot(self.input, self.w) + self.b if 0: # TODO: pending support for target being a sparse matrix self.softmax = nnet.softmax(self.linear_output) self._max_pr, self.argmax = T.max_and_argmax(self.linear_output) self._xent = self.target * T.log(self.softmax) else: # TODO: when above is fixed, remove this hack (need an argmax # which is independent of targets) self.argmax_standalone = T.argmax(self.linear_output) (self._xent, self.softmax, self._max_pr, self.argmax) =\ nnet.crossentropy_softmax_max_and_argmax_1hot( self.linear_output, self.target) self.unregularized_cost = T.mean(self._xent) self.l1_cost = self.l1 * T.sum(abs(self.w)) self.l2_cost = self.l2 * T.sum(self.w**2) self.regularized_cost = self.unregularized_cost + self.l1_cost + self.l2_cost self._loss_zero_one = T.mean(T.neq(self.argmax, self.target)) # Softmax being computed directly. # TODO: Move somewhere else, more clean. self.softmax_unsupervised = nnet.softmax(self.linear_output) # METHODS if 0: #TODO: PENDING THE BETTER IMPLEMENTATION ABOVE self.predict = module.Method([self.input], self.argmax) self.label_probs = module.Method([self.input], self.softmax) self.validate = module.Method([self.input, self.target], [self._loss_zero_one, self.regularized_cost, self.unregularized_cost]) def _instance_initialize(self, obj): obj.w = N.zeros((self.n_in, self.n_out)) obj.b = N.zeros(self.n_out) obj.__pp_hide__ = ['params'] def logistic_regression(n_in, n_out, l1, l2, minimizer=None): if n_out == 2: raise NotImplementedError() else: rval = LogRegN(n_in=n_in, n_out=n_out, l1=l1, l2=l2) print 'RVAL input target', rval.input, rval.target rval.minimizer = minimizer([rval.input, rval.target], rval.regularized_cost, rval.params) return rval.make(mode='FAST_RUN') #TODO: grouping parameters by prefix does not play well with providing defaults. Think... #FIX : Guillaume suggested a convention: plugin handlers (dataset_factory, minimizer_factory, # etc.) should never provide default arguments for parameters, and accept **kwargs to catch # irrelevant parameters. #SOLUTION: the jobman deals in nested dictionaries. This means that there is no [dumb] reason that # irrelevant arguments should be passed at all. class _fit_logreg_defaults(object): minimizer_algo = 'dummy' #minimizer_lr = 0.001 dataset = 'MNIST_1k' l1 = 0.0 l2 = 0.0 batchsize = 8 verbose = 1 def fit_logistic_regression_online(state, channel=lambda *args, **kwargs:None): #use stochastic gradient descent state.use_defaults(_fit_logreg_defaults) dataset = make(state.dataset) train = dataset.train valid = dataset.valid test = dataset.test logreg = logistic_regression( n_in=train.x.shape[1], n_out=dataset.n_classes, l2=state.l2, l1=state.l1, minimizer=make_minimizer(**state.subdict(prefix='minimizer_'))) batchsize = state.batchsize verbose = state.verbose iter = [0] def step(): # step by making a pass through the training set for j in xrange(0,len(train.x)-batchsize+1,batchsize): cost_j = logreg.minimizer.step_cost(train.x[j:j+batchsize], train.y[j:j+batchsize]) if verbose > 1: print 'estimated train cost', cost_j #TODO: consult iter[0] for periodic saving to cwd (model, minimizer, and stopper) def check(): validate = logreg.validate(valid.x, valid.y) if verbose > 0: print 'iter', iter[0], 'validate', validate sys.stdout.flush() iter[0] += 1 return validate[0] def save(): return copy.deepcopy(logreg) stopper = make_stopper(**state.subdict(prefix='stopper_')) stopper.find_min(step, check, save) state.train_01, state.train_rcost, state.train_cost = logreg.validate(train.x, train.y) state.valid_01, state.valid_rcost, state.valid_cost = logreg.validate(valid.x, valid.y) state.test_01, state.test_rcost, state.test_cost = logreg.validate(test.x, test.y) state.n_train = len(train.y) state.n_valid = len(valid.y) state.n_test = len(test.y) class LogReg2(module.FancyModule): def __init__(self, input=None, targ=None, w=None, b=None, lr=None, regularize=False): super(LogReg2, self).__init__() #boilerplate if input is not None: self.input = (input) else: self.input = T.matrix('input') if targ is not None: self.targ = (targ) else: self.targ = T.lcol() #self.input = (input) if input is not None else T.matrix('input') #self.targ = (targ) if targ is not None else T.lcol() if w is not None: self.w = (w) else: self.w = (T.dmatrix()) if b is not None: self.b = (b) else: self.b = (T.dvector()) if lr is not None: self.lr = (lr) else: self.lr = (T.scalar()) #backport #self.w = (w) if w is not None else (T.dmatrix()) #self.b = (b) if b is not None else (T.dvector()) #self.lr = (lr) if lr is not None else (T.dscalar()) self.params = [] for p in [self.w, self.b]: if p.owner is None: self.params += [p] #backport #self.params = [p for p in [self.w, self.b] if p.owner is None] output = nnet.sigmoid(T.dot(self.x, self.w) + self.b) xent = -self.targ * T.log(output) - (1.0 - self.targ) * T.log(1.0 - output) mean_xent = T.mean(xent) self.output = output self.xent = xent self.mean_xent = mean_xent self.cost = mean_xent #define the apply method self.pred = (T.dot(self.input, self.w) + self.b) > 0.0 self.apply = module.Method([self.input], self.pred) #if this module has any internal parameters, define an update function for them if self.params: gparams = T.grad(mean_xent, self.params) self.update = module.Method([self.input, self.targ], mean_xent, updates = dict((p, p - self.lr * g) for p, g in zip(self.params, gparams))) class classification: #this would go to a file called pylearn/algorithms/classification.py @staticmethod def xent(p, q): """cross-entropy (row-wise) :type p: M x N symbolic matrix (sparse or dense) :param p: each row is a true distribution over N things :type q: M x N symbolic matrix (sparse or dense) :param q: each row is an approximating distribution over N things :rtype: symbolic vector of length M :returns: the cross entropy between each row of p and the corresponding row of q. Hint: To sum row-wise costs into a scalar value, use "xent(p, q).sum()" """ return (p * tensor.log(q)).sum(axis=1) @staticmethod def errors(target, prediction): """classification error (row-wise) :type p: M x N symbolic matrix (sparse or dense) :param p: each row is a true distribution over N things :type q: M x N symbolic matrix (sparse or dense) :param q: each row is an approximating distribution over N things :rtype: symbolic vector of length M :returns: a vector with 0 for every row pair that has a maximum in the same position, and 1 for every other row pair. Hint: Count errors with "errors(prediction, target).sum()", and get the error-rate with "errors(prediction, target).mean()" """ return tensor.neq( tensor.argmax(prediction, axis=1), tensor.argmax(target, axis=1)) class LogReg_New(module.FancyModule): """A symbolic module for performing multi-class logistic regression.""" params = property( lambda self: [p for p in [self.w, self.b] if p.owner is None], doc="WRITEME" ) def __init__(self, n_in=None, n_out=None, w=None, b=None): super(LogRegNew, self).__init__() #boilerplate if w is not None: self.w = w else: self.w = (T.dmatrix()) if b is not None: self.b = b else: self.b = (T.dvector()) self.n_in = n_in self.n_out = n_out #backport #self.w = w if w is not None else (T.dmatrix()) #self.b = b if b is not None else (T.dvector()) def _instance_initialize(self, obj): obj.w = N.zeros((self.n_in, self.n_out)) obj.b = N.zeros(self.n_out) obj.__pp_hide__ = ['params'] def l1(self): return abs(self.w).sum() def l2(self): return (self.w**2).sum() def activation(self, input): return theano.dot(input, self.w) + self.b def softmax(self, input): return nnet.softmax(self.activation(input)) def argmax(self, input): return tensor.argmax(self.activation(input)) def xent(self, input, target): return classification.xent(target, self.softmax(input)) def errors(self, input, target): return classification.errors(target, self.softmax(input))