view mlp.py @ 185:3d953844abd3

support for more int types in crossentropysoftmax1hot
author James Bergstra <bergstrj@iro.umontreal.ca>
date Tue, 13 May 2008 19:37:29 -0400
parents 25d0a0c713da
children 562f308873f0
line wrap: on
line source

"""
A straightforward classical feedforward
one-hidden-layer neural net, with L2 regularization.
This is one of the simplest examples of L{Learner}, and illustrates
the use of theano.
"""

from learner import *
from theano import tensor as t
from nnet_ops import *
import math
from misc import *

class OneHiddenLayerNNetClassifier(OnlineGradientTLearner):
    """
    Implement a straightforward classical feedforward
    one-hidden-layer neural net, with L2 regularization.

    The predictor parameters are obtained by minibatch/online gradient descent.
    Training can proceed sequentially (with multiple calls to update with
    different disjoint subsets of the training sets).

    Hyper-parameters:
      - L2_regularizer
      - learning_rate
      - n_hidden

    For each (input_t,output_t) pair in a minibatch,::

       output_activations_t = b2+W2*tanh(b1+W1*input_t)
       output_t = softmax(output_activations_t)
       output_class_t = argmax(output_activations_t)
       class_error_t = 1_{output_class_t != target_t}
       nll_t = -log(output_t[target_t])

    and the training criterion is::

       loss = L2_regularizer*(||W1||^2 + ||W2||^2) + sum_t nll_t

    The parameters are [b1,W1,b2,W2] and are obtained by minimizing the loss by
    stochastic minibatch gradient descent::

       parameters[i] -= learning_rate * dloss/dparameters[i]

    The fields and attributes expected and produced by use and update are the following:

     - Input and output fields (example-wise quantities):

       - 'input' (always expected by use and update)
       - 'target' (optionally expected by use and always by update)
       - 'output' (optionally produced by use)
       - 'output_class' (optionally produced by use)
       - 'class_error' (optionally produced by use)
       - 'nll' (optionally produced by use)

     - optional attributes (optionally expected as input_dataset attributes)
       (warning, this may be dangerous, the 'use' method will use those provided in the
       input_dataset rather than those learned during 'update'; currently no support
       for providing these to update):

       - 'L2_regularizer'
       - 'b1'
       - 'W1'
       - 'b2'
       - 'W2'
       - 'parameters' = [b1, W1, b2, W2]
       - 'regularization_term'

    """
    def __init__(self,n_hidden,n_classes,learning_rate,max_n_epochs,L2_regularizer=0,init_range=1.,n_inputs=None,minibatch_size=None,linker='c|py'):
        """
        Store the hyper-parameters and build the symbolic graph of the network.

        @param n_hidden: number of hidden units (rows of W1, columns of W2).
        @param n_classes: number of output units (rows of W2).
        @param learning_rate: numeric learning rate, kept in C{self.learning_rate}.
        @param max_n_epochs: L{isLastEpoch} returns True once this many epochs
            have been counted.
        @param L2_regularizer: numeric coefficient of the weight penalty.
        @param init_range: the weights are drawn uniformly in
            [-init_range/sqrt(fan_in), +init_range/sqrt(fan_in)] (see L{forget}).
        @param n_inputs: input width; when left to None it is taken from the
            first minibatch seen by L{allocate}.
        @param minibatch_size: only stored (as C{_minibatch_size}) by this class.
        @param linker: forwarded unchanged to C{OnlineGradientTLearner.__init__}.
        """
        self._n_inputs = n_inputs
        self._n_outputs = n_classes
        self._n_hidden = n_hidden
        self._init_range = init_range
        self._max_n_epochs = max_n_epochs
        self._minibatch_size = minibatch_size
        # Start the epoch counter here so isLastEpoch() works even when the
        # parameters were supplied externally and forget() was never run.
        self._n_epochs = 0
        self.learning_rate = learning_rate # this is the float
        self.L2_regularizer = L2_regularizer
        self._learning_rate = t.scalar('learning_rate') # this is the symbol
        self._input = t.matrix('input') # n_examples x n_inputs
        self._target = t.lmatrix('target') # n_examples x 1
        self._target_vector = self._target[:,0]
        self._L2_regularizer = t.scalar('L2_regularizer')
        self._W1 = t.matrix('W1')
        self._W2 = t.matrix('W2')
        self._b1 = t.row('b1')
        self._b2 = t.row('b2')
        self._regularization_term = self._L2_regularizer * (t.sum(self._W1*self._W1) + t.sum(self._W2*self._W2))
        self._output_activations =self._b2+t.dot(t.tanh(self._b1+t.dot(self._input,self._W1.T)),self._W2.T)
        self._nll,self._output = crossentropy_softmax_1hot(self._output_activations,self._target_vector)
        self._output_class = t.argmax(self._output,1)
        self._class_error = t.neq(self._output_class,self._target_vector)
        # The penalty is divided by the number of rows of the input.
        # NOTE(review): presumably _nll is example-wise, so that summing this
        # criterion over the minibatch counts the penalty once -- confirm
        # against crossentropy_softmax_1hot.
        self._minibatch_criterion = self._nll + self._regularization_term / t.shape(self._input)[0]
        OnlineGradientTLearner.__init__(self, linker = linker)

    def attributeNames(self):
        """Return the names of the attributes listed in the class docstring."""
        # "W1" used to be missing here ("W2" was listed twice).
        return ["parameters","b1","W1","b2","W2", "L2_regularizer","regularization_term"]

    def parameterAttributes(self):
        """Return the names of the learned parameters, in [b1, W1, b2, W2] order."""
        return ["b1","W1", "b2", "W2"]

    def updateMinibatchInputFields(self):
        """Return the names of the fields read from each training minibatch."""
        return ["input","target"]

    def updateMinibatchInputAttributes(self):
        """Return the parent class's input attributes followed by "L2_regularizer"."""
        return OnlineGradientTLearner.updateMinibatchInputAttributes(self)+["L2_regularizer"]

    def updateEndOutputAttributes(self):
        """Return the names of the attributes produced at the end of an update."""
        return ["regularization_term"]

    def lossAttribute(self):
        """Return the name of the attribute holding the training criterion."""
        return "minibatch_criterion"

    def defaultOutputFields(self, input_fields):
        """
        Return the default output field names for the given input fields.

        "class_error" and "nll" are only included when "target" is among
        C{input_fields}.
        """
        output_fields = ["output", "output_class",]
        if "target" in input_fields:
            output_fields += ["class_error", "nll"]
        return output_fields

    def updateMinibatch(self,minibatch):
        """Delegate the minibatch update to C{MinibatchUpdatesTLearner.updateMinibatch}."""
        MinibatchUpdatesTLearner.updateMinibatch(self,minibatch)

    def allocate(self,minibatch):
        """
        Make sure the parameters exist and match the width of C{minibatch}.

        The parameters are (re-)initialized through L{forget} when the input
        width is not known yet, when it differs from the width of
        C{minibatch["input"]}, or when some parameter has not been created
        (e.g. C{n_inputs} was given to the constructor, so the width never
        had to be discovered).
        """
        minibatch_n_inputs  = minibatch["input"].shape[1]
        if self._n_inputs!=minibatch_n_inputs or not self._parametersAllocated():
            # Record the new width *before* resetting: forget() sizes W1 from
            # self._n_inputs, so a stale value would rebuild W1 with the old
            # shape and trigger another reset on every following minibatch.
            self._n_inputs = minibatch_n_inputs
            self.forget()

    def forget(self):
        """
        Re-draw the weights, zero the biases and restart the epoch counter.

        Does nothing while the input width is unknown (C{_n_inputs} is None
        or 0).
        """
        if self._n_inputs:
            r = self._init_range/math.sqrt(self._n_inputs)
            self.W1 = numpy.random.uniform(low=-r,high=r,
                                           size=(self._n_hidden,self._n_inputs))
            r = self._init_range/math.sqrt(self._n_hidden)
            self.W2 = numpy.random.uniform(low=-r,high=r,
                                           size=(self._n_outputs,self._n_hidden))
            self._resetBias("b1",self._n_hidden)
            self._resetBias("b2",self._n_outputs)
            self._n_epochs=0

    def _parametersAllocated(self):
        """Return True when every attribute named by parameterAttributes() is set."""
        for name in self.parameterAttributes():
            if getattr(self,name,None) is None:
                return False
        return True

    def _resetBias(self,name,size):
        """Zero the bias C{name} in place if it is an array, else create it as a 1 x size row of zeros."""
        bias = getattr(self,name,None)
        if isinstance(bias,numpy.ndarray):
            bias[:]=0
        else:
            setattr(self,name,numpy.zeros((1,size)))

    def isLastEpoch(self):
        """Count one more epoch and return True once C{max_n_epochs} is reached."""
        self._n_epochs +=1
        return self._n_epochs>=self._max_n_epochs

    def debug_updateMinibatch(self,minibatch):
        """
        Run one minibatch update while printing its inputs and results.

        Every print below is a single parenthesised expression: it produces
        the same text as the two-item print statement it replaces, and is
        also valid as a call to the print function.
        """
        # make sure all required fields are allocated and initialized
        self.allocate(minibatch)
        input_attributes = self.names2attributes(self.updateMinibatchInputAttributes())
        input_fields = minibatch(*self.updateMinibatchInputFields())
        print('input attributes %s' % (input_attributes,))
        print('input fields %s' % (input_fields,))
        results = self.update_minibatch_function(*(input_attributes+input_fields))
        print('output attributes %s' % (self.updateMinibatchOutputAttributes(),))
        print('results %s' % (results,))
        self.setAttributes(self.updateMinibatchOutputAttributes(),
                           results)

        # Extra diagnostics, disabled by default; switch the condition to 1 to enable.
        if 0:
            print('n0 %s' % (self.names2OpResults(self.updateMinibatchOutputAttributes()+ self.updateMinibatchInputFields()),))
            print('n1 %s' % (self.names2OpResults(self.updateMinibatchOutputAttributes()),))
            print('n2 %s' % (self.names2OpResults(self.updateEndInputAttributes()),))
            print('n3 %s' % (self.names2OpResults(self.updateEndOutputAttributes()),))