view examples/linear_classifier.py @ 472:69c800af1370

changed weight initialization for logistic regression
author James Bergstra <bergstrj@iro.umontreal.ca>
date Thu, 23 Oct 2008 13:26:42 -0400
parents 4060812caa22
children
line wrap: on
line source

#! /usr/bin/env python
"""
T. Bertin-Mahieux (2008) University of Montreal
bertinmt@iro.umontreal.ca

linear_classifier.py
Simple script that creates a linear_classifier, and
learns the parameters using backpropagation.

This is to illustrate how to use theano/pylearn.
Anyone who knows how to make this script simpler/clearer is welcome to
make the modifications.
"""


import os
import sys
import time
import copy
import pickle
import numpy
import numpy as N
import numpy.random as NR
from pylearn import cost
import theano
from theano import tensor as T


def cost_function(*args, **kwargs):
    """
    Default cost function: the quadratic cost.

    Every positional and keyword argument is forwarded unchanged to
    cost.quadratic (the `cost` module imported from pylearn), and its
    result is returned as is.
    """
    quadratic = cost.quadratic
    return quadratic(*args, **kwargs)


class modelgraph() :
    """
    class that contains the graph of the model

    The model is linear: outputs = dot(inputs, W) + b.

    Every symbolic variable below is a class attribute, so it is created once,
    when this class statement is executed, and is shared by all modelgraph
    instances. The statements are in dependency order (each expression only
    uses names defined above it).
    """
    lr = T.scalar()                              # learning rate
    inputs = T.matrix()                          # inputs (one example per line)
    true_outputs = T.matrix()                    # outputs (one example per line)
    W = T.matrix()                               # weights input * W + b= output
    b = T.vector()                               # bias
    outputs = T.dot(inputs,W) + b                # output, one per line
    costs = cost_function(true_outputs,outputs)  # costs (cost_function defaults to the quadratic cost)
    g_W = T.grad(costs,W)                        # gradient of costs with respect to W
    g_b = T.grad(costs,b)                        # gradient of costs with respect to b
    # gradient descent step: parameter - lr * gradient
    # NOTE(review): sub_inplace is expected to overwrite the array given for W / b -- confirm
    new_W = T.sub_inplace(W, lr * g_W)           # update inplace of W
    new_b = T.sub_inplace(b, lr * g_b)           # update inplace of b


class model() :
    """
    The model!
    Contains needed matrices, needed functions, and a link to the model graph.

    Attributes:
      graph        modelgraph instance holding the symbolic variables
      params       [W, b]: weight matrix (input_size x output_size) and bias vector
      func_dict    cache of compiled theano functions, filled on first use
      init_params  [input_size, output_size] as given to the constructor
    """

    def __init__(self,input_size,output_size) :
        """
        init matrix and bias, creates the graph, create a dict of compiled functions

        W is drawn uniformly between -1/sqrt(input_size) and 1/sqrt(input_size)
        from a generator with a fixed seed, so two models built with the same
        sizes start from the same weights. The bias starts at zero.
        """
        # graph
        self.graph = modelgraph()
        # weights and bias, saved in self.params
        seed = 666
        r = NR.RandomState(seed)
        W = r.uniform(size = [input_size, output_size],
                      low = -1/N.sqrt(input_size),
                      high = 1/N.sqrt(input_size))
        b = numpy.zeros((output_size, ))
        self.params = [W,b]
        # dictionary of compiled functions
        self.func_dict = dict()
        # keep some init_infos (may not be necessary)
        self.init_params = [input_size,output_size]

    def _get_function(self,name,inputs,outputs) :
        """
        Return the compiled theano function stored under `name` in func_dict.
        It is compiled from the symbolic `inputs` and `outputs` lists on the
        first request only, so each function is compiled at most once.
        """
        if name not in self.func_dict :
            self.func_dict[name] = theano.function(inputs,outputs)
        return self.func_dict[name]

    def update(self,lr,true_inputs,true_outputs) :
        """
        does an update of the model, one gradient descent

        @param lr learning rate
        @param true_inputs inputs, one example per line
        @param true_outputs targets, one example per line
        @return None. The values returned by the compiled function are
                discarded: the method relies on the graph's sub_inplace
                operations to write the new values into self.params.
                NOTE(review): confirm the in-place behaviour with the
                installed theano version.
        """
        g = self.graph
        func = self._get_function('update_func',
                                  [g.lr, g.inputs, g.true_outputs, g.W, g.b],
                                  [g.new_W, g.new_b])
        func(lr,true_inputs,true_outputs,self.params[0],self.params[1])

    def costs(self,true_inputs,true_outputs) :
        """ get the costs for given examples, don't update """
        g = self.graph
        func = self._get_function('costs_func',
                                  [g.inputs, g.true_outputs, g.W, g.b],
                                  [g.costs])
        return func(true_inputs,true_outputs,self.params[0],self.params[1])

    def outputs(self,true_inputs) :
        """ get the output for a set of examples (could be called 'predict') """
        g = self.graph
        func = self._get_function('outputs_func',
                                  [g.inputs, g.W, g.b],
                                  [g.outputs])
        return func(true_inputs,self.params[0],self.params[1])

    def __getitem__(self,inputs) :
        """ for simplicity, we can use the model this way: predictions = model[inputs] """
        return self.outputs(inputs)

    def __getstate__(self) :
        """
        To save/copy the model, used by pickle.dump() and by copy.deepcopy().
        @return a dictionary with the params (matrix + bias) and the init sizes
        """
        d = dict()
        d['params'] = self.params
        d['init_params'] = self.init_params
        return d

    def __setstate__(self,d) :
        """
        Get the dictionary created by __getstate__(), use it to recreate the model.
        """
        self.params = d['params']
        self.init_params = d['init_params']
        self.graph = modelgraph() # we did not save the model graph
        # the compiled functions were not saved either: start with an empty
        # cache so that update/costs/outputs work on a restored model
        self.func_dict = dict()

    def __str__(self) :
        """ returns a string representing the model """
        return ("Linear regressor, input size = " + str(self.init_params[0])
                + ", output size = " + str(self.init_params[1]))

    def __eq__(self,other) :
        """
        Compares the model based on the params.
        @return True if `other` is a model whose weights and bias have the
                same shapes and the same values, False otherwise
        """
        # class
        if not isinstance(other,model) :
            return False
        for mine, theirs in zip(self.params, other.params) :
            # sizes first, so that the value test never compares mismatched arrays
            if mine.shape != theirs.shape :
                return False
            # actual values
            if not (mine == theirs).all() :
                return False
        # all good
        return True

    def __ne__(self,other) :
        """ opposite of __eq__ (Python 2 does not derive != from ==) """
        return not self.__eq__(other)

    # former name of the comparison method, kept for callers that used it directly
    __equal__ = __eq__


def die_with_usage() :
    """
    help menu

    Prints how to launch the script, then terminates the process through
    sys.exit(0); this function never returns.
    """
    # print(...) with a single argument gives the same output under
    # Python 2 and Python 3
    print('simple script to illustrate how to use theano/pylearn')
    print('to launch:')
    print('  python linear_classifier.py -launch')
    sys.exit(0)



#************************************************************
# main

if __name__ == '__main__' :

    # at least one command-line argument is required (the help menu suggests
    # -launch); its value is not inspected
    if len(sys.argv) < 2 :
        die_with_usage()

    # create data: one example per line, two features per example and
    # one target (0 or 1) per example
    inputs = numpy.array([[.1,.2],
                          [.2,.8],
                          [.9,.3],
                          [.6,.5]])
    outputs = numpy.array([[0],
                           [0],
                           [1],
                           [1]])
    assert inputs.shape[0] == outputs.shape[0]

    # create model
    m = model(2,1)

    # predict
    print('prediction before training:')
    print(m[inputs])

    # update it for 50 iterations, with a learning rate of .1
    for k in range(50) :
        m.update(.1,inputs,outputs)

    # predict
    print('prediction after training:')
    print(m[inputs])

    # show points
    import pylab as P
    x = inputs[:,0]
    y = inputs[:,1]
    zeros_idx = numpy.where(outputs==0)[0]   # lines of the examples with target 0
    ones_idx = numpy.where(outputs==1)[0]    # lines of the examples with target 1
    P.plot(x[zeros_idx],y[zeros_idx],'r+')
    P.plot(x[ones_idx],y[ones_idx],'b+')
    # decision line: the points where x * W[0,0] + y * W[1,0] + b == .5
    threshold = .5 - m.params[1] * 1.
    p1 = threshold / m.params[0][1,0] # y where the line crosses x = 0
    p2 = threshold / m.params[0][0,0] # x where the line crosses y = 0
    # three aligned points: (0, p1), (p2, 0) and (2 * p2, -p1)
    P.plot((0,p2[0],2*p2[0]),(p1[0],0,-p1[0]),'g-')
    # show
    P.axis([-1,2,-1,2])
    P.show()