Mercurial > pylearn
view examples/linear_classifier.py @ 472:69c800af1370
changed weight initialization for logistic regression
author | James Bergstra <bergstrj@iro.umontreal.ca> |
---|---|
date | Thu, 23 Oct 2008 13:26:42 -0400 |
parents | 4060812caa22 |
children |
line wrap: on
line source
#! /usr/bin/env python
"""
T. Bertin-Mahieux (2008) University of Montreal
bertinmt@iro.umontreal.ca

linear_classifier.py
Simple script that creates a linear_classifier, and
learns the parameters using backpropagation.

This is to illustrate how to use theano/pylearn.
Anyone who knows how to make this script simpler/clearer is welcome to
make the modifications.
"""

import os
import sys
import time
import copy
import pickle
import numpy
import numpy as N
import numpy.random as NR
from pylearn import cost
import theano
from theano import tensor as T


def cost_function(*args, **kwargs):
    """ default cost function, quadratic (forwards everything to cost.quadratic) """
    return cost.quadratic(*args, **kwargs)


class modelgraph():
    """
    class that contains the graph of the model

    Every attribute is a symbolic theano variable; the graph is built once,
    when the class body is executed.
    """
    lr = T.scalar()                               # learning rate
    inputs = T.matrix()                           # inputs (one example per line)
    true_outputs = T.matrix()                     # outputs (one example per line)
    W = T.matrix()                                # weights: input * W + b = output
    b = T.vector()                                # bias
    outputs = T.dot(inputs, W) + b                # output, one per line
    costs = cost_function(true_outputs, outputs)  # costs
    g_W = T.grad(costs, W)                        # gradient of W
    g_b = T.grad(costs, b)                        # gradient of b
    new_W = T.sub_inplace(W, lr * g_W)            # update inplace of W
    new_b = T.sub_inplace(b, lr * g_b)            # update inplace of b


class model():
    """
    The model!
    Contains needed matrices, needed functions, and a link to the model graph.

    Attributes:
      graph       - a modelgraph instance (symbolic variables)
      params      - [W, b], the weight matrix and the bias vector
      func_dict   - cache of compiled theano functions, keyed by name
      init_params - [input_size, output_size]
    """

    def __init__(self, input_size, output_size):
        """
        init matrix and bias, creates the graph, create a dict of compiled
        functions

        @param input_size  number of input features
        @param output_size number of outputs
        """
        # graph
        self.graph = modelgraph()
        # weights and bias, saved in self.params
        # fixed seed, so the initial weights are the same at every run
        seed = 666
        r = NR.RandomState(seed)
        bound = 1 / N.sqrt(input_size)
        W = r.uniform(size=[input_size, output_size], low=-bound, high=bound)
        b = numpy.zeros((output_size, ))
        self.params = [W, b]
        # dictionary of compiled functions
        self.func_dict = dict()
        # keep some init_infos (may not be necessary)
        self.init_params = [input_size, output_size]

    def _compiled(self, name, inputs, outputs):
        """
        return the theano function cached under 'name'; it is compiled from
        (inputs, outputs) the first time only, so we don't compile it again
        """
        if name not in self.func_dict:
            self.func_dict[name] = theano.function(inputs, outputs)
        return self.func_dict[name]

    def update(self, lr, true_inputs, true_outputs):
        """
        does an update of the model, one gradient descent

        The values returned by the compiled function are discarded.
        NOTE(review): this presumably relies on T.sub_inplace modifying the
        arrays of self.params in place -- confirm with the theano version used.
        """
        g = self.graph
        func = self._compiled('update_func',
                              [g.lr, g.inputs, g.true_outputs, g.W, g.b],
                              [g.new_W, g.new_b])
        func(lr, true_inputs, true_outputs, self.params[0], self.params[1])
        return

    def costs(self, true_inputs, true_outputs):
        """
        get the costs for given examples, don't update

        @return what the compiled function returns for the outputs [costs]
        """
        g = self.graph
        func = self._compiled('costs_func',
                              [g.inputs, g.true_outputs, g.W, g.b],
                              [g.costs])
        return func(true_inputs, true_outputs, self.params[0], self.params[1])

    def outputs(self, true_inputs):
        """
        get the output for a set of examples (could be called 'predict')

        @return what the compiled function returns for the outputs [outputs]
        """
        g = self.graph
        func = self._compiled('outputs_func',
                              [g.inputs, g.W, g.b],
                              [g.outputs])
        return func(true_inputs, self.params[0], self.params[1])

    def __getitem__(self, inputs):
        """ for simplicity, we can use the model this way: predictions = model[inputs] """
        return self.outputs(inputs)

    def __getstate__(self):
        """
        To save/copy the model, used by pickle.dump() and by copy.deepcopy().
        The graph and the compiled functions are not saved.
        @return a dictionary with the params (matrix + bias)
        """
        d = dict()
        d['params'] = self.params
        d['init_params'] = self.init_params
        return d

    def __setstate__(self, d):
        """
        Get the dictionary created by __getstate__(), use it to recreate the model.
        """
        self.params = d['params']
        self.init_params = d['init_params']
        self.graph = modelgraph()  # we did not save the model graph
        # the compiled functions were not saved either: start from an empty
        # cache, otherwise update/costs/outputs fail on a restored model
        self.func_dict = dict()

    def __str__(self):
        """ returns a string representing the model """
        # must be a real string: building it with commas creates a tuple,
        # and str() then raises a TypeError
        return "Linear regressor, input size = %s, output size = %s" % (
            self.init_params[0], self.init_params[1])

    def __eq__(self, other):
        """
        Compares the model based on the params.
        @return True if the params are the same, False otherwise
        """
        # class
        if not isinstance(other, model):
            return False
        # input size, output size and actual values of the weights
        # (array_equal is False when the shapes differ)
        if not numpy.array_equal(self.params[0], other.params[0]):
            return False
        # actual values of the bias
        if not numpy.array_equal(self.params[1], other.params[1]):
            return False
        # all good
        return True

    def __ne__(self, other):
        """ opposite of __eq__ (python 2 does not deduce it by itself) """
        return not self.__eq__(other)

    # old name of the comparison, kept for code calling it explicitly
    __equal__ = __eq__

    # models are compared by (mutable) value, so they must not be hashed
    __hash__ = None


def die_with_usage():
    """ help menu: print how to launch the script, then exit """
    print('simple script to illustrate how to use theano/pylearn')
    print('to launch:')
    print('  python linear_classifier.py -launch')
    sys.exit(0)


#************************************************************
# main

if __name__ == '__main__':

    # any argument launches the demo, no argument prints the help
    if len(sys.argv) < 2:
        die_with_usage()

    # create data
    inputs = numpy.array([[.1, .2],
                          [.2, .8],
                          [.9, .3],
                          [.6, .5]])
    outputs = numpy.array([[0],
                           [0],
                           [1],
                           [1]])
    assert inputs.shape[0] == outputs.shape[0]

    # create model
    m = model(2, 1)

    # predict
    print('prediction before training:')
    print(m[inputs])

    # update it for 50 iterations
    for k in range(50):
        m.update(.1, inputs, outputs)

    # predict
    print('prediction after training:')
    print(m[inputs])

    # show points
    import pylab as P
    x = inputs[:, 0]
    y = inputs[:, 1]
    neg = numpy.where(outputs == 0)[0]  # lines of the examples of class 0
    pos = numpy.where(outputs == 1)[0]  # lines of the examples of class 1
    P.plot(x[neg], y[neg], 'r+')
    P.plot(x[pos], y[pos], 'b+')
    # decision line: points where the output is .5
    p1 = (.5 - m.params[1] * 1.) / m.params[0][1, 0]  # abs = 0
    p2 = (.5 - m.params[1] * 1.) / m.params[0][0, 0]  # ord = 0
    P.plot((0, p2[0], 2 * p2[0]), (p1[0], 0, -p1[0]), 'g-')
    # show
    P.axis([-1, 2, -1, 2])
    P.show()