diff examples/linear_classifier.py @ 428:52b4908d8971
simple example of theano
author   | Thierry Bertin-Mahieux <bertinmt@iro.umontreal.ca>
date     | Fri, 25 Jul 2008 16:59:57 -0400
parents  |
children | 4060812caa22
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/examples/linear_classifier.py	Fri Jul 25 16:59:57 2008 -0400
@@ -0,0 +1,224 @@
+#! /usr/bin/env python
+"""
+T. Bertin-Mahieux (2008) University of Montreal
+bertinmt@iro.umontreal.ca
+
+linear_classifier.py
+Simple script that creates a linear classifier and
+learns the parameters using backpropagation.
+
+This is to illustrate how to use theano/pylearn.
+Anyone who knows how to make this script simpler/clearer is welcome
+to modify it.
+"""
+
+
+import os
+import sys
+import time
+import copy
+import pickle
+import numpy
+import numpy as N
+import numpy.random as NR
+from pylearn import cost
+import theano
+from theano import tensor as T
+
+
+def cost_function(*args,**kwargs) :
+    """ default cost function, quadratic """
+    return cost.quadratic(*args,**kwargs)
+
+
+class modelgraph() :
+    """ class that contains the graph of the model """
+    lr = T.scalar()                              # learning rate
+    inputs = T.matrix()                          # inputs (one example per row)
+    true_outputs = T.matrix()                    # outputs (one example per row)
+    W = T.matrix()                               # weights: inputs * W + b = outputs
+    b = T.vector()                               # bias
+    outputs = T.dot(inputs,W) + b                # outputs, one per row
+    costs = cost_function(true_outputs,outputs)  # costs
+    g_W = T.grad(costs,W)                        # gradient of W
+    g_b = T.grad(costs,b)                        # gradient of b
+    new_W = T.sub_inplace(W, lr * g_W)           # in-place update of W
+    new_b = T.sub_inplace(b, lr * g_b)           # in-place update of b
+
+
+class model() :
+    """
+    The model!
+    Contains the needed matrices, the compiled functions, and a link to the model graph.
+    """
+
+    def __init__(self,input_size,output_size) :
+        """ init the weight matrix and bias, create the graph and a dict of compiled functions """
+        # graph
+        self.graph = modelgraph()
+        # weights and bias, saved in self.params
+        seed = 666
+        r = NR.RandomState(seed)
+        W = r.uniform(size = [input_size, output_size], low = -1/N.sqrt(input_size), high = 1/N.sqrt(input_size))
+        b = numpy.zeros((output_size, ))
+        self.params = [W,b]
+        # dictionary of compiled functions
+        self.func_dict = dict()
+        # keep some init info (may not be necessary)
+        self.init_params = [input_size,output_size]
+
+
+    def update(self,lr,true_inputs,true_outputs) :
+        """ does one gradient descent update of the model """
+        # do we already have the proper theano function?
+        if self.func_dict.has_key('update_func') :
+            self.func_dict['update_func'](lr,true_inputs,true_outputs,self.params[0],self.params[1])
+            return
+        else :
+            # create the theano function, telling it what the inputs and outputs are
+            func = theano.function([self.graph.lr,self.graph.inputs,self.graph.true_outputs,
+                                    self.graph.W, self.graph.b],
+                                   [self.graph.new_W,self.graph.new_b])
+            # add the function to the dictionary, so we don't compile it again
+            self.func_dict['update_func'] = func
+            # use this function
+            func(lr,true_inputs,true_outputs,self.params[0],self.params[1])
+            return
+
+    def costs(self,true_inputs,true_outputs) :
+        """ get the costs for given examples, don't update """
+        # do we already have the proper theano function?
+        if self.func_dict.has_key('costs_func') :
+            return self.func_dict['costs_func'](true_inputs,true_outputs,self.params[0],self.params[1])
+        else :
+            # create the theano function, telling it what the inputs and outputs are
+            func = theano.function([self.graph.inputs,self.graph.true_outputs,self.graph.W,self.graph.b],
+                                   [self.graph.costs])
+            # add the function to the dictionary, so we don't compile it again
+            self.func_dict['costs_func'] = func
+            # use this function
+            return func(true_inputs,true_outputs,self.params[0],self.params[1])
+
+    def outputs(self,true_inputs) :
+        """ get the outputs for a set of examples (could be called 'predict') """
+        # do we already have the proper theano function?
+        if self.func_dict.has_key('outputs_func') :
+            return self.func_dict['outputs_func'](true_inputs,self.params[0],self.params[1])
+        else :
+            # create the theano function, telling it what the inputs and outputs are
+            func = theano.function([self.graph.inputs, self.graph.W, self.graph.b],
+                                   [self.graph.outputs])
+            # add the function to the dictionary, so we don't compile it again
+            self.func_dict['outputs_func'] = func
+            # use this function
+            return func(true_inputs,self.params[0],self.params[1])
+
+    def __getitem__(self,inputs) :
+        """ for simplicity, we can use the model this way: predictions = model[inputs] """
+        return self.outputs(inputs)
+
+    def __getstate__(self) :
+        """
+        To save/copy the model, used by pickle.dump() and by copy.deepcopy().
+        @return a dictionary with the params (matrix + bias)
+        """
+        d = dict()
+        d['params'] = self.params
+        d['init_params'] = self.init_params
+        return d
+
+    def __setstate__(self,d) :
+        """
+        Get the dictionary created by __getstate__() and use it to recreate the model.
+        """
+        self.params = d['params']
+        self.init_params = d['init_params']
+        self.graph = modelgraph() # we did not save the model graph
+
+    def __str__(self) :
+        """ returns a string representing the model """
+        res = "Linear regressor, input size = " + str(self.init_params[0])
+        res += ", output size = " + str(self.init_params[1])
+        return res
+
+    def __eq__(self,other) :
+        """
+        Compares the models based on their params.
+        @return True if the params are the same, False otherwise
+        """
+        # class
+        if not isinstance(other,model) :
+            return False
+        # input size
+        if self.params[0].shape[0] != other.params[0].shape[0] :
+            return False
+        # output size
+        if self.params[0].shape[1] != other.params[0].shape[1] :
+            return False
+        # actual values
+        if not (self.params[0] == other.params[0]).all():
+            return False
+        if not (self.params[1] == other.params[1]).all():
+            return False
+        # all good
+        return True
+
+
+def die_with_usage() :
+    """ help menu """
+    print 'simple script to illustrate how to use theano/pylearn'
+    print 'to launch:'
+    print '    python linear_classifier.py -launch'
+    sys.exit(0)
+
+
+
+#************************************************************
+# main
+
+if __name__ == '__main__' :
+
+    if len(sys.argv) < 2 :
+        die_with_usage()
+
+    # create data
+    inputs = numpy.array([[.1,.2],
+                          [.2,.8],
+                          [.9,.3],
+                          [.6,.5]])
+    outputs = numpy.array([[0],
+                           [0],
+                           [1],
+                           [1]])
+    assert inputs.shape[0] == outputs.shape[0]
+
+    # create the model
+    m = model(2,1)
+
+    # predict
+    print 'prediction before training:'
+    print m[inputs]
+
+    # train for 50 gradient descent iterations
+    for k in range(50) :
+        m.update(.1,inputs,outputs)
+
+    # predict
+    print 'prediction after training:'
+    print m[inputs]
+
+    # show the points
+    import pylab as P
+    colors = outputs.flatten().tolist()
+    x = inputs[:,0]
+    y = inputs[:,1]
+    P.plot(x[numpy.where(outputs==0)[0]],y[numpy.where(outputs==0)[0]],'r+')
+    P.plot(x[numpy.where(outputs==1)[0]],y[numpy.where(outputs==1)[0]],'b+')
+    # decision line
+    p1 = (.5 - m.params[1] * 1.) / m.params[0][1,0] # y value where x = 0
+    p2 = (.5 - m.params[1] * 1.) / m.params[0][0,0] # x value where y = 0
+    P.plot((0,p2[0],2*p2[0]),(p1[0],0,-p1[0]),'g-')
+    # show
+    P.axis([-1,2,-1,2])
+    P.show()
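The model class stores only its parameters in __getstate__ and rebuilds the symbolic graph in __setstate__, so a trained model can be pickled and reloaded, with the theano functions recompiled lazily on first use. Below is a minimal round-trip sketch of that behavior; it assumes the file above is on the PYTHONPATH and importable as a module named linear_classifier (the module name is an assumption for illustration, not part of the changeset).

#! /usr/bin/env python
# hypothetical usage sketch -- assumes examples/linear_classifier.py is
# importable as linear_classifier
import pickle
import numpy
from linear_classifier import model

# train a tiny model, as in the script's __main__ block
inputs = numpy.array([[.1,.2],[.2,.8],[.9,.3],[.6,.5]])
outputs = numpy.array([[0],[0],[1],[1]])
m = model(2,1)
for k in range(50) :
    m.update(.1,inputs,outputs)

# save: __getstate__ stores only the params, not the compiled functions
s = pickle.dumps(m)

# load: __setstate__ restores the params and rebuilds the model graph;
# the theano functions are recompiled lazily on first use
m2 = pickle.loads(s)
assert m2 == m       # parameter-wise comparison via __eq__
print m2[inputs]     # same predictions as m[inputs]

Keeping the compiled functions out of the pickle keeps it small and portable across processes; they are recreated on demand by the caching logic in update(), costs() and outputs().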