changeset 429:2bde0bed1919

simple example of theano
author Thierry Bertin-Mahieux <bertinmt@iro.umontreal.ca>
date Fri, 25 Jul 2008 17:02:28 -0400
parents fa4a5fee53ce
children c096e2820131
files examples/linear_classifier.py
diffstat 1 files changed, 224 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/examples/linear_classifier.py	Fri Jul 25 17:02:28 2008 -0400
@@ -0,0 +1,224 @@
+#! /usr/bin/env python
+"""
+T. Bertin-Mahieux (2008) University of Montreal
+bertinmt@iro.umontreal.ca
+
+linear_classifier.py
+Simple script that creates a linear_classifier, and
+learns the paramters using backpropagation.
+
+This is to illustrate how to use theano/pylearn.
+Anyone that knows how to make this script simpler/clearer is welcomed to
+make the modifications.
+"""
+
+
+import os
+import sys
+import time
+import copy
+import pickle
+import numpy
+import numpy as N
+import numpy.random as NR
+from pylearn import cost
+import theano
+from theano import tensor as T
+
+
+def cost_function(*args,**kwargs) :
+    """ default cost function, quadratic """
+    return cost.quadratic(*args,**kwargs)
+
+
+class modelgraph() :
+    """ class that contains the graph of the model """
+    lr = T.scalar()                              # learning rate
+    inputs = T.matrix()                          # inputs (one example per line)
+    true_outputs = T.matrix()                    # outputs (one example per line)
+    W = T.matrix()                               # weights input * W + b= output
+    b = T.vector()                               # bias
+    outputs = T.dot(inputs,W) + b                # output, one per line
+    costs = cost_function(true_outputs,outputs)  # costs
+    g_W = T.grad(costs,W)                        # gradient of W
+    g_b = T.grad(costs,b)                        # gradient of b
+    new_W = T.sub_inplace(W, lr * g_W)           # update inplace of W
+    new_b = T.sub_inplace(b, lr * g_b)           # update inplace of b
+
+
+class model() :
+    """ 
+    The model! 
+    Contains needed matrices, needed functions, and a link to the model graph.
+    """
+
+    def __init__(self,input_size,output_size) :
+        """ init matrix and bias, creates the graph, create a dict of compiled functions """
+        # graph
+        self.graph = modelgraph()
+        # weights and bias, saved in self.params
+        seed = 666
+        r = NR.RandomState(seed)
+        W = r.uniform(size = [input_size, output_size], low = -1/N.sqrt(input_size), high = 1/N.sqrt(input_size))
+        b = numpy.zeros((output_size, ))
+        self.params = [W,b]
+        # dictionary of compiled functions
+        self.func_dict = dict()
+        # keep some init_infos (may not be necessary)
+        self.init_params = [input_size,output_size]
+
+
+    def update(self,lr,true_inputs,true_outputs) :
+        """ does an update of the model, one gradient descent """
+        # do we already have the proper theano function?
+        if self.func_dict.has_key('update_func') :
+            self.func_dict['update_func'](lr,true_inputs,true_outputs,self.params[0],self.params[1])
+            return
+        else :
+            # create the theano function, tell him what are the inputs and outputs)
+            func = theano.function([self.graph.lr,self.graph.inputs,self.graph.true_outputs,
+                                self.graph.W, self.graph.b],
+                               [self.graph.new_W,self.graph.new_b])
+            # add function to dictionary, so we don't compile it again
+            self.func_dict['update_func'] = func
+            # use this function
+            func(lr,true_inputs,true_outputs,self.params[0],self.params[1])
+            return
+    
+    def costs(self,true_inputs,true_outputs) :
+        """ get the costs for given examples, don't update """
+        # do we already have the proper theano function?
+        if self.func_dict.has_key('costs_func') :
+            return self.func_dict['costs_func'](true_inputs,true_outputs,self.params[0],self.params[1])
+        else :
+            # create the theano function, tell him what are the inputs and outputs)
+            func = theano.function([self.graph.inputs,self.graph.true_outputs,self.graph.W,self.graph.b],
+                               [self.graph.costs])
+            # add function to dictionary, se we don't compile it again
+            self.func_dict['costs_func'] = func
+            # use this function
+            return func(true_inputs,true_outputs,self.params[0],self.params[1])
+
+    def outputs(self,true_inputs) :
+        """ get the output for a set of examples (could be called 'predict') """
+        # do we already have the proper theano function?
+        if self.func_dict.has_key('outputs_func') :
+            return self.func_dict['outputs_func'](true_inputs,self.params[0],self.params[1])
+        else :
+            # create the theano function, tell him what are the inputs and outputs)
+            func = theano.function([self.graph.inputs, self.graph.W, self.graph.b],
+                               [self.graph.outputs])
+            # add function to dictionary, se we don't compile it again
+            self.func_dict['outputs_func'] = func
+            # use this function
+            return func(true_inputs,self.params[0],self.params[1])
+
+    def __getitem__(self,inputs) :
+        """ for simplicity, we can use the model this way: predictions = model[inputs] """
+        return self.outputs(inputs)
+
+    def __getstate__(self) :
+        """
+        To save/copy the model, used by pickle.dump() and by copy.deepcopy().
+        @return a dictionnary with the params (matrix + bias)
+        """
+        d = dict()
+        d['params'] = self.params
+        d['init_params'] = self.init_params
+        return d
+        
+    def __setstate__(self,d) :
+        """
+        Get the dictionary created by __getstate__(), use it to recreate the model.
+        """
+        self.params = d['params']
+        self.init_params = d['init_params']
+        self.graph = modelgraph() # we did not save the model graph 
+
+    def __str__(self) :
+        """ returns a string representing the model """
+        res = "Linear regressor, input size =",str(self.init_params[0])
+        res += ", output size =", str(self.init_params[1])
+        return res
+
+    def __equal__(self,other) :
+        """ 
+        Compares the model based on the params.
+        @return True if the params are the same, False otherwise
+        """
+        # class
+        if not isinstance(other,model) :
+            return False
+        # input size
+        if self.params[0].shape[0] != other.params[0].shape[0] :
+            return False
+        # output size
+        if self.params[0].shape[1] != other.params[0].shape[1] :
+            return False
+        # actual values
+        if not (self.params[0] == other.params[0]).all():
+            return False
+        if not (self.params[1] == other.params[1]).all():
+            return False
+        # all good
+        return True
+
+
+def die_with_usage() :
+    """ help menu """
+    print 'simple script to illustrate how to use theano/pylearn'
+    print 'to launch:'
+    print '  python linear_classifier.py -launch'
+    sys.exit(0)
+
+
+
+#************************************************************
+# main
+
+if __name__ == '__main__' :
+
+    if len(sys.argv) < 2 :
+        die_with_usage()
+
+    # print create data
+    inputs = numpy.array([[.1,.2],
+                          [.2,.8],
+                          [.9,.3],
+                          [.6,.5]])
+    outputs = numpy.array([[0],
+                           [0],
+                           [1],
+                           [1]])
+    assert inputs.shape[0] == outputs.shape[0]
+
+    # create model
+    m = model(2,1)
+    
+    # predict
+    print 'prediction before training:'
+    print m[inputs]
+
+    # update it for 100 iterations
+    for k in range(50) :
+        m.update(.1,inputs,outputs)
+
+     # predict
+    print 'prediction after training:'
+    print m[inputs]
+
+    # show points
+    import pylab as P
+    colors = outputs.flatten().tolist()
+    x = inputs[:,0]
+    y = inputs[:,1]
+    P.plot(x[numpy.where(outputs==0)[0]],y[numpy.where(outputs==0)[0]],'r+')
+    P.plot(x[numpy.where(outputs==1)[0]],y[numpy.where(outputs==1)[0]],'b+')
+    # decision line
+    p1 = (.5 - m.params[1] * 1.) / m.params[0][1,0] # abs = 0
+    p2 = (.5 - m.params[1] * 1.) / m.params[0][0,0] # ord = 0
+    P.plot((0,p2[0],2*p2[0]),(p1[0],0,-p1[0]),'g-')
+    # show
+    P.axis([-1,2,-1,2])
+    P.show()
+