examples/linear_classifier.py @ 428:52b4908d8971 (pylearn)
simple example of theano

author:   Thierry Bertin-Mahieux <bertinmt@iro.umontreal.ca>
date:     Fri, 25 Jul 2008 16:59:57 -0400
parents:
children: 4060812caa22
#! /usr/bin/env python
"""
T. Bertin-Mahieux (2008) University of Montreal
bertinmt@iro.umontreal.ca

linear_classifier.py
Simple script that creates a linear classifier and
learns its parameters using backpropagation.

This is to illustrate how to use theano/pylearn.
Anyone who knows how to make this script simpler/clearer is welcome to
make the modifications.
"""


import os
import sys
import time
import copy
import pickle
import numpy
import numpy as N
import numpy.random as NR
from pylearn import cost
import theano
from theano import tensor as T


def cost_function(*args,**kwargs) :
    """ default cost function, quadratic """
    return cost.quadratic(*args,**kwargs)


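# Hedged sketch for illustration: the same kind of quadratic cost can be
# written directly as a theano expression. The exact reduction performed
# by pylearn.cost.quadratic (sum vs. mean of squared errors) is an
# assumption here, so this is a sketch, not a verified drop-in.
def quadratic_cost_sketch(true_outputs, outputs) :
    """ example quadratic cost: sum of squared differences """
    return T.sum((true_outputs - outputs) ** 2)

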
class modelgraph() :
    """ class that contains the graph of the model """
    lr = T.scalar()                              # learning rate
    inputs = T.matrix()                          # inputs (one example per line)
    true_outputs = T.matrix()                    # outputs (one example per line)
    W = T.matrix()                               # weights: inputs * W + b = outputs
    b = T.vector()                               # bias
    outputs = T.dot(inputs,W) + b                # predicted outputs, one per line
    costs = cost_function(true_outputs,outputs)  # costs
    g_W = T.grad(costs,W)                        # gradient of the cost w.r.t. W
    g_b = T.grad(costs,b)                        # gradient of the cost w.r.t. b
    new_W = T.sub_inplace(W, lr * g_W)           # inplace update of W
    new_b = T.sub_inplace(b, lr * g_b)           # inplace update of b


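# Illustration only: the symbolic attributes of modelgraph compile into a
# callable with theano.function, which is exactly the pattern the model
# class below uses for its cached functions.
def compile_update_step_sketch() :
    """ sketch: compile one inplace gradient-descent step from the graph """
    g = modelgraph()
    return theano.function([g.lr, g.inputs, g.true_outputs, g.W, g.b],
                           [g.new_W, g.new_b])

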
class model() :
    """
    The model!
    Contains the needed matrices, the compiled functions, and a link to the model graph.
    """

    def __init__(self,input_size,output_size) :
        """ init the weight matrix and bias, create the graph, create a dict of compiled functions """
        # graph
        self.graph = modelgraph()
        # weights and bias, saved in self.params
        seed = 666
        r = NR.RandomState(seed)
        W = r.uniform(size = [input_size, output_size], low = -1/N.sqrt(input_size), high = 1/N.sqrt(input_size))
        b = numpy.zeros((output_size, ))
        self.params = [W,b]
        # dictionary of compiled functions
        self.func_dict = dict()
        # keep some init infos (may not be necessary)
        self.init_params = [input_size,output_size]


    def update(self,lr,true_inputs,true_outputs) :
        """ update the model with one gradient-descent step """
        # do we already have the proper theano function?
        if self.func_dict.has_key('update_func') :
            self.func_dict['update_func'](lr,true_inputs,true_outputs,self.params[0],self.params[1])
            return
        else :
            # create the theano function, telling it what the inputs and outputs are
            func = theano.function([self.graph.lr,self.graph.inputs,self.graph.true_outputs,
                                    self.graph.W, self.graph.b],
                                   [self.graph.new_W,self.graph.new_b])
            # add the function to the dictionary, so we don't compile it again
            self.func_dict['update_func'] = func
            # use this function
            func(lr,true_inputs,true_outputs,self.params[0],self.params[1])
            return

    def costs(self,true_inputs,true_outputs) :
        """ get the costs for given examples, without updating """
        # do we already have the proper theano function?
        if self.func_dict.has_key('costs_func') :
            return self.func_dict['costs_func'](true_inputs,true_outputs,self.params[0],self.params[1])
        else :
            # create the theano function, telling it what the inputs and outputs are
            func = theano.function([self.graph.inputs,self.graph.true_outputs,self.graph.W,self.graph.b],
                                   [self.graph.costs])
            # add the function to the dictionary, so we don't compile it again
            self.func_dict['costs_func'] = func
            # use this function
            return func(true_inputs,true_outputs,self.params[0],self.params[1])

    def outputs(self,true_inputs) :
        """ get the outputs for a set of examples (could be called 'predict') """
        # do we already have the proper theano function?
        if self.func_dict.has_key('outputs_func') :
            return self.func_dict['outputs_func'](true_inputs,self.params[0],self.params[1])
        else :
            # create the theano function, telling it what the inputs and outputs are
            func = theano.function([self.graph.inputs, self.graph.W, self.graph.b],
                                   [self.graph.outputs])
            # add the function to the dictionary, so we don't compile it again
            self.func_dict['outputs_func'] = func
            # use this function
            return func(true_inputs,self.params[0],self.params[1])

    def __getitem__(self,inputs) :
        """ for simplicity, we can use the model this way: predictions = model[inputs] """
        return self.outputs(inputs)

    def __getstate__(self) :
        """
        To save/copy the model; used by pickle.dump() and by copy.deepcopy().
        @return a dictionary with the params (weight matrix + bias)
        """
        d = dict()
        d['params'] = self.params
        d['init_params'] = self.init_params
        return d

    def __setstate__(self,d) :
        """
        Get the dictionary created by __getstate__() and use it to recreate the model.
        """
        self.params = d['params']
        self.init_params = d['init_params']
        self.graph = modelgraph() # we did not save the model graph

    def __str__(self) :
        """ returns a string representing the model """
        res = "Linear regressor, input size = " + str(self.init_params[0])
        res += ", output size = " + str(self.init_params[1])
        return res

    def __eq__(self,other) :
        """
        Compares the models based on their params.
        @return True if the params are the same, False otherwise
        """
        # class
        if not isinstance(other,model) :
            return False
        # input size
        if self.params[0].shape[0] != other.params[0].shape[0] :
            return False
        # output size
        if self.params[0].shape[1] != other.params[0].shape[1] :
            return False
        # actual values
        if not (self.params[0] == other.params[0]).all():
            return False
        if not (self.params[1] == other.params[1]).all():
            return False
        # all good
        return True


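# Illustration only: __getstate__/__setstate__ above make the model work
# with the pickle and copy modules imported at the top, and __eq__ lets us
# check that a round-trip preserves the params. A hedged usage sketch:
def pickle_roundtrip_sketch(m) :
    """ sketch: serialize a model and check the params survive intact """
    m2 = pickle.loads(pickle.dumps(m))
    assert m == m2, "pickle round-trip changed the params"
    m3 = copy.deepcopy(m)
    assert m == m3, "deepcopy changed the params"
    return m2

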
def die_with_usage() :
    """ help menu """
    print 'simple script to illustrate how to use theano/pylearn'
    print 'to launch:'
    print '   python linear_classifier.py -launch'
    sys.exit(0)



#************************************************************
# main

if __name__ == '__main__' :

    if len(sys.argv) < 2 :
        die_with_usage()

    # create data
    inputs = numpy.array([[.1,.2],
                          [.2,.8],
                          [.9,.3],
                          [.6,.5]])
    outputs = numpy.array([[0],
                           [0],
                           [1],
                           [1]])
    assert inputs.shape[0] == outputs.shape[0]

    # create the model
    m = model(2,1)

    # predict
    print 'prediction before training:'
    print m[inputs]

    # train for 50 iterations (one gradient-descent step each)
    for k in range(50) :
        m.update(.1,inputs,outputs)

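    # illustration only: the compiled cost function defined above can be
    # used to monitor how well training went
    print 'cost after training:'
    print m.costs(inputs,outputs)
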
    # predict
    print 'prediction after training:'
    print m[inputs]

    # show points
    import pylab as P
    colors = outputs.flatten().tolist()
    x = inputs[:,0]
    y = inputs[:,1]
    P.plot(x[numpy.where(outputs==0)[0]],y[numpy.where(outputs==0)[0]],'r+')
    P.plot(x[numpy.where(outputs==1)[0]],y[numpy.where(outputs==1)[0]],'b+')
    # decision line: the points where inputs . W + b = .5
    p1 = (.5 - m.params[1] * 1.) / m.params[0][1,0] # y-intercept (where x = 0)
    p2 = (.5 - m.params[1] * 1.) / m.params[0][0,0] # x-intercept (where y = 0)
    P.plot((0,p2[0],2*p2[0]),(p1[0],0,-p1[0]),'g-')
    # show
    P.axis([-1,2,-1,2])
    P.show()