comparison linear_regression.py @ 111:88257dfedf8c

Added another work in progress, for mlp's
author bengioy@bengiomac.local
date Wed, 07 May 2008 09:16:04 -0400
parents 8fa1ef2411a0
children d0a1bd0378c6
comparison of 110:8fa1ef2411a0 with 111:88257dfedf8c

--- linear_regression.py	110:8fa1ef2411a0
+++ linear_regression.py	111:88257dfedf8c
@@ -1,14 +1,13 @@
 
 from learner import *
 from theano import tensor as t
-from compile import Function
 from theano.scalar import as_scalar
 
 # this is one of the simplest examples of a learner, and illustrates
 # the use of theano
-class LinearRegression(OneShotTLearner):
+class LinearRegression(MinibatchUpdatesTLearner):
     """
     Implement linear regression, with or without L2 regularization
     (the former is called Ridge Regression and the latter Ordinary Least Squares).
 
     The predictor parameters are obtained analytically from the training set.
@@ -16,29 +15,28 @@
     different disjoint subsets of the training sets). After each call to
     update the predictor is ready to be used (and optimized for the union
     of all the training sets passed to update since construction or since
     the last call to forget).
 
-    The L2 regularization coefficient is obtained analytically.
     For each (input[t],output[t]) pair in a minibatch::
 
         output_t = b + W * input_t
 
     where b and W are obtained by minimizing::
 
-        lambda * sum_{ij} W_{ij}^2  +  sum_t ||output_t - target_t||^2
+        L2_regularizer * sum_{ij} W_{ij}^2  +  sum_t ||output_t - target_t||^2
 
     Let X be the whole training set inputs matrix (one input example per row),
     with the first column full of 1's, and let Y be the whole training set
     targets matrix (one example's target vector per row).
     Let theta be the matrix with b in its first column and W in the others;
     then each theta[:,i] is the solution of the linear system::
 
         XtX * theta[:,i] = XtY[:,i]
 
     where XtX is a (n_inputs+1)x(n_inputs+1) matrix containing X'*X
-    plus lambda on the diagonal except at (0,0),
+    plus L2_regularizer on the diagonal except at (0,0),
     and XtY is a (n_inputs+1)x(n_outputs) matrix containing X'*Y.
 
     The fields and attributes expected and produced by use and update are the following:
 
     - Input and output fields (example-wise quantities):
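The docstring above is the heart of the method: ridge regression solved in closed form, with the bias column left unpenalized. As a sanity check of that recipe, here is a minimal NumPy sketch, independent of the learner framework; the names ridge_solve and l2 are illustrative only::

    import numpy as np

    def ridge_solve(X, Y, l2=0.0):
        """Solve (X'X + l2*I) * theta = X'Y with an unpenalized bias.

        X: (n_examples, n_inputs), Y: (n_examples, n_outputs).
        Returns theta of shape (n_inputs+1, n_outputs); row 0 is b.
        """
        Xe = np.hstack([np.ones((X.shape[0], 1)), X])  # column of 1's first
        XtX = Xe.T @ Xe
        XtY = Xe.T @ Y
        reg = l2 * np.eye(XtX.shape[0])
        reg[0, 0] = 0.0                 # no penalty at (0,0): the bias term
        return np.linalg.solve(XtX + reg, XtY)

    # quick check: recover a known linear map y = x.W' + b
    rng = np.random.default_rng(0)
    X = rng.normal(size=(100, 3))
    Y = X @ np.array([[1.0], [-2.0], [0.5]]) + 0.1
    theta = ridge_solve(X, Y, l2=1e-3)
    b, W = theta[0], theta[1:]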
@@ -51,34 +49,31 @@
     - optional attributes (optionally expected as input_dataset attributes)
       (warning, this may be dangerous, the 'use' method will use those provided in the
       input_dataset rather than those learned during 'update'; currently no support
       for providing these to update):
 
-      - 'lambda'
+      - 'L2_regularizer'
       - 'b'
       - 'W'
-      - 'parameters' = (b, W) tuple
+      - 'parameters' = [b, W]
       - 'regularization_term'
       - 'XtX'
       - 'XtY'
 
     """
 
     def attributeNames(self):
-        return ["lambda","parameters","b","W","regularization_term","XtX","XtY"]
+        return ["L2_regularizer","parameters","b","W","regularization_term","XtX","XtY"]
 
     def useInputAttributes(self):
         return ["b","W"]
 
     def useOutputAttributes(self):
         return []
 
     def updateInputAttributes(self):
-        return ["lambda","XtX","XtY"]
+        return ["L2_regularizer","XtX","XtY"]
-
-    def updateOutputAttributes(self):
-        return ["parameters"] + self.updateMinibatchOutputAttributes() + self.updateEndOutputAttributes()
 
     def updateMinibatchInputFields(self):
         return ["input","target"]
 
     def updateMinibatchInputAttributes(self):
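These attribute and field lists read as a declarative contract between the learner and the framework: around each minibatch, a driver fetches the named inputs, runs the update, and stores back the named outputs. The framework itself is not part of this changeset, so the following driver loop is only a guess at the protocol; every method signature in it is an assumption::

    # hypothetical driver; MinibatchUpdatesTLearner's real loop is not shown
    # in this changeset, so the updateMinibatch/updateEnd calls are assumed
    def run_update(learner, dataset, minibatch_size=32):
        for minibatch in dataset.minibatches(minibatch_size):   # assumed API
            fields = {name: minibatch[name]
                      for name in learner.updateMinibatchInputFields()}
            results = learner.updateMinibatch(fields)           # assumed call
            # store back e.g. the accumulated 'XtX' and 'XtY' statistics
            for name in learner.updateMinibatchOutputAttributes():
                setattr(learner, name, results[name])
        learner.updateEnd()  # finalize: solve for theta, expose b and W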
91 return ["theta","XtX","XtY"] 86 return ["theta","XtX","XtY"]
92 87
93 def updateEndOutputAttributes(self): 88 def updateEndOutputAttributes(self):
94 return ["new_theta","b","W","regularization_term"] # CHECK: WILL b AND W CONTAIN OLD OR NEW THETA? @todo i.e. order of computation = ? 89 return ["new_theta","b","W","regularization_term"] # CHECK: WILL b AND W CONTAIN OLD OR NEW THETA? @todo i.e. order of computation = ?
95 90
91 def parameterAttributes(self):
92 return ["b","W"]
93
96 def defaultOutputFields(self, input_fields): 94 def defaultOutputFields(self, input_fields):
97 output_fields = ["output"] 95 output_fields = ["output"]
98 if "target" in input_fields: 96 if "target" in input_fields:
99 output_fields.append("squared_error") 97 output_fields.append("squared_error")
100 return output_fields 98 return output_fields
101 99
102 def __init__(self): 100 def __init__(self):
103 self._input = t.matrix('input') # n_examples x n_inputs 101 self._input = t.matrix('input') # n_examples x n_inputs
104 self._target = t.matrix('target') # n_examples x n_outputs 102 self._target = t.matrix('target') # n_examples x n_outputs
105 self._lambda = as_scalar(0.,'lambda') 103 self._L2_regularizer = as_scalar(0.,'L2_regularizer')
106 self._theta = t.matrix('theta') 104 self._theta = t.matrix('theta')
107 self._W = self._theta[:,1:] 105 self._W = self._theta[:,1:]
108 self._b = self._theta[:,0] 106 self._b = self._theta[:,0]
109 self._XtX = t.matrix('XtX') 107 self._XtX = t.matrix('XtX')
110 self._XtY = t.matrix('XtY') 108 self._XtY = t.matrix('XtY')
111 self._extended_input = t.prepend_one_to_each_row(self._input) 109 self._extended_input = t.prepend_one_to_each_row(self._input)
112 self._output = t.dot(self._input,self._W.T) + self._b # (n_examples , n_outputs) matrix 110 self._output = t.dot(self._input,self._W.T) + self._b # (n_examples , n_outputs) matrix
113 self._squared_error = t.sum_within_rows(t.sqr(self._output-self._target)) # (n_examples ) vector 111 self._squared_error = t.sum_within_rows(t.sqr(self._output-self._target)) # (n_examples ) vector
114 self._regularizer = self._lambda * t.dot(self._W,self._W) 112 self._regularizer = self._L2_regularizer * t.dot(self._W,self._W)
115 self._new_XtX = add_inplace(self._XtX,t.dot(self._extended_input.T,self._extended_input)) 113 self._new_XtX = add_inplace(self._XtX,t.dot(self._extended_input.T,self._extended_input))
116 self._new_XtY = add_inplace(self._XtY,t.dot(self._extended_input.T,self._target)) 114 self._new_XtY = add_inplace(self._XtY,t.dot(self._extended_input.T,self._target))
117 self._new_theta = t.solve_inplace(self._theta,self._XtX,self._XtY) 115 self._new_theta = t.solve_inplace(self._theta,self._XtX,self._XtY)
118 116
119 OneShotTLearner.__init__(self) 117 OneShotTLearner.__init__(self)
120 self.allocate()
121 118
122 def allocate(self,minibatch): 119 def allocate(self,minibatch):
123 minibatch_n_inputs = minibatch["input"].shape[1] 120 minibatch_n_inputs = minibatch["input"].shape[1]
124 minibatch_n_outputs = minibatch["target"].shape[1] 121 minibatch_n_outputs = minibatch["target"].shape[1]
125 if not self._n_inputs: 122 if not self._n_inputs:
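Several symbols in this constructor (t.prepend_one_to_each_row, t.sum_within_rows, add_inplace, t.solve_inplace) belong to this 2008-era theano/pylearn tree rather than to any current Theano API. Assuming they mean what their names suggest, one update step of the graph computes the following, transcribed here in plain NumPy::

    import numpy as np

    def minibatch_step(XtX, XtY, input, target):
        """What one update step computes, under the assumed op semantics."""
        ext = np.hstack([np.ones((input.shape[0], 1)), input])  # prepend 1's
        XtX += ext.T @ ext                    # _new_XtX (add_inplace)
        XtY += ext.T @ target                 # _new_XtY (add_inplace)
        theta = np.linalg.solve(XtX, XtY)     # _new_theta (solve_inplace)
        b, W = theta[0, :], theta[1:, :].T    # _b, _W as slices of theta
        output = input @ W.T + b              # _output
        squared_error = np.sum((output - target)**2, axis=1)  # per example
        return theta, output, squared_error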
@@ -128,20 +125,16 @@
             self.XtX = numpy.zeros((1+self._n_inputs,1+self._n_inputs))
             self.XtY = numpy.zeros((1+self._n_inputs,self._n_outputs))
             self.theta = numpy.zeros((self._n_outputs,1+self._n_inputs))
             self.forget()
         elif self._n_inputs!=minibatch_n_inputs or self._n_outputs!=minibatch_n_outputs:
-            # if the input or target changes dimension on the fly, we forget everything
+            # if the input or target changes dimension on the fly, we resize and forget everything
             self.forget()
 
     def forget(self):
         if self._n_inputs and self._n_outputs:
             self.XtX.resize((1+self._n_inputs,1+self._n_inputs))
             self.XtY.resize((1+self._n_inputs,self._n_outputs))
             self.XtX.data[:,:]=0
             self.XtY.data[:,:]=0
-            numpy.diag(self.XtX.data)[1:]=self.lambda
+            numpy.diag(self.XtX.data)[1:]=self.L2_regularizer
 
-    def updateEnd(self):
-        TLearner.updateEnd(self)
-        self.parameters = (self.W,self.b)
-
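The design that makes the sequential updates promised by the docstring work is that XtX and XtY are additive sufficient statistics: absorbing disjoint minibatches one at a time and solving after each call yields exactly the solution for the union of all data seen since forget(). A self-contained sketch of that scheme (illustrative, not the pylearn API); it seeds the regularizer on the diagonal by index assignment, since numpy.diag returns a copy, so writing into its result, as forget() above does, would not actually modify XtX::

    import numpy as np

    class SequentialRidge:
        """Accumulate X'X and X'Y across minibatches; solve after each one."""
        def __init__(self, n_inputs, n_outputs, l2=0.0):
            self.n_inputs, self.n_outputs, self.l2 = n_inputs, n_outputs, l2
            self.forget()

        def forget(self):
            d = 1 + self.n_inputs
            self.XtX = np.zeros((d, d))
            self.XtY = np.zeros((d, self.n_outputs))
            idx = np.arange(1, d)             # skip (0,0): bias unpenalized
            self.XtX[idx, idx] = self.l2      # writes through, unlike np.diag

        def update(self, X, Y):
            Xe = np.hstack([np.ones((X.shape[0], 1)), X])
            self.XtX += Xe.T @ Xe
            self.XtY += Xe.T @ Y
            self.theta = np.linalg.solve(self.XtX, self.XtY)
            return self.theta                 # optimal for all data so far

    # two disjoint chunks give the same theta as training on their union
    rng = np.random.default_rng(1)
    X, Y = rng.normal(size=(100, 3)), rng.normal(size=(100, 1))
    model = SequentialRidge(3, 1, l2=1e-3)
    model.update(X[:50], Y[:50])
    theta_seq = model.update(X[50:], Y[50:])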