pylearn: comparison linear_regression.py @ 111:88257dfedf8c
Added another work in progress, for MLPs
author: bengioy@bengiomac.local
date: Wed, 07 May 2008 09:16:04 -0400
parents: 8fa1ef2411a0
children: d0a1bd0378c6
110:8fa1ef2411a0 | 111:88257dfedf8c |
---|---|
1 | 1 |
2 from learner import * | 2 from learner import * |
3 from theano import tensor as t | 3 from theano import tensor as t |
4 from compile import Function | |
5 from theano.scalar import as_scalar | 4 from theano.scalar import as_scalar |
6 | 5 |
7 # this is one of the simplest example of learner, and illustrates | 6 # this is one of the simplest example of learner, and illustrates |
8 # the use of theano | 7 # the use of theano |
9 class LinearRegression(OneShotTLearner): | 8 class LinearRegression(MinibatchUpdatesTLearner): |
10 """ | 9 """ |
11 Implement linear regression, with or without L2 regularization | 10 Implement linear regression, with or without L2 regularization |
12 (the former is called Ridge Regression and the latter Ordinary Least Squares). | 11 (the former is called Ridge Regression and the latter Ordinary Least Squares). |
13 | 12 |
14 The predictor parameters are obtained analytically from the training set. | 13 The predictor parameters are obtained analytically from the training set. |
15 Training can proceed sequentially (with multiple calls to update with | 14 Training can proceed sequentially (with multiple calls to update with |
16 different disjoint subsets of the training sets). After each call to | 15 different disjoint subsets of the training sets). After each call to |
17 update the predictor is ready to be used (and optimized for the union | 16 update the predictor is ready to be used (and optimized for the union |
18 of all the training sets passed to update since construction or since | 17 of all the training sets passed to update since construction or since |
19 the last call to forget). | 18 the last call to forget). |
20 | 19 |
21 The L2 regularization coefficient is obtained analytically. | |
22 For each (input[t],output[t]) pair in a minibatch,:: | 20 For each (input[t],output[t]) pair in a minibatch,:: |
23 | 21 |
24 output_t = b + W * input_t | 22 output_t = b + W * input_t |
25 | 23 |
26 where b and W are obtained by minimizing:: | 24 where b and W are obtained by minimizing:: |
27 | 25 |
28 lambda sum_{ij} W_{ij}^2 + sum_t ||output_t - target_t||^2 | 26 L2_regularizer sum_{ij} W_{ij}^2 + sum_t ||output_t - target_t||^2 |
29 | 27 |
30 Let X be the whole training set inputs matrix (one input example per row), | 28 Let X be the whole training set inputs matrix (one input example per row), |
31 with the first column full of 1's, and Let Y the whole training set | 29 with the first column full of 1's, and Let Y the whole training set |
32 targets matrix (one example's target vector per row). | 30 targets matrix (one example's target vector per row). |
33 Let theta = the matrix with b in its first column and W in the others, | 31 Let theta = the matrix with b in its first column and W in the others, |
34 then each theta[:,i] is the solution of the linear system:: | 32 then each theta[:,i] is the solution of the linear system:: |
35 | 33 |
36 XtX * theta[:,i] = XtY[:,i] | 34 XtX * theta[:,i] = XtY[:,i] |
37 | 35 |
38 where XtX is a (n_inputs+1)x(n_inputs+1) matrix containing X'*X | 36 where XtX is a (n_inputs+1)x(n_inputs+1) matrix containing X'*X |
39 plus lambda on the diagonal except at (0,0), | 37 plus L2_regularizer on the diagonal except at (0,0), |
40 and XtY is a (n_inputs+1)*n_outputs matrix containing X'*Y. | 38 and XtY is a (n_inputs+1)*n_outputs matrix containing X'*Y. |
41 | 39 |
42 The fields and attributes expected and produced by use and update are the following: | 40 The fields and attributes expected and produced by use and update are the following: |
43 | 41 |
44 - Input and output fields (example-wise quantities): | 42 - Input and output fields (example-wise quantities): |
... | ... |
51 - optional attributes (optionally expected as input_dataset attributes) | 49 - optional attributes (optionally expected as input_dataset attributes) |
52 (warning, this may be dangerous, the 'use' method will use those provided in the | 50 (warning, this may be dangerous, the 'use' method will use those provided in the |
53 input_dataset rather than those learned during 'update'; currently no support | 51 input_dataset rather than those learned during 'update'; currently no support |
54 for providing these to update): | 52 for providing these to update): |
55 | 53 |
56 - 'lambda' | 54 - 'L2_regularizer' |
57 - 'b' | 55 - 'b' |
58 - 'W' | 56 - 'W' |
59 - 'parameters' = (b, W) tuple | 57 - 'parameters' = [b, W] |
60 - 'regularization_term' | 58 - 'regularization_term' |
61 - 'XtX' | 59 - 'XtX' |
62 - 'XtY' | 60 - 'XtY' |
63 | 61 |
64 """ | 62 """ |
65 | 63 |
66 def attributeNames(self): | 64 def attributeNames(self): |
67 return ["lambda","parameters","b","W","regularization_term","XtX","XtY"] | 65 return ["L2_regularizer","parameters","b","W","regularization_term","XtX","XtY"] |
68 | 66 |
69 def useInputAttributes(self): | 67 def useInputAttributes(self): |
70 return ["b","W"] | 68 return ["b","W"] |
71 | 69 |
72 def useOutputAttributes(self): | 70 def useOutputAttributes(self): |
73 return [] | 71 return [] |
74 | 72 |
75 def updateInputAttributes(self): | 73 def updateInputAttributes(self): |
76 return ["lambda","XtX","XtY"] | 74 return ["L2_regularizer","XtX","XtY"] |
77 | |
78 def updateOutputAttributes(self): | |
79 return ["parameters"] + self.updateMinibatchOutputAttributes() + self.updateEndOutputAttributes() | |
80 | 75 |
81 def updateMinibatchInputFields(self): | 76 def updateMinibatchInputFields(self): |
82 return ["input","target"] | 77 return ["input","target"] |
83 | 78 |
84 def updateMinibatchInputAttributes(self): | 79 def updateMinibatchInputAttributes(self): |
... | ... |
91 return ["theta","XtX","XtY"] | 86 return ["theta","XtX","XtY"] |
92 | 87 |
93 def updateEndOutputAttributes(self): | 88 def updateEndOutputAttributes(self): |
94 return ["new_theta","b","W","regularization_term"] # CHECK: WILL b AND W CONTAIN OLD OR NEW THETA? @todo i.e. order of computation = ? | 89 return ["new_theta","b","W","regularization_term"] # CHECK: WILL b AND W CONTAIN OLD OR NEW THETA? @todo i.e. order of computation = ? |
95 | 90 |
| 91 def parameterAttributes(self): |
| 92 return ["b","W"] |
| 93 |
96 def defaultOutputFields(self, input_fields): | 94 def defaultOutputFields(self, input_fields): |
97 output_fields = ["output"] | 95 output_fields = ["output"] |
98 if "target" in input_fields: | 96 if "target" in input_fields: |
99 output_fields.append("squared_error") | 97 output_fields.append("squared_error") |
100 return output_fields | 98 return output_fields |
101 | 99 |
102 def __init__(self): | 100 def __init__(self): |
103 self._input = t.matrix('input') # n_examples x n_inputs | 101 self._input = t.matrix('input') # n_examples x n_inputs |
104 self._target = t.matrix('target') # n_examples x n_outputs | 102 self._target = t.matrix('target') # n_examples x n_outputs |
105 self._lambda = as_scalar(0.,'lambda') | 103 self._L2_regularizer = as_scalar(0.,'L2_regularizer') |
106 self._theta = t.matrix('theta') | 104 self._theta = t.matrix('theta') |
107 self._W = self._theta[:,1:] | 105 self._W = self._theta[:,1:] |
108 self._b = self._theta[:,0] | 106 self._b = self._theta[:,0] |
109 self._XtX = t.matrix('XtX') | 107 self._XtX = t.matrix('XtX') |
110 self._XtY = t.matrix('XtY') | 108 self._XtY = t.matrix('XtY') |
111 self._extended_input = t.prepend_one_to_each_row(self._input) | 109 self._extended_input = t.prepend_one_to_each_row(self._input) |
112 self._output = t.dot(self._input,self._W.T) + self._b # (n_examples , n_outputs) matrix | 110 self._output = t.dot(self._input,self._W.T) + self._b # (n_examples , n_outputs) matrix |
113 self._squared_error = t.sum_within_rows(t.sqr(self._output-self._target)) # (n_examples ) vector | 111 self._squared_error = t.sum_within_rows(t.sqr(self._output-self._target)) # (n_examples ) vector |
114 self._regularizer = self._lambda * t.dot(self._W,self._W) | 112 self._regularizer = self._L2_regularizer * t.dot(self._W,self._W) |
115 self._new_XtX = add_inplace(self._XtX,t.dot(self._extended_input.T,self._extended_input)) | 113 self._new_XtX = add_inplace(self._XtX,t.dot(self._extended_input.T,self._extended_input)) |
116 self._new_XtY = add_inplace(self._XtY,t.dot(self._extended_input.T,self._target)) | 114 self._new_XtY = add_inplace(self._XtY,t.dot(self._extended_input.T,self._target)) |
117 self._new_theta = t.solve_inplace(self._theta,self._XtX,self._XtY) | 115 self._new_theta = t.solve_inplace(self._theta,self._XtX,self._XtY) |
118 | 116 |
119 OneShotTLearner.__init__(self) | 117 OneShotTLearner.__init__(self) |
120 self.allocate() | |
121 | 118 |
122 def allocate(self,minibatch): | 119 def allocate(self,minibatch): |
123 minibatch_n_inputs = minibatch["input"].shape[1] | 120 minibatch_n_inputs = minibatch["input"].shape[1] |
124 minibatch_n_outputs = minibatch["target"].shape[1] | 121 minibatch_n_outputs = minibatch["target"].shape[1] |
125 if not self._n_inputs: | 122 if not self._n_inputs: |
126 self._n_inputs = minibatch_n_inputs | 123 self._n_inputs = minibatch_n_inputs |
127 self._n_outputs = minibatch_n_outputs | 124 self._n_outputs = minibatch_n_outputs |
128 self.XtX = numpy.zeros((1+self._n_inputs,1+self._n_inputs)) | 125 self.XtX = numpy.zeros((1+self._n_inputs,1+self._n_inputs)) |
129 self.XtY = numpy.zeros((1+self._n_inputs,self._n_outputs)) | 126 self.XtY = numpy.zeros((1+self._n_inputs,self._n_outputs)) |
130 self.theta = numpy.zeros((self._n_outputs,1+self._n_inputs)) | 127 self.theta = numpy.zeros((self._n_outputs,1+self._n_inputs)) |
131 self.forget() | 128 self.forget() |
132 elif self._n_inputs!=minibatch_n_inputs or self._n_outputs!=minibatch_n_outputs: | 129 elif self._n_inputs!=minibatch_n_inputs or self._n_outputs!=minibatch_n_outputs: |
133 # if the input or target changes dimension on the fly, we forget everything | 130 # if the input or target changes dimension on the fly, we resize and forget everything |
134 self.forget() | 131 self.forget() |
135 | 132 |
136 def forget(self): | 133 def forget(self): |
137 if self._n_inputs and self._n_outputs: | 134 if self._n_inputs and self._n_outputs: |
138 self.XtX.resize((1+self.n_inputs,1+self.n_inputs)) | 135 self.XtX.resize((1+self.n_inputs,1+self.n_inputs)) |
139 self.XtY.resize((1+self.n_inputs,self.n_outputs)) | 136 self.XtY.resize((1+self.n_inputs,self.n_outputs)) |
140 self.XtX.data[:,:]=0 | 137 self.XtX.data[:,:]=0 |
141 self.XtY.data[:,:]=0 | 138 self.XtY.data[:,:]=0 |
142 numpy.diag(self.XtX.data)[1:]=self.lambda | 139 numpy.diag(self.XtX.data)[1:]=self.L2_regularizer |
143 | 140 |
144 def updateEnd(self): | |
145 TLearner.updateEnd(self) | |
146 self.parameters = (self.W,self.b) | |
147 |
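
A note on the math in the docstring above: the analytic solution it describes is the standard ridge-regression normal equations. The following is a minimal NumPy sketch of that computation, assuming dense 2-D arrays; the function name ridge_fit and its layout are illustrative, not pylearn API::

    import numpy

    def ridge_fit(X, Y, L2_regularizer=0.0):
        # X: (n_examples, n_inputs) inputs; Y: (n_examples, n_outputs) targets
        n_examples, n_inputs = X.shape
        # prepend a column of 1's so the first row of theta plays the role of b
        Xe = numpy.hstack([numpy.ones((n_examples, 1)), X])
        XtX = numpy.dot(Xe.T, Xe)
        XtY = numpy.dot(Xe.T, Y)
        # add L2_regularizer on the diagonal except at (0,0): the bias b is not penalized
        idx = numpy.arange(1, n_inputs + 1)
        XtX[idx, idx] += L2_regularizer
        # solve XtX * theta[:,i] = XtY[:,i] for every column i at once
        theta = numpy.linalg.solve(XtX, XtY)
        b, W = theta[0, :], theta[1:, :].T  # W is (n_outputs, n_inputs)
        return b, W

Predictions then follow as numpy.dot(X, W.T) + b, which is what self._output computes in __init__ above.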
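
The minibatch machinery relies on XtX and XtY being sufficient statistics: update only has to accumulate them and re-solve, and forget only has to reset them. Here is a sketch of that accumulate-then-solve loop, again with invented names rather than pylearn's Learner interface, and without Theano::

    import numpy

    class IncrementalRidge(object):
        def __init__(self, n_inputs, n_outputs, L2_regularizer=0.0):
            self.L2_regularizer = L2_regularizer
            self.XtX = numpy.zeros((1 + n_inputs, 1 + n_inputs))
            self.XtY = numpy.zeros((1 + n_inputs, n_outputs))
            self.forget()

        def forget(self):
            # zero the sufficient statistics, then seed the diagonal (except
            # entry (0,0)) with the regularizer, mirroring forget() above
            self.XtX[:, :] = 0
            self.XtY[:, :] = 0
            idx = numpy.arange(1, self.XtX.shape[0])
            self.XtX[idx, idx] = self.L2_regularizer

        def update(self, inputs, targets):
            # one minibatch step: accumulate X'X and X'Y, then re-solve, so the
            # predictor is optimized for the union of all minibatches seen
            # since construction or the last forget()
            Xe = numpy.hstack([numpy.ones((inputs.shape[0], 1)), inputs])
            self.XtX += numpy.dot(Xe.T, Xe)
            self.XtY += numpy.dot(Xe.T, targets)
            self.theta = numpy.linalg.solve(self.XtX, self.XtY)

    # usage: two sequential minibatches; theta is usable after each update
    learner = IncrementalRidge(n_inputs=3, n_outputs=2, L2_regularizer=0.1)
    learner.update(numpy.ones((4, 3)), numpy.ones((4, 2)))
    learner.update(numpy.zeros((4, 3)), numpy.zeros((4, 2)))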