pylearn: comparison linear_regression.py @ 385:db28ff3fb887
merge
author | Joseph Turian <turian@gmail.com>
date | Tue, 08 Jul 2008 02:00:14 -0400
parents | 74b402b5a81b
children | efb797c5efc0
384:edec18614a70 (parent) | 385:db28ff3fb887 (this revision)
---|---
2 Implementation of linear regression, with or without L2 regularization. | 2 Implementation of linear regression, with or without L2 regularization. |
3 This is one of the simplest examples of L{learner}, and illustrates | 3 This is one of the simplest examples of L{learner}, and illustrates |
4 the use of theano. | 4 the use of theano. |
5 """ | 5 """ |
6 | 6 |
7 from learner import * | 7 from pylearn import OfflineLearningAlgorithm |
8 from theano import tensor as t | 8 from theano import tensor as T |
9 from theano.scalar import as_scalar | 9 from theano.scalar import as_scalar |
10 from common.autoname import AutoName | |
10 | 11 |
11 class LinearRegression(MinibatchUpdatesTLearner): | 12 class LinearRegression(OfflineLearningAlgorithm): |
12 """ | 13 """ |
13 Implement linear regression, with or without L2 regularization | 14 Implement linear regression, with or without L2 regularization |
14 (the former is called Ridge Regression and the latter Ordinary Least Squares). | 15 (the former is called Ridge Regression and the latter Ordinary Least Squares). |
15 | 16 |
16 The predictor parameters are obtained analytically from the training set. | 17 The predictor parameters are obtained analytically from the training set. |
38 | 39 |
39 where XtX is a (n_inputs+1)x(n_inputs+1) matrix containing X'*X | 40 where XtX is a (n_inputs+1)x(n_inputs+1) matrix containing X'*X |
40 plus L2_regularizer on the diagonal except at (0,0), | 41 plus L2_regularizer on the diagonal except at (0,0), |
41 and XtY is a (n_inputs+1)*n_outputs matrix containing X'*Y. | 42 and XtY is a (n_inputs+1)*n_outputs matrix containing X'*Y. |
42 | 43 |
43 The fields and attributes expected and produced by use and update are the following: | 44 The dataset fields expected and produced by the learning algorithm and the trained model |
45 are the following: | |
44 | 46 |
45 - Input and output fields (example-wise quantities): | 47 - Input and output dataset fields (example-wise quantities): |
46 | 48 |
47 - 'input' (always expected by use and update as an input_dataset field) | 49 - 'input' (always expected as an input_dataset field) |
48 'target' (optionally expected by use and update as an input_dataset field) | 50 'target' (always expected by the learning algorithm, optional for the learned model) |
49 'output' (optionally produced by use as an output dataset field) | 51 'output' (always produced by the learned model) |
50 'squared_error' (optionally produced by use as an output dataset field, needs 'target') | 52 'squared_error' (optionally produced by the learned model if 'target' is provided) |
53 = example-wise squared error | |
54 """ | |
55 def __init__(self, L2_regularizer=0): | |
56 self.predictor = LinearPredictor(None) | |
57 self.L2_regularizer=L2_regularizer | |
58 self._XtX = T.matrix('XtX') | |
59 self._XtY = T.matrix('XtY') | |
60 self._extended_input = T.prepend_one_to_each_row(self._input) | |
51 | 61 |
52 - optional attributes (optionally expected as input_dataset attributes) | 62 class LinearPredictorEquations(AutoName): |
53 (warning, this may be dangerous, the 'use' method will use those provided in the | 63 inputs = T.matrix() # minibatchsize x n_inputs |
54 input_dataset rather than those learned during 'update'; currently no support | 64 targets = T.matrix() # minibatchsize x n_outputs |
55 for providing these to update): | 65 theta = T.matrix() # (n_inputs+1) x n_outputs |
56 | 66 b = theta[0] |
57 - 'L2_regularizer' | 67 Wt = theta[1:,:] |
58 - 'b' | 68 outputs = T.dot(inputs,Wt) + b # minibatchsize x n_outputs |
59 - 'W' | 69 squared_errors = T.sum(T.sqr(targets-outputs),axis=1) |
60 - 'parameters' = [b, W] | |
61 - 'regularization_term' | |
62 - 'XtX' | |
63 - 'XtY' | |
64 | 70 |
71 __compiled = False | |
72 @classmethod | |
73 def compile(cls,linker='c|py'): | |
74 if cls.__compiled: | |
75 return | |
76 def fn(input_vars,output_vars): | |
77 return staticmethod(theano.function(input_vars,output_vars, linker=linker)) | |
78 | |
79 cls.compute_outputs = fn([inputs,theta],[outputs]) | |
80 cls.compute_errors = fn([outputs,targets],[squared_errors]) | |
81 | |
82 cls.__compiled = True | |
83 | |
84 def __init__(self): | |
85 self.compile() | |
86 | |
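
LinearPredictorEquations declares its graph at class scope and compiles it once, on first instantiation. In plain numpy, the two compiled functions compute the following (theta packs the bias in row 0 and the transposed weights below it, exactly as in the expressions above; this sketch is not part of the module):

```python
import numpy

def compute_outputs(inputs, theta):
    # row 0 of theta is the bias b; the remaining rows are Wt = W'
    b, Wt = theta[0], theta[1:, :]
    return numpy.dot(inputs, Wt) + b  # minibatchsize x n_outputs

def compute_errors(outputs, targets):
    # example-wise squared error: one scalar per row
    return numpy.sum((targets - outputs) ** 2, axis=1)
```
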
87 class LinearRegressionEquations(LinearPredictorEquations): | |
88 P = LinearPredictorEquations | |
89 XtX = T.matrix() # (n_inputs+1) x (n_inputs+1) | |
90 XtY = T.matrix() # (n_inputs+1) x n_outputs | |
91 extended_input = T.prepend_scalar_to_each_row(1.,P.inputs) | |
92 new_XtX = add_inplace(XtX,T.dot(extended_input.T,extended_input)) | |
93 new_XtY = add_inplace(XtY,T.dot(extended_input.T,P.targets)) | |
94 new_theta = T.Cholesky_solve_inplace(P.theta,XtX,XtY) # solve linear system XtX theta = XtY | |
95 | |
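
The three update equations accumulate the sufficient statistics XtX and XtY in place and then re-solve the normal equations. prepend_scalar_to_each_row, add_inplace and Cholesky_solve_inplace are placeholders rather than stock Theano ops, so here is a plain numpy sketch of one minibatch step:

```python
import numpy

def minibatch_update(XtX, XtY, inputs, targets):
    # prepend the constant 1 to each row so row 0 of theta becomes the bias
    extended = numpy.hstack([numpy.ones((inputs.shape[0], 1)), inputs])
    XtX += numpy.dot(extended.T, extended)  # accumulate X'X in place
    XtY += numpy.dot(extended.T, targets)   # accumulate X'Y in place
    # XtX is symmetric positive (semi)definite, so a Cholesky solve applies
    return numpy.linalg.solve(XtX, XtY)     # new theta
```
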
96 class LinearPredictor(object): | |
65 """ | 97 """ |
98 A linear predictor has parameters theta (a bias vector and a weight matrix) | |
99 it can use to make a linear prediction (according to the LinearPredictorEquations). | |
100 It can compute its output (bias + weight * input) and a squared error (||output - target||^2). | |
101 """ | |
102 def __init__(self, theta): | |
103 self.theta=theta | |
104 self.n_inputs=theta.shape[0]-1 | |
105 self.n_outputs=theta.shape[1] | |
106 self.predict_equations = LinearPredictorEquations() | |
66 | 107 |
67 def attributeNames(self): | 108 def compute_outputs(self,inputs): |
68 return ["L2_regularizer","parameters","b","W","regularization_term","XtX","XtY"] | 109 return self.predict_equations.compute_outputs(inputs,self.theta) |
110 def compute_errors(self,inputs,targets): | |
111 return self.predict_equations.compute_errors(self.compute_outputs(inputs),targets) | |
112 def compute_outputs_and_errors(self,inputs,targets): | |
113 outputs = self.compute_outputs(inputs) | |
114 return [outputs,self.predict_equations.compute_errors(outputs,targets)] | |
115 | |
116 def __call__(self,dataset,output_fieldnames=None,cached_output_dataset=False): | |
117 assert dataset.hasFields(["input"]) | |
118 if output_fieldnames is None: | |
119 if dataset.hasFields(["target"]): | |
120 output_fieldnames = ["output","squared_error"] | |
121 else: | |
122 output_fieldnames = ["output"] | |
123 output_fieldnames.sort() | |
124 if output_fieldnames == ["squared_error"]: | |
125 f = self.compute_errors | |
126 elif output_fieldnames == ["output"]: | |
127 f = self.compute_outputs | |
128 elif output_fieldnames == ["output","squared_error"]: | |
129 f = self.compute_outputs_and_errors | |
130 else: | |
131 raise ValueError("unknown field(s) in output_fieldnames: "+str(output_fieldnames)) | |
132 | |
133 ds=ApplyFunctionDataSet(dataset,f,output_fieldnames) | |
134 if cached_output_dataset: | |
135 return CachedDataSet(ds) | |
136 else: | |
137 return ds | |
138 | |
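
A self-contained numpy sanity check of the pipeline these classes implement: solve for theta analytically, then predict with the b/Wt split used by LinearPredictorEquations. Nothing below imports pylearn and all names are illustrative:

```python
import numpy

rng = numpy.random.RandomState(0)
X = rng.randn(100, 3)                          # 100 examples, 3 inputs
Y = numpy.dot(X, rng.randn(3, 2)) + 0.5        # 2 outputs, bias 0.5, no noise

ext = numpy.hstack([numpy.ones((100, 1)), X])  # prepended ones column
theta = numpy.linalg.solve(numpy.dot(ext.T, ext), numpy.dot(ext.T, Y))
b, Wt = theta[0], theta[1:, :]
outputs = numpy.dot(X, Wt) + b
print(numpy.max(numpy.abs(outputs - Y)))       # ~1e-13: exact recovery
```
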
69 | 139 |
70 def useInputAttributes(self): | 140 self._XtX = T.matrix('XtX') |
71 return ["b","W"] | 141 self._XtY = T.matrix('XtY') |
142 self._extended_input = T.prepend_one_to_each_row(self._input) | |
143 self._output = T.dot(self._input,self._W.T) + self._b # (n_examples , n_outputs) matrix | |
144 self._squared_error = T.sum_within_rows(T.sqr(self._output-self._target)) # (n_examples ) vector | |
145 self._regularizer = self._L2_regularizer * T.dot(self._W,self._W) | |
146 self._new_XtX = add_inplace(self._XtX,T.dot(self._extended_input.T,self._extended_input)) | |
147 self._new_XtY = add_inplace(self._XtY,T.dot(self._extended_input.T,self._target)) | |
148 self._new_theta = T.solve_inplace(self._theta,self._XtX,self._XtY) | |
72 | 149 |
73 def useOutputAttributes(self): | 150 def allocate(self,dataset): |
74 return [] | 151 dataset_n_inputs = dataset["input"].shape[1] |
75 | 152 dataset_n_outputs = dataset["target"].shape[1] |
76 def updateInputAttributes(self): | |
77 return ["L2_regularizer","XtX","XtY"] | |
78 | |
79 def updateMinibatchInputFields(self): | |
80 return ["input","target"] | |
81 | |
82 def updateMinibatchInputAttributes(self): | |
83 return ["XtX","XtY"] | |
84 | |
85 def updateMinibatchOutputAttributes(self): | |
86 return ["new_XtX","new_XtY"] | |
87 | |
88 def updateEndInputAttributes(self): | |
89 return ["theta","XtX","XtY"] | |
90 | |
91 def updateEndOutputAttributes(self): | |
92 return ["new_theta","b","W","regularization_term"] # CHECK: WILL b AND W CONTAIN OLD OR NEW THETA? @todo i.e. order of computation = ? | |
93 | |
94 def parameterAttributes(self): | |
95 return ["b","W"] | |
96 | |
97 def defaultOutputFields(self, input_fields): | |
98 output_fields = ["output"] | |
99 if "target" in input_fields: | |
100 output_fields.append("squared_error") | |
101 return output_fields | |
102 | |
103 def __init__(self): | |
104 self._input = t.matrix('input') # n_examples x n_inputs | |
105 self._target = t.matrix('target') # n_examples x n_outputs | |
106 self._L2_regularizer = as_scalar(0.,'L2_regularizer') | |
107 self._theta = t.matrix('theta') | |
108 self._W = self._theta[:,1:] | |
109 self._b = self._theta[:,0] | |
110 self._XtX = t.matrix('XtX') | |
111 self._XtY = t.matrix('XtY') | |
112 self._extended_input = t.prepend_one_to_each_row(self._input) | |
113 self._output = t.dot(self._input,self._W.T) + self._b # (n_examples , n_outputs) matrix | |
114 self._squared_error = t.sum_within_rows(t.sqr(self._output-self._target)) # (n_examples ) vector | |
115 self._regularizer = self._L2_regularizer * t.dot(self._W,self._W) | |
116 self._new_XtX = add_inplace(self._XtX,t.dot(self._extended_input.T,self._extended_input)) | |
117 self._new_XtY = add_inplace(self._XtY,t.dot(self._extended_input.T,self._target)) | |
118 self._new_theta = t.solve_inplace(self._theta,self._XtX,self._XtY) | |
119 | |
120 MinibatchUpdatesTLearner.__init__(self) | |
121 | |
122 def allocate(self,minibatch): | |
123 minibatch_n_inputs = minibatch["input"].shape[1] | |
124 minibatch_n_outputs = minibatch["target"].shape[1] | |
125 if not self._n_inputs: | 153 if not self._n_inputs: |
126 self._n_inputs = minibatch_n_inputs | 154 self._n_inputs = dataset_n_inputs |
127 self._n_outputs = minibatch_n_outputs | 155 self._n_outputs = dataset_n_outputs |
128 self.XtX = numpy.zeros((1+self._n_inputs,1+self._n_inputs)) | 156 self.XtX = numpy.zeros((1+self._n_inputs,1+self._n_inputs)) |
129 self.XtY = numpy.zeros((1+self._n_inputs,self._n_outputs)) | 157 self.XtY = numpy.zeros((1+self._n_inputs,self._n_outputs)) |
130 self.theta = numpy.zeros((self._n_outputs,1+self._n_inputs)) | 158 self.theta = numpy.zeros((self._n_outputs,1+self._n_inputs)) |
131 self.forget() | 159 self.forget() |
132 elif self._n_inputs!=minibatch_n_inputs or self._n_outputs!=minibatch_n_outputs: | 160 elif self._n_inputs!=dataset_n_inputs or self._n_outputs!=dataset_n_outputs: |
133 # if the input or target changes dimension on the fly, we resize and forget everything | 161 # if the input or target changes dimension on the fly, we resize and forget everything |
134 self.forget() | 162 self.forget() |
135 | 163 |
136 def forget(self): | 164 def forget(self): |
137 if self._n_inputs and self._n_outputs: | 165 if self._n_inputs and self._n_outputs: |
138 self.XtX.resize((1+self.n_inputs,1+self.n_inputs)) | 166 self.XtX.resize((1+self.n_inputs,1+self.n_inputs)) |
139 self.XtY.resize((1+self.n_inputs,self.n_outputs)) | 167 self.XtY.resize((1+self.n_inputs,self.n_outputs)) |
140 self.XtX.data[:,:]=0 | 168 self.XtX.data[:,:]=0 |
141 self.XtY.data[:,:]=0 | 169 self.XtY.data[:,:]=0 |
142 numpy.diag(self.XtX.data)[1:]=self.L2_regularizer | 170 numpy.diag(self.XtX.data)[1:]=self.L2_regularizer |
143 | 171 |
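
One caveat in forget() above: numpy.diag returns a copy of the diagonal (a read-only view in newer numpy), so assigning into its result leaves XtX unchanged. A sketch of a reset that writes the coefficient onto the diagonal in place, skipping (0,0) so the bias stays unregularized:

```python
import numpy

def reset_statistics(XtX, XtY, L2_regularizer):
    XtX[:, :] = 0
    XtY[:, :] = 0
    idx = numpy.arange(1, XtX.shape[0])
    XtX[idx, idx] = L2_regularizer  # diagonal except (0,0), written in place
```
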
172 def __call__(self,dataset): | |
173 | |
174 |