Mercurial > pylearn
comparison linear_regression.py @ 390:efb797c5efc0
First non-crashing draft of LinearRegression
author | Yoshua Bengio <bengioy@iro.umontreal.ca> |
---|---|
date | Tue, 08 Jul 2008 17:49:44 -0400 |
parents | 74b402b5a81b |
children | 273e5c03003e |
comparison
equal
deleted
inserted
replaced
386:a474341861fa | 390:efb797c5efc0 |
---|---|
2 Implementation of linear regression, with or without L2 regularization. | 2 Implementation of linear regression, with or without L2 regularization. |
3 This is one of the simplest example of L{learner}, and illustrates | 3 This is one of the simplest example of L{learner}, and illustrates |
4 the use of theano. | 4 the use of theano. |
5 """ | 5 """ |
6 | 6 |
7 from pylearn import OfflineLearningAlgorithm | 7 from pylearn.learner import OfflineLearningAlgorithm |
8 from theano import tensor as T | 8 from theano import tensor as T |
9 from theano.others_ops import prepend_1_to_each_row | |
9 from theano.scalar import as_scalar | 10 from theano.scalar import as_scalar |
10 from common.autoname import AutoName | 11 from common.autoname import AutoName |
12 import theano | |
13 import numpy | |
11 | 14 |
12 class LinearRegression(OfflineLearningAlgorithm): | 15 class LinearRegression(OfflineLearningAlgorithm): |
13 """ | 16 """ |
14 Implement linear regression, with or without L2 regularization | 17 Implement linear regression, with or without L2 regularization |
15 (the former is called Ridge Regression and the latter Ordinary Least Squares). | 18 (the former is called Ridge Regression and the latter Ordinary Least Squares). |
19 | |
20 Usage: | |
21 | |
22 linear_regressor=LinearRegression(L2_regularizer=0.1) | |
23 linear_predictor=linear_regression(training_set) | |
24 all_results_dataset=linear_predictor(test_set) # creates a dataset with "output" and "squared_error" field | |
25 outputs = linear_predictor.compute_outputs(inputs) # inputs and outputs are numpy arrays | |
26 outputs, errors = linear_predictor.compute_outputs_and_errors(inputs,targets) | |
27 errors = linear_predictor.compute_errors(inputs,targets) | |
28 mse = linear_predictor.compute_mse(inputs,targets) | |
29 | |
30 | |
31 | |
32 The training_set must have fields "input" and "target". | |
33 The test_set must have field "input", and needs "target" if | |
34 we want to compute the squared errors. | |
16 | 35 |
17 The predictor parameters are obtained analytically from the training set. | 36 The predictor parameters are obtained analytically from the training set. |
18 Training can proceed sequentially (with multiple calls to update with | 37 Training can proceed sequentially (with multiple calls to update with |
19 different disjoint subsets of the training sets). After each call to | 38 different disjoint subsets of the training sets). After each call to |
20 update the predictor is ready to be used (and optimized for the union | 39 update the predictor is ready to be used (and optimized for the union |
50 - 'target' (always expected by the learning algorithm, optional for learned model) | 69 - 'target' (always expected by the learning algorithm, optional for learned model) |
51 - 'output' (always produced by learned model) | 70 - 'output' (always produced by learned model) |
52 - 'squared_error' (optionally produced by learned model if 'target' is provided) | 71 - 'squared_error' (optionally produced by learned model if 'target' is provided) |
53 = example-wise squared error | 72 = example-wise squared error |
54 """ | 73 """ |
55 def __init__(self, L2_regularizer=0): | 74 def __init__(self, L2_regularizer=0,minibatch_size=10000): |
56 self.predictor = LinearPredictor(None,None | |
57 self.L2_regularizer=L2_regularizer | 75 self.L2_regularizer=L2_regularizer |
58 self._XtX = T.matrix('XtX') | 76 self.equations = LinearRegressionEquations() |
59 self._XtY = T.matrix('XtY') | 77 self.minibatch_size=1000 |
60 self._extended_input = T.prepend_one_to_each_row(self._input) | 78 |
79 def __call__(self,trainset): | |
80 first_example = trainset[0] | |
81 n_inputs = first_example['input'].size | |
82 n_outputs = first_example['target'].size | |
83 XtX = numpy.zeros((n_inputs+1,n_inputs+1)) | |
84 XtY = numpy.zeros((n_inputs+1,n_outputs)) | |
85 for i in xrange(n_inputs): | |
86 XtX[i+1,i+1]=self.L2_regularizer | |
87 mbs=min(self.minibatch_size,len(trainset)) | |
88 for inputs,targets in trainset.minibatches(["input","target"],minibatch_size=mbs): | |
89 XtX,XtY=self.equations.update(XtX,XtY,inputs,targets) | |
90 theta=numpy.linalg.solve(XtX,XtY) | |
91 return LinearPredictor(theta) | |
61 | 92 |
62 class LinearPredictorEquations(AutoName): | 93 class LinearPredictorEquations(AutoName): |
63 inputs = T.matrix() # minibatchsize x n_inputs | 94 inputs = T.matrix() # minibatchsize x n_inputs |
64 targets = T.matrix() # minibatchsize x n_outputs | 95 targets = T.matrix() # minibatchsize x n_outputs |
65 theta = T.matrix() # (n_inputs+1) x n_outputs | 96 theta = T.matrix() # (n_inputs+1) x n_outputs |
74 if cls.__compiled: | 105 if cls.__compiled: |
75 return | 106 return |
76 def fn(input_vars,output_vars): | 107 def fn(input_vars,output_vars): |
77 return staticmethod(theano.function(input_vars,output_vars, linker=linker)) | 108 return staticmethod(theano.function(input_vars,output_vars, linker=linker)) |
78 | 109 |
79 cls.compute_outputs = fn([inputs,theta],[outputs]) | 110 cls.compute_outputs = fn([cls.inputs,cls.theta],[cls.outputs]) |
80 cls.compute_errors = fn([outputs,targets],[squared_errors]) | 111 cls.compute_errors = fn([cls.outputs,cls.targets],[cls.squared_errors]) |
81 | 112 |
82 cls.__compiled = True | 113 cls.__compiled = True |
83 | 114 |
84 def __init__(self) | 115 def __init__(self): |
85 self.compile() | 116 self.compile() |
86 | 117 |
class LinearRegressionEquations(LinearPredictorEquations):
    """
    Symbolic (theano) equations for accumulating the sufficient statistics
    of linear regression: the accumulators XtX and XtY and an `update`
    function that adds one minibatch's contribution to them in place.
    """
    P = LinearPredictorEquations
    XtX = T.matrix()  # (n_inputs+1) x (n_inputs+1)
    XtY = T.matrix()  # (n_inputs+1) x n_outputs
    # Inputs extended with a leading constant-1 column for the bias term.
    extended_input = prepend_1_to_each_row(P.inputs)
    # In-place accumulation of this minibatch into XtX and XtY.
    new_XtX = T.add_inplace(XtX, T.dot(extended_input.T, extended_input))
    new_XtY = T.add_inplace(XtY, T.dot(extended_input.T, P.targets))

    # Name-mangled per class, so this flag is independent of the base class's.
    __compiled = False

    @classmethod
    def compile(cls, linker='c|py'):
        # Compile the shared `update` function once per class.
        if cls.__compiled:
            return

        def fn(input_vars, output_vars):
            return staticmethod(theano.function(input_vars, output_vars, linker=linker))

        cls.update = fn([cls.XtX, cls.XtY, cls.P.inputs, cls.P.targets],
                        [cls.new_XtX, cls.new_XtY])

        cls.__compiled = True

    def __init__(self):
        self.compile()
95 | 141 |
96 class LinearPredictor(object): | 142 class LinearPredictor(object): |
97 """ | 143 """ |
98 A linear predictor has parameters theta (a bias vector and a weight matrix) | 144 A linear predictor has parameters theta (a bias vector and a weight matrix) |
99 it can use to make a linear prediction (according to the LinearPredictorEquations). | 145 it can use to make a linear prediction (according to the LinearPredictorEquations). |
101 """ | 147 """ |
102 def __init__(self, theta): | 148 def __init__(self, theta): |
103 self.theta=theta | 149 self.theta=theta |
104 self.n_inputs=theta.shape[0]-1 | 150 self.n_inputs=theta.shape[0]-1 |
105 self.n_outputs=theta.shape[1] | 151 self.n_outputs=theta.shape[1] |
106 self.predict_equations = LinearPredictorEquations() | 152 self.equations = LinearPredictorEquations() |
107 | 153 |
108 def compute_outputs(self,inputs): | 154 def compute_outputs(self,inputs): |
109 return self.predict_equations.compute_outputs(inputs,self.theta) | 155 return self.equations.compute_outputs(inputs,self.theta) |
110 def compute_errors(self,inputs,targets): | 156 def compute_errors(self,inputs,targets): |
111 return self.predict_equations.compute_errors(self.compute_outputs(inputs),targets) | 157 return self.equations.compute_errors(self.compute_outputs(inputs),targets) |
112 def compute_outputs_and_errors(self,inputs,targets): | 158 def compute_outputs_and_errors(self,inputs,targets): |
113 outputs = self.compute_outputs(inputs) | 159 outputs = self.compute_outputs(inputs) |
114 return [outputs,self.predict_equations.compute_errors(outputs,targets)] | 160 return [outputs,self.equations.compute_errors(outputs,targets)] |
161 def compute_mse(self,inputs,targets): | |
162 errors = self.compute_errors(inputs,targets) | |
163 return numpy.sum(errors)/errors.size | |
115 | 164 |
116 def __call__(self,dataset,output_fieldnames=None,cached_output_dataset=False): | 165 def __call__(self,dataset,output_fieldnames=None,cached_output_dataset=False): |
117 assert dataset.hasFields(["input"]) | 166 assert dataset.hasFields(["input"]) |
118 if output_fieldnames is None: | 167 if output_fieldnames is None: |
119 if dataset.hasFields(["target"]): | 168 if dataset.hasFields(["target"]): |
135 return CachedDataSet(ds) | 184 return CachedDataSet(ds) |
136 else: | 185 else: |
137 return ds | 186 return ds |
138 | 187 |
139 | 188 |
140 self._XtX = T.matrix('XtX') | |
141 self._XtY = T.matrix('XtY') | |
142 self._extended_input = T.prepend_one_to_each_row(self._input) | |
143 self._output = T.dot(self._input,self._W.T) + self._b # (n_examples , n_outputs) matrix | |
144 self._squared_error = T.sum_within_rows(T.sqr(self._output-self._target)) # (n_examples ) vector | |
145 self._regularizer = self._L2_regularizer * T.dot(self._W,self._W) | |
146 self._new_XtX = add_inplace(self._XtX,T.dot(self._extended_input.T,self._extended_input)) | |
147 self._new_XtY = add_inplace(self._XtY,T.dot(self._extended_input.T,self._target)) | |
148 self._new_theta = T.solve_inplace(self._theta,self._XtX,self._XtY) | |
149 | |
150 def allocate(self,dataset): | |
151 dataset_n_inputs = dataset["input"].shape[1] | |
152 dataset_n_outputs = dataset["target"].shape[1] | |
153 if not self._n_inputs: | |
154 self._n_inputs = dataset_n_inputs | |
155 self._n_outputs = dataset_n_outputs | |
156 self.XtX = numpy.zeros((1+self._n_inputs,1+self._n_inputs)) | |
157 self.XtY = numpy.zeros((1+self._n_inputs,self._n_outputs)) | |
158 self.theta = numpy.zeros((self._n_outputs,1+self._n_inputs)) | |
159 self.forget() | |
160 elif self._n_inputs!=dataset_n_inputs or self._n_outputs!=dataset_n_outputs: | |
161 # if the input or target changes dimension on the fly, we resize and forget everything | |
162 self.forget() | |
163 | |
164 def forget(self): | |
165 if self._n_inputs and self._n_outputs: | |
166 self.XtX.resize((1+self.n_inputs,1+self.n_inputs)) | |
167 self.XtY.resize((1+self.n_inputs,self.n_outputs)) | |
168 self.XtX.data[:,:]=0 | |
169 self.XtY.data[:,:]=0 | |
170 numpy.diag(self.XtX.data)[1:]=self.L2_regularizer | |
171 | |
172 def __call__(self,dataset): | |
173 | |
174 |