comparison linear_regression.py @ 390:efb797c5efc0

First non-crashing draft of LinearRegression
author Yoshua Bengio <bengioy@iro.umontreal.ca>
date Tue, 08 Jul 2008 17:49:44 -0400
parents 74b402b5a81b
children 273e5c03003e
comparison
equal deleted inserted replaced
386:a474341861fa 390:efb797c5efc0
2 Implementation of linear regression, with or without L2 regularization. 2 Implementation of linear regression, with or without L2 regularization.
3 This is one of the simplest example of L{learner}, and illustrates 3 This is one of the simplest example of L{learner}, and illustrates
4 the use of theano. 4 the use of theano.
5 """ 5 """
6 6
7 from pylearn import OfflineLearningAlgorithm 7 from pylearn.learner import OfflineLearningAlgorithm
8 from theano import tensor as T 8 from theano import tensor as T
9 from theano.others_ops import prepend_1_to_each_row
9 from theano.scalar import as_scalar 10 from theano.scalar import as_scalar
10 from common.autoname import AutoName 11 from common.autoname import AutoName
12 import theano
13 import numpy
11 14
12 class LinearRegression(OfflineLearningAlgorithm): 15 class LinearRegression(OfflineLearningAlgorithm):
13 """ 16 """
14 Implement linear regression, with or without L2 regularization 17 Implement linear regression, with or without L2 regularization
15 (the former is called Ridge Regression and the latter Ordinary Least Squares). 18 (the former is called Ridge Regression and the latter Ordinary Least Squares).
19
20 Usage:
21
22 linear_regressor=LinearRegression(L2_regularizer=0.1)
23 linear_predictor=linear_regression(training_set)
24 all_results_dataset=linear_predictor(test_set) # creates a dataset with "output" and "squared_error" field
25 outputs = linear_predictor.compute_outputs(inputs) # inputs and outputs are numpy arrays
26 outputs, errors = linear_predictor.compute_outputs_and_errors(inputs,targets)
27 errors = linear_predictor.compute_errors(inputs,targets)
28 mse = linear_predictor.compute_mse(inputs,targets)
29
30
31
32 The training_set must have fields "input" and "target".
33 The test_set must have field "input", and needs "target" if
34 we want to compute the squared errors.
16 35
17 The predictor parameters are obtained analytically from the training set. 36 The predictor parameters are obtained analytically from the training set.
18 Training can proceed sequentially (with multiple calls to update with 37 Training can proceed sequentially (with multiple calls to update with
19 different disjoint subsets of the training sets). After each call to 38 different disjoint subsets of the training sets). After each call to
20 update the predictor is ready to be used (and optimized for the union 39 update the predictor is ready to be used (and optimized for the union
50 - 'target' (always expected by the learning algorithm, optional for learned model) 69 - 'target' (always expected by the learning algorithm, optional for learned model)
51 - 'output' (always produced by learned model) 70 - 'output' (always produced by learned model)
52 - 'squared_error' (optionally produced by learned model if 'target' is provided) 71 - 'squared_error' (optionally produced by learned model if 'target' is provided)
53 = example-wise squared error 72 = example-wise squared error
54 """ 73 """
55 def __init__(self, L2_regularizer=0): 74 def __init__(self, L2_regularizer=0,minibatch_size=10000):
56 self.predictor = LinearPredictor(None,None
57 self.L2_regularizer=L2_regularizer 75 self.L2_regularizer=L2_regularizer
58 self._XtX = T.matrix('XtX') 76 self.equations = LinearRegressionEquations()
59 self._XtY = T.matrix('XtY') 77 self.minibatch_size=1000
60 self._extended_input = T.prepend_one_to_each_row(self._input) 78
79 def __call__(self,trainset):
80 first_example = trainset[0]
81 n_inputs = first_example['input'].size
82 n_outputs = first_example['target'].size
83 XtX = numpy.zeros((n_inputs+1,n_inputs+1))
84 XtY = numpy.zeros((n_inputs+1,n_outputs))
85 for i in xrange(n_inputs):
86 XtX[i+1,i+1]=self.L2_regularizer
87 mbs=min(self.minibatch_size,len(trainset))
88 for inputs,targets in trainset.minibatches(["input","target"],minibatch_size=mbs):
89 XtX,XtY=self.equations.update(XtX,XtY,inputs,targets)
90 theta=numpy.linalg.solve(XtX,XtY)
91 return LinearPredictor(theta)
61 92
62 class LinearPredictorEquations(AutoName): 93 class LinearPredictorEquations(AutoName):
63 inputs = T.matrix() # minibatchsize x n_inputs 94 inputs = T.matrix() # minibatchsize x n_inputs
64 targets = T.matrix() # minibatchsize x n_outputs 95 targets = T.matrix() # minibatchsize x n_outputs
65 theta = T.matrix() # (n_inputs+1) x n_outputs 96 theta = T.matrix() # (n_inputs+1) x n_outputs
74 if cls.__compiled: 105 if cls.__compiled:
75 return 106 return
76 def fn(input_vars,output_vars): 107 def fn(input_vars,output_vars):
77 return staticmethod(theano.function(input_vars,output_vars, linker=linker)) 108 return staticmethod(theano.function(input_vars,output_vars, linker=linker))
78 109
79 cls.compute_outputs = fn([inputs,theta],[outputs]) 110 cls.compute_outputs = fn([cls.inputs,cls.theta],[cls.outputs])
80 cls.compute_errors = fn([outputs,targets],[squared_errors]) 111 cls.compute_errors = fn([cls.outputs,cls.targets],[cls.squared_errors])
81 112
82 cls.__compiled = True 113 cls.__compiled = True
83 114
84 def __init__(self) 115 def __init__(self):
85 self.compile() 116 self.compile()
86 117
87 class LinearRegressionEquations(LinearPredictorEquations): 118 class LinearRegressionEquations(LinearPredictorEquations):
88 P = LinearPredictorEquations 119 P = LinearPredictorEquations
89 XtX = T.matrix() # (n_inputs+1) x (n_inputs+1) 120 XtX = T.matrix() # (n_inputs+1) x (n_inputs+1)
90 XtY = T.matrix() # (n_inputs+1) x n_outputs 121 XtY = T.matrix() # (n_inputs+1) x n_outputs
91 extended_input = T.prepend_scalar_to_each_row(1.,P.inputs) 122 extended_input = prepend_1_to_each_row(P.inputs)
92 new_XtX = add_inplace(XtX,T.dot(extended_input.T,extended_input)) 123 new_XtX = T.add_inplace(XtX,T.dot(extended_input.T,extended_input))
93 new_XtY = add_inplace(XtY,T.dot(extended_input.T,P.targets)) 124 new_XtY = T.add_inplace(XtY,T.dot(extended_input.T,P.targets))
94 new_theta = T.Cholesky_solve_inplace(P.theta,XtX,XtY) # solve linear system XtX theta = XtY 125
126 __compiled = False
127
128 @classmethod
129 def compile(cls,linker='c|py'):
130 if cls.__compiled:
131 return
132 def fn(input_vars,output_vars):
133 return staticmethod(theano.function(input_vars,output_vars, linker=linker))
134
135 cls.update = fn([cls.XtX,cls.XtY,cls.P.inputs,cls.P.targets],[cls.new_XtX,cls.new_XtY])
136
137 cls.__compiled = True
138
139 def __init__(self):
140 self.compile()
95 141
96 class LinearPredictor(object): 142 class LinearPredictor(object):
97 """ 143 """
98 A linear predictor has parameters theta (a bias vector and a weight matrix) 144 A linear predictor has parameters theta (a bias vector and a weight matrix)
99 it can use to make a linear prediction (according to the LinearPredictorEquations). 145 it can use to make a linear prediction (according to the LinearPredictorEquations).
101 """ 147 """
102 def __init__(self, theta): 148 def __init__(self, theta):
103 self.theta=theta 149 self.theta=theta
104 self.n_inputs=theta.shape[0]-1 150 self.n_inputs=theta.shape[0]-1
105 self.n_outputs=theta.shape[1] 151 self.n_outputs=theta.shape[1]
106 self.predict_equations = LinearPredictorEquations() 152 self.equations = LinearPredictorEquations()
107 153
108 def compute_outputs(self,inputs): 154 def compute_outputs(self,inputs):
109 return self.predict_equations.compute_outputs(inputs,self.theta) 155 return self.equations.compute_outputs(inputs,self.theta)
110 def compute_errors(self,inputs,targets): 156 def compute_errors(self,inputs,targets):
111 return self.predict_equations.compute_errors(self.compute_outputs(inputs),targets) 157 return self.equations.compute_errors(self.compute_outputs(inputs),targets)
112 def compute_outputs_and_errors(self,inputs,targets): 158 def compute_outputs_and_errors(self,inputs,targets):
113 outputs = self.compute_outputs(inputs) 159 outputs = self.compute_outputs(inputs)
114 return [outputs,self.predict_equations.compute_errors(outputs,targets)] 160 return [outputs,self.equations.compute_errors(outputs,targets)]
161 def compute_mse(self,inputs,targets):
162 errors = self.compute_errors(inputs,targets)
163 return numpy.sum(errors)/errors.size
115 164
116 def __call__(self,dataset,output_fieldnames=None,cached_output_dataset=False): 165 def __call__(self,dataset,output_fieldnames=None,cached_output_dataset=False):
117 assert dataset.hasFields(["input"]) 166 assert dataset.hasFields(["input"])
118 if output_fieldnames is None: 167 if output_fieldnames is None:
119 if dataset.hasFields(["target"]): 168 if dataset.hasFields(["target"]):
135 return CachedDataSet(ds) 184 return CachedDataSet(ds)
136 else: 185 else:
137 return ds 186 return ds
138 187
139 188
140 self._XtX = T.matrix('XtX')
141 self._XtY = T.matrix('XtY')
142 self._extended_input = T.prepend_one_to_each_row(self._input)
143 self._output = T.dot(self._input,self._W.T) + self._b # (n_examples , n_outputs) matrix
144 self._squared_error = T.sum_within_rows(T.sqr(self._output-self._target)) # (n_examples ) vector
145 self._regularizer = self._L2_regularizer * T.dot(self._W,self._W)
146 self._new_XtX = add_inplace(self._XtX,T.dot(self._extended_input.T,self._extended_input))
147 self._new_XtY = add_inplace(self._XtY,T.dot(self._extended_input.T,self._target))
148 self._new_theta = T.solve_inplace(self._theta,self._XtX,self._XtY)
149
150 def allocate(self,dataset):
151 dataset_n_inputs = dataset["input"].shape[1]
152 dataset_n_outputs = dataset["target"].shape[1]
153 if not self._n_inputs:
154 self._n_inputs = dataset_n_inputs
155 self._n_outputs = dataset_n_outputs
156 self.XtX = numpy.zeros((1+self._n_inputs,1+self._n_inputs))
157 self.XtY = numpy.zeros((1+self._n_inputs,self._n_outputs))
158 self.theta = numpy.zeros((self._n_outputs,1+self._n_inputs))
159 self.forget()
160 elif self._n_inputs!=dataset_n_inputs or self._n_outputs!=dataset_n_outputs:
161 # if the input or target changes dimension on the fly, we resize and forget everything
162 self.forget()
163
164 def forget(self):
165 if self._n_inputs and self._n_outputs:
166 self.XtX.resize((1+self.n_inputs,1+self.n_inputs))
167 self.XtY.resize((1+self.n_inputs,self.n_outputs))
168 self.XtX.data[:,:]=0
169 self.XtY.data[:,:]=0
170 numpy.diag(self.XtX.data)[1:]=self.L2_regularizer
171
172 def __call__(self,dataset):
173
174