pylearn: comparison linear_regression.py @ 77:1e2bb5bad636
toying with different ways to implement learners
author | bengioy@bengiomac.local |
---|---|
date | Sun, 04 May 2008 15:09:22 -0400 |
parents | 90e4c0784d6e |
children | 3499918faa9d |
76:ccd6ae89a7c4 | 77:1e2bb5bad636 |
---|---|
51 | 51 |
52 - optional output attributes (available in self and optionally in output dataset) | 52 - optional output attributes (available in self and optionally in output dataset) |
53 | 53 |
54 - 'b' (only set by update) | 54 - 'b' (only set by update) |
55 - 'W' (only set by update) | 55 - 'W' (only set by update) |
56 - 'total_squared_error' (set by use and by update) = sum over examples of example_wise_squared_error | 56 - 'regularization_term' (only set by update) |
57 - 'total_loss' (set by use and by update) = regularizer + total_squared_error | |
58 - 'XtX' (only set by update) | 57 - 'XtX' (only set by update) |
59 - 'XtY' (only set by update) | 58 - 'XtY' (only set by update) |
60 | 59 |
61 """ | 60 """ |
62 | 61 |
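Taken together, the methods inserted below compute the ridge-regression solution from the accumulated statistics XtX and XtY, with the bias column left unregularized. As a reference for what they compute, here is a minimal standalone numpy sketch (the function name and shapes are illustrative, not part of this file):

```python
import numpy as np

def ridge_solve(X, Y, lambda_=0.1):
    """Closed-form ridge regression with a bias column: a standalone
    numpy sketch of the XtX/XtY bookkeeping used in this file."""
    Xe = np.hstack([np.ones((X.shape[0], 1)), X])  # prepend 1 to each row
    XtX = Xe.T @ Xe                                # (1+n_inputs, 1+n_inputs)
    XtY = Xe.T @ Y                                 # (1+n_inputs, n_outputs)
    idx = np.arange(1, XtX.shape[0])
    XtX[idx, idx] += lambda_                       # regularize all but the bias
    theta = np.linalg.solve(XtX, XtY).T            # (n_outputs, 1+n_inputs)
    return theta[:, 0], theta[:, 1:]               # b, W
```

Predictions are then `X @ W.T + b`, matching `minibatch_wise_outputs` further down.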
63 def __init__(self,lambda_=0.): | 62 # definitions specific to linear regression: |
| 63 |
| 64 def global_inputs(self): |
| 65 self.lambda_ = as_scalar(0.,'lambda') # 'lambda' is a reserved word in Python, hence the trailing underscore |
| 66 self.theta = t.matrix('theta') |
| 67 self.W = self.theta[:,1:] |
| 68 self.b = self.theta[:,0] |
| 69 self.XtX = t.matrix('XtX') |
| 70 self.XtY = t.matrix('XtY') |
| 71 |
| 72 def global_outputs(self): |
| 73 self.regularization_term = self.lambda_ * t.sum(t.sqr(self.W)) # L2 penalty: lambda * ||W||^2 |
| 74 self.loss = self.regularization_term + t.sum(self.squared_error) # this only makes sense if the whole training set fits in memory in a minibatch |
| 75 self.loss_function = Function([self.W,self.lambda_,self.squared_error],[self.loss]) |
| 76 |
| 77 def initialize(self): |
| 78 self.XtX.resize((1+self.n_inputs,1+self.n_inputs)) |
| 79 self.XtY.resize((1+self.n_inputs,self.n_outputs)) |
| 80 self.XtX.data[:,:]=0 |
| 81 self.XtY.data[:,:]=0 |
| 82 numpy.fill_diagonal(self.XtX.data[1:,1:],self.lambda_.data) # write the regularizer through a view of the diagonal |
| 83 |
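A note on line 82: `numpy.diag` does not hand back a writable view of the diagonal (it is a copy in old NumPy and a read-only view in recent versions), so assigning into it never reaches `XtX`. Writing through a slice view does work:

```python
import numpy as np

A = np.zeros((3, 3))
# np.diag(A)[1:] = 5.0 would not modify A (copy or read-only view,
# depending on the NumPy version); a slice is a genuine view instead:
np.fill_diagonal(A[1:, 1:], 5.0)
assert A[1, 1] == A[2, 2] == 5.0 and A[0, 0] == 0.0
```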
| 84 def updated_variables(self): |
| 85 self.new_XtX = self.XtX + t.dot(self.extended_input.T,self.extended_input) |
| 86 self.new_XtY = self.XtY + t.dot(self.extended_input.T,self.target) |
| 87 self.new_theta = t.solve(self.new_XtX,self.new_XtY).T # solve with the updated statistics; transposed so rows index outputs, as in W = theta[:,1:] |
| 88 |
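`updated_variables` is what makes the learner online: each minibatch only adds its contribution to XtX and XtY, so the whole training set never has to be held in memory at once. A numpy equivalent of one such update, with illustrative names:

```python
import numpy as np

def update_stats(XtX, XtY, X_batch, Y_batch):
    """Accumulate one minibatch into the sufficient statistics and
    re-solve for theta (numpy sketch of updated_variables)."""
    Xe = np.hstack([np.ones((X_batch.shape[0], 1)), X_batch])
    XtX += Xe.T @ Xe                    # in-place accumulation
    XtY += Xe.T @ Y_batch
    return np.linalg.solve(XtX, XtY).T  # rows index outputs, as in theta
```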
| 89 def minibatch_wise_inputs(self): |
| 90 self.input = t.matrix('input') # n_examples x n_inputs |
| 91 self.target = t.matrix('target') # n_examples x n_outputs |
| 92 |
| 93 def minibatch_wise_outputs(self): |
| 94 # self.input is a (n_examples, n_inputs) minibatch matrix |
| 95 self.extended_input = t.prepend_one_to_each_row(self.input) |
| 96 self.output = t.dot(self.input,self.W.T) + self.b # (n_examples, n_outputs) matrix |
| 97 self.squared_error = t.sum_within_rows(t.sqr(self.output-self.target)) # (n_examples,) vector |
| 98 |
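`minibatch_wise_outputs` is a plain affine map plus a per-example error; in standalone numpy terms (illustrative names):

```python
import numpy as np

def forward(X, W, b):
    """Affine prediction, as in minibatch_wise_outputs (numpy sketch)."""
    return X @ W.T + b                            # (n_examples, n_outputs)

def squared_error(output, target):
    """Sum of squared errors within each row."""
    return ((output - target) ** 2).sum(axis=1)   # (n_examples,) vector
```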
| 99 def attribute_names(self): |
| 100 return ["lambda_","b","W","regularization_term","XtX","XtY"] |
| 101 |
| 102 def default_output_fields(self, input_fields): |
| 103 output_fields = ["output"] |
| 104 if "target" in input_fields: |
| 105 output_fields.append("squared_error") |
| 106 return output_fields |
| 107 |
| 108 # generic machinery built on the methods above |
| 109 |
| 110 def minibatchwise_use_functions(self, input_fields, output_fields): |
| 111 if not output_fields: |
| 112 output_fields = self.default_output_fields(input_fields) |
| 113 key = (tuple(input_fields),tuple(output_fields)) # tuples, so the cache key is hashable |
| 114 if key not in self.use_functions_dictionary: |
| 115 self.use_functions_dictionary[key]=Function(self.names2attributes(input_fields), |
| 116 self.names2attributes(output_fields)) |
| 117 return self.use_functions_dictionary[key] |
| 118 |
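`minibatchwise_use_functions` compiles one function per (input fields, output fields) signature and memoizes it, so repeated `use` calls with the same fields reuse the compiled function. The caching pattern in isolation, with a stand-in `compile_fn` for `Function`:

```python
def get_use_function(cache, input_fields, output_fields, compile_fn):
    """Memoize compiled functions by field signature (sketch)."""
    key = (tuple(input_fields), tuple(output_fields))  # tuples are hashable
    if key not in cache:
        cache[key] = compile_fn(input_fields, output_fields)
    return cache[key]
```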
| 119 def names2attributes(self,names,return_Result=True): |
| 120 if return_Result: |
| 121 return [getattr(self,name) for name in names] |
| 122 else: |
| 123 return [getattr(self,name).data for name in names] |
| 124 |
| 125 def use(self,input_dataset,output_fieldnames=None,test_stats_collector=None,copy_inputs=True): |
| 126 minibatchwise_use_function = self.minibatchwise_use_functions(input_dataset.fieldNames(),output_fieldnames) |
| 127 virtual_output_dataset = ApplyFunctionDataSet(input_dataset, |
| 128 minibatchwise_use_function, |
| 129 True,DataSet.numpy_vstack, |
| 130 DataSet.numpy_hstack) |
| 131 # actually force the computation |
| 132 output_dataset = CachedDataSet(virtual_output_dataset,True) |
| 133 if copy_inputs: |
| 134 output_dataset = input_dataset | output_dataset |
| 135 # copy into the output dataset the attributes computed by the learner |
| 136 for attribute in self.attribute_names(): |
| 137 # .data assumes that all attributes are Result objects |
| 138 setattr(output_dataset,attribute,copy.deepcopy(getattr(self,attribute).data)) |
| 139 if test_stats_collector: |
| 140 test_stats_collector.update(output_dataset) |
| 141 for attribute in test_stats_collector.attribute_names(): |
| 142 output_dataset[attribute] = copy.deepcopy(test_stats_collector[attribute]) |
| 143 return output_dataset |
| 144 |
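Putting it together, a `use` call might look like the following. This is a hypothetical sketch: the class name, constructor signature, and field names are assumptions based on this file and pylearn's DataSet conventions, and `update` has no body yet in this revision.

```python
# Hypothetical usage sketch (names assumed, not confirmed by this file):
learner = LinearRegression(lambda_=0.1)
learner.update(train_set)       # would accumulate XtX/XtY and re-solve theta
out = learner.use(test_set)     # dataset with an "output" field; also has
                                # "squared_error" if "target" was an input field
```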
| 145 def update(self,training_set,train_stats_collector=None): |
| 146 pass # body not yet written in this revision |
| 147 |
| 148 def __init__(self,lambda_=0.,max_memory_use=500): |
64 """ | 149 """ |
65 @type lambda_: float | 150 @type lambda_: float |
66 @param lambda_: regularization coefficient | 151 @param lambda_: regularization coefficient |
67 """ | 152 """ |
68 | 153 |
105 if output_fieldname=="output": | 190 if output_fieldname=="output": |
106 use_functions.append(self.output_function) | 191 use_functions.append(self.output_function) |
107 elif output_fieldname=="squared_error": | 192 elif output_fieldname=="squared_error": |
108 use_functions.append(self.output_function) # FIXME: incomplete WIP; squared_error needs its own compiled function | 193 use_functions.append(self.output_function) # FIXME: incomplete WIP; squared_error needs its own compiled function |
109 | 194 |
| 195 n_examples = len(input_dataset) |
| 196 |
| 197 for minibatch in input_dataset.minibatches(minibatch_size=minibatch_size, allow_odd_last_minibatch=True): |
| 198 use_function( |
| 199 |