pylearn: statscollector.py @ 216:4b7e89b75e2b
Modified ArrayDataSet's handling of column fields.
Previously, if a fieldname was associated with an integer column index (as
opposed to a column range or slice), the field was returned as an Nx1 matrix.
Now, if a fieldname is associated with an integer column index, the resulting
field is a vector of length N.
The old behaviour can still be obtained by associating the fieldname with
slice(col, col+1).
author | James Bergstra <bergstrj@iro.umontreal.ca> |
---|---|
date | Thu, 22 May 2008 19:07:51 -0400 |
parents | 50a8302addaf |
children | fe57b96f33d4 |
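
The change described in the commit message boils down to integer versus slice indexing on the underlying array. The sketch below is not pylearn code; it uses plain numpy and a made-up fields mapping to show the shape a field gets under the new convention (integer index) and under the old one (slice(col, col+1)).

import numpy

data = numpy.arange(12.).reshape(4, 3)   # N=4 examples, 3 columns
fields = {"target": 2,                   # integer column index (new: vector)
          "target_col": slice(2, 3)}     # slice(col, col+1)    (old: Nx1 matrix)

assert data[:, fields["target"]].shape == (4,)        # vector of length N
assert data[:, fields["target_col"]].shape == (4, 1)  # Nx1 matrix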
line source
# Here is how I see stats collectors:
#
#    def my_stats((residue,nll),(regularizer)):
#            mse=examplewise_mean(square_norm(residue))
#            training_loss=regularizer+examplewise_sum(nll)
#            set_names(locals())
#            return ((residue,nll),(regularizer),(),(mse,training_loss))
#    my_stats_collector = make_stats_collector(my_stats)
#
# where make_stats_collector calls my_stats(examplewise_fields, attributes) to
# construct its update function, and figure out what are the input fields (here "residue"
# and "nll") and input attributes (here "regularizer") it needs, and the output
# attributes that it computes (here "mse" and "training_loss"). Remember that
# fields are examplewise quantities, but attributes are not, in my jargon.
# In the above example, I am highlighting that some operations done in my_stats
# are examplewise and some are not. I am hoping that theano Ops can do these
# kinds of internal side-effect operations (and proper initialization of these hidden
# variables). I expect that a StatsCollector (returned by make_stats_collector)
# knows the following methods:
#    stats_collector.input_fieldnames
#    stats_collector.input_attribute_names
#    stats_collector.output_attribute_names
#    stats_collector.update(mini_dataset)
#    stats_collector['mse']
# where mini_dataset has the input_fieldnames() as fields and the input_attribute_names()
# as attributes, and in the resulting dataset the output_attribute_names() are set to the
# proper numeric values.

import theano
from theano import tensor as t

from Learner import Learner
from lookup_list import LookupList
from dataset import AttributesHolder  # assumed location of AttributesHolder


class StatsCollectorModel(AttributesHolder):
    """Holds the compiled update function and the current output values
    of a StatsCollector applied to a dataset."""

    def __init__(self, stats_collector):
        self.stats_collector = stats_collector
        self.outputs = LookupList(stats_collector.output_names,
                                  [None for name in stats_collector.output_names])
        # the statistics get initialized here
        self.update_function = theano.function(
            stats_collector.input_attributes + stats_collector.input_fields,
            stats_collector.outputs,
            linker="c|py")
        for name, value in self.outputs.items():
            setattr(self, name, value)

    def update(self, dataset):
        input_fields = dataset.fields()(self.stats_collector.input_field_names)
        input_attributes = dataset.getAttributes(self.stats_collector.input_attribute_names)
        self.outputs._values = self.update_function(input_attributes + input_fields)
        # expose the freshly computed outputs as attributes as well
        for name, value in self.outputs.items():
            setattr(self, name, value)

    def __call__(self):
        return self.outputs

    def attributeNames(self):
        return self.outputs.keys()


class StatsCollector(AttributesHolder):
    """Declares which fields/attributes are consumed and which outputs are
    computed; calling it (optionally on a dataset) yields a StatsCollectorModel."""

    def __init__(self, input_attributes, input_fields, outputs):
        self.input_attributes = input_attributes
        self.input_fields = input_fields
        self.outputs = outputs
        self.input_attribute_names = [v.name for v in input_attributes]
        self.input_field_names = [v.name for v in input_fields]
        self.output_names = [v.name for v in outputs]

    def __call__(self, dataset=None):
        model = StatsCollectorModel(self)
        if dataset:
            model.update(dataset)
        return model


if __name__ == '__main__':

    def my_statscollector():
        regularizer = t.scalar()
        nll = t.matrix()
        class_error = t.matrix()
        # examplewise_sum/examplewise_mean are the hoped-for theano Ops
        # described in the notes at the top of this file
        total_loss = regularizer + t.examplewise_sum(nll)
        avg_nll = t.examplewise_mean(nll)
        avg_class_error = t.examplewise_mean(class_error)
        for name, val in locals().items():
            val.name = name
        return StatsCollector([regularizer],
                              [nll, class_error],
                              [total_loss, avg_nll, avg_class_error])
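
    # Hypothetical usage sketch (not part of the original notes): assuming a
    # dataset object that exposes fields() and getAttributes() with the names
    # declared above, driving the collector would look roughly like this:
    #
    #    sc = my_statscollector()
    #    model = sc(training_set)        # compiles the update function and runs one update
    #    model.update(validation_set)    # compute the statistics on another dataset
    #    print model.avg_nll, model.avg_class_error, model.total_loss
    #    print model()                   # LookupList with all output names/values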
# OLD DESIGN:
#
# class StatsCollector(object):
#    """A StatsCollector object is used to record performance statistics during training
#    or testing of a learner. It can be configured to measure different things and
#    accumulate the appropriate statistics. From these statistics it can be interrogated
#    to obtain performance measures of interest (such as maxima, minima, mean, standard
#    deviation, standard error, etc.). Optionally, the observations can be weighted
#    (yielding weighted mean, weighted variance, etc., where applicable). The statistics
#    that are desired can be specified among a list supported by the StatsCollector
#    class or subclass. When some statistics are requested, others become automatically
#    available (e.g., sum or mean)."""
#
#    default_statistics = [mean,standard_deviation,min,max]
#
#    __init__(self, n_quantities_observed, statistics=default_statistics):
#        self.n_quantities_observed = n_quantities_observed
#
#    clear(self):
#        raise NotImplementedError
#
#    update(self, observations):
#        """observations is a numpy vector of length n_quantities_observed. Some
#        entries can be 'missing' (with a NaN entry) and will not be counted in the
#        statistics."""
#        raise NotImplementedError
#
#    __getattr__(self, statistic):
#        """Return a particular statistic, which may be inferred from the collected statistics.
#        The argument is a string naming that statistic."""
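

# --- Illustrative sketch (not part of the original file) ---
# A minimal NaN-aware accumulator in the spirit of the OLD DESIGN above, using
# plain numpy; the class name and interface here are made up for illustration.
import numpy

class RunningStats(object):
    """Accumulate count/sum/sum-of-squares per observed quantity,
    skipping 'missing' (NaN) entries as the old design describes."""

    def __init__(self, n_quantities_observed):
        self.n = numpy.zeros(n_quantities_observed)
        self.s = numpy.zeros(n_quantities_observed)
        self.ss = numpy.zeros(n_quantities_observed)

    def update(self, observations):
        observations = numpy.asarray(observations, dtype=float)
        present = ~numpy.isnan(observations)   # missing entries are not counted
        self.n += present
        self.s += numpy.where(present, observations, 0.0)
        self.ss += numpy.where(present, observations ** 2, 0.0)

    @property
    def mean(self):
        return self.s / self.n

    @property
    def standard_deviation(self):
        return numpy.sqrt(self.ss / self.n - self.mean ** 2)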