Mercurial > pylearn
annotate statscollector.py @ 228:6f55e301c687
optimisation of ArrayDataSet
author | Frederic Bastien <bastienf@iro.umontreal.ca> |
---|---|
date | Fri, 16 May 2008 16:38:07 -0400 |
parents | f62a03c9d485 |
children | 50a8302addaf |
rev | line source |
---|---|
1
2cd82666b9a7
Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents:
diff
changeset
|
1 |
192
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
2 # Here is how I see stats collectors: |
1
2cd82666b9a7
Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents:
diff
changeset
|
3 |
192
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
4 # def my_stats((residue,nll),(regularizer)): |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
5 # mse=examplewise_mean(square_norm(residue)) |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
6 # training_loss=regularizer+examplewise_sum(nll) |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
7 # set_names(locals()) |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
8 # return ((residue,nll),(regularizer),(),(mse,training_loss)) |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
9 # my_stats_collector = make_stats_collector(my_stats) |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
10 # |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
11 # where make_stats_collector calls my_stats(examplewise_fields, attributes) to |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
12 # construct its update function, and figure out what are the input fields (here "residue" |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
13 # and "nll") and input attributes (here "regularizer") it needs, and the output |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
14 # attributes that it computes (here "mse" and "training_loss"). Remember that |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
15 # fields are examplewise quantities, but attributes are not, in my jargon. |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
16 # In the above example, I am highlighting that some operations done in my_stats |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
17 # are examplewise and some are not. I am hoping that theano Ops can do these |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
18 # kinds of internal side-effect operations (and proper initialization of these hidden |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
19 # variables). I expect that a StatsCollector (returned by make_stats_collector) |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
20 # provides the following attributes and methods: |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
21 # stats_collector.input_fieldnames |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
22 # stats_collector.input_attribute_names |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
23 # stats_collector.output_attribute_names |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
24 # stats_collector.update(mini_dataset) |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
25 # stats_collector['mse'] |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
26 # where mini_dataset has the input_fieldnames as fields and the input_attribute_names |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
27 # as attributes, and in the resulting dataset the output_attribute_names are set to the |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
28 # proper numeric values. |
1
2cd82666b9a7
Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents:
diff
changeset
|
29 |
192
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
30 |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
31 |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
32 import theano |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
33 from theano import tensor as t |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
34 from Learner import Learner |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
35 from lookup_list import LookupList |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
36 |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
37 class StatsCollectorModel(AttributesHolder): |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
38 def __init__(self,stats_collector): |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
39 self.stats_collector = stats_collector |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
40 self.outputs = LookupList(stats_collector.output_names,[None for name in stats_collector.output_names]) |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
41 # the statistics get initialized here |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
42 self.update_function = theano.function(input_attributes+input_fields,output_attributes+output_fields,linker="c|py") |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
43 for name,value in self.outputs.items(): |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
44 self.__setattribute__(name,value) |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
45 def update(self,dataset): |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
46 input_fields = dataset.fields()(self.stats_collector.input_field_names) |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
47 input_attributes = dataset.getAttributes(self.stats_collector.input_attribute_names) |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
48 self.outputs._values = self.update_function(input_attributes+input_fields) |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
49 for name,value in self.outputs.items(): |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
50 self.__setattribute__(name,value) |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
51 def __call__(self): |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
52 return self.outputs |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
53 def attributeNames(self): |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
54 return self.outputs.keys() |
1
2cd82666b9a7
Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents:
diff
changeset
|
55 |
192
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
56 class StatsCollector(AttributesHolder): |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
57 |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
58 def __init__(self,input_attributes, input_fields, outputs): |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
59 self.input_attributes = input_attributes |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
60 self.input_fields = input_fields |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
61 self.outputs = outputs |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
62 self.input_attribute_names = [v.name for v in input_attributes] |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
63 self.input_field_names = [v.name for v in input_fields] |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
64 self.output_names = [v.name for v in output_attributes] |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
65 |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
66 def __call__(self,dataset=None): |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
67 model = StatsCollectorModel(self) |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
68 if dataset: |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
69 self.update(dataset) |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
70 return model |
1
2cd82666b9a7
Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents:
diff
changeset
|
71 |
192
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
72 if __name__ == '__main__': |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
73 def my_statscollector(): |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
74 regularizer = t.scalar() |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
75 nll = t.matrix() |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
76 class_error = t.matrix() |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
77 total_loss = regularizer+t.examplewise_sum(nll) |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
78 avg_nll = t.examplewise_mean(nll) |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
79 avg_class_error = t.examplewise_mean(class_error) |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
80 for name,val in locals(): val.name = name |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
81 return StatsCollector([regularizer],[nll,class_error],[total_loss,avg_nll,avg_class_error]) |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
82 |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
83 |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
84 |
1
2cd82666b9a7
Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents:
diff
changeset
|
85 |
192
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
86 # OLD DESIGN: |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
87 # |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
88 # class StatsCollector(object): |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
89 # """A StatsCollector object is used to record performance statistics during training |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
90 # or testing of a learner. It can be configured to measure different things and |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
91 # accumulate the appropriate statistics. From these statistics it can be interrogated |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
92 # to obtain performance measures of interest (such as maxima, minima, mean, standard |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
93 # deviation, standard error, etc.). Optionally, the observations can be weighted |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
94 # (yielding weighted mean, weighted variance, etc., where applicable). The statistics |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
95 # that are desired can be specified among a list supported by the StatsCollector |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
96 # class or subclass. When some statistics are requested, others become automatically |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
97 # available (e.g., sum or mean).""" |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
98 # |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
99 # default_statistics = [mean,standard_deviation,min,max] |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
100 # |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
101 # __init__(self,n_quantities_observed, statistics=default_statistics): |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
102 # self.n_quantities_observed=n_quantities_observed |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
103 # |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
104 # clear(self): |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
105 # raise NotImplementedError |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
106 # |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
107 # update(self,observations): |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
108 # """The observations argument is a numpy vector of length n_quantities_observed. Some |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
109 # entries can be 'missing' (with a NaN entry) and will not be counted in the |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
110 # statistics.""" |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
111 # raise NotImplementedError |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
112 # |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
113 # __getattr__(self, statistic) |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
114 # """Return a particular statistic, which may be inferred from the collected statistics. |
f62a03c9d485
Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
1
diff
changeset
|
115 # The argument is a string naming that statistic.""" |
1
2cd82666b9a7
Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents:
diff
changeset
|
116 |
2cd82666b9a7
Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents:
diff
changeset
|
117 |
2cd82666b9a7
Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents:
diff
changeset
|
118 |
2cd82666b9a7
Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents:
diff
changeset
|
119 |
2cd82666b9a7
Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents:
diff
changeset
|
120 |
2cd82666b9a7
Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents:
diff
changeset
|
121 |