annotate statscollector.py @ 221:58e17421c69c

tester on iterator consistency now triggers a bug in dataset, linked to the combination of minibatch and slicing
author Thierry Bertin-Mahieux <bertinmt@iro.umontreal.ca>
date Fri, 23 May 2008 14:07:53 -0400
parents 50a8302addaf
children fe57b96f33d4
rev   line source
1
2cd82666b9a7 Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents:
diff changeset
1
192
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
2 # Here is how I see stats collectors:
1
2cd82666b9a7 Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents:
diff changeset
3
192
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
4 # def my_stats((residue,nll),(regularizer)):
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
5 # mse=examplewise_mean(square_norm(residue))
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
6 # training_loss=regularizer+examplewise_sum(nll)
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
7 # set_names(locals())
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
8 # return ((residue,nll),(regularizer),(),(mse,training_loss))
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
9 # my_stats_collector = make_stats_collector(my_stats)
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
10 #
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
11 # where make_stats_collector calls my_stats(examplewise_fields, attributes) to
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
12 # construct its update function, and figure out what are the input fields (here "residue"
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
13 # and "nll") and input attributes (here "regularizer") it needs, and the output
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
14 # attributes that it computes (here "mse" and "training_loss"). Remember that
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
15 # fields are examplewise quantities, but attributes are not, in my jargon.
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
16 # In the above example, I am highlighting that some operations done in my_stats
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
17 # are examplewise and some are not. I am hoping that theano Ops can do these
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
18 # kinds of internal side-effect operations (and proper initialization of these hidden
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
19 # variables). I expect that a StatsCollector (returned by make_stats_collector)
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
20 # provides the following attributes and methods:
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
21 # stats_collector.input_fieldnames
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
22 # stats_collector.input_attribute_names
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
23 # stats_collector.output_attribute_names
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
24 # stats_collector.update(mini_dataset)
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
25 # stats_collector['mse']
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
26 # where mini_dataset has the input_fieldnames() as fields and the input_attribute_names()
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
27 # as attributes, and in the resulting dataset the output_attribute_names() are set to the
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
28 # proper numeric values.
1
2cd82666b9a7 Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents:
diff changeset
29
192
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
30
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
31
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
32 import theano
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
33 from theano import tensor as t
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
34 from Learner import Learner
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
35 from lookup_list import LookupList
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
36
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
class StatsCollectorModel(AttributesHolder):
    """Current values of the statistics described by a StatsCollector.

    The model compiles the collector's symbolic inputs and outputs into a
    theano function once, at construction time.  Each call to update() runs
    that function on a dataset and stores the results both in self.outputs
    and as attributes of the model named after each output.

    NOTE(review): AttributesHolder is not imported in the visible part of
    this file -- confirm where it is supposed to come from.
    """

    def __init__(self, stats_collector):
        """Prepare the model for the given StatsCollector.

        stats_collector must provide input_attributes, input_fields and
        outputs (theano variables) together with output_names.
        """
        self.stats_collector = stats_collector
        output_names = stats_collector.output_names
        # Every statistic starts out as None until update() is called.
        self.outputs = LookupList(output_names,
                                  [None for name in output_names])
        # the statistics get initialized here
        # The symbolic variables live on the collector; the previous code
        # referred to undefined module-level names (NameError).  Inputs are
        # ordered attributes first, then fields, which is the order update()
        # uses when calling the compiled function.
        self.update_function = theano.function(
            stats_collector.input_attributes + stats_collector.input_fields,
            stats_collector.outputs,
            linker="c|py")
        self._expose_outputs()

    def _expose_outputs(self):
        """Mirror every (name, value) pair of self.outputs as an attribute."""
        for name, value in self.outputs.items():
            # setattr is the supported spelling; objects have no
            # __setattribute__ method.
            setattr(self, name, value)

    def update(self, dataset):
        """Recompute the statistics from dataset and store the new values.

        The collector's input field names and input attribute names are
        looked up in dataset and fed to the compiled update function.
        """
        collector = self.stats_collector
        # NOTE(review): the fields()(names) call is kept from the original;
        # confirm it matches the dataset API (it may need *names).
        input_fields = dataset.fields()(collector.input_field_names)
        input_attributes = dataset.getAttributes(collector.input_attribute_names)
        # A compiled theano function takes one positional argument per
        # declared input, so the concatenated inputs are unpacked.
        self.outputs._values = self.update_function(
            *(input_attributes + input_fields))
        self._expose_outputs()

    def __call__(self):
        """Return the LookupList holding the current statistic values."""
        return self.outputs

    def attributeNames(self):
        """Return the names of the statistics held by this model."""
        return self.outputs.keys()
1
2cd82666b9a7 Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents:
diff changeset
55
192
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
class StatsCollector(AttributesHolder):
    """Symbolic description of a set of statistics.

    A StatsCollector records which input attributes and input fields are
    needed and which output variables are computed from them.  Calling the
    collector builds a StatsCollectorModel that holds the actual values.

    NOTE(review): AttributesHolder is not imported in the visible part of
    this file -- confirm where it is supposed to come from.
    """

    def __init__(self, input_attributes, input_fields, outputs):
        """Store the symbolic variables and record their names.

        input_attributes, input_fields and outputs are sequences of objects
        that each expose a .name attribute.
        """
        self.input_attributes = input_attributes
        self.input_fields = input_fields
        self.outputs = outputs
        self.input_attribute_names = [v.name for v in input_attributes]
        self.input_field_names = [v.name for v in input_fields]
        # The parameter is called `outputs`; the previous code iterated over
        # an undefined name `output_attributes` (NameError).
        self.output_names = [v.name for v in outputs]

    def __call__(self, dataset=None):
        """Build a StatsCollectorModel, optionally updated with dataset.

        When dataset is given, the new model's statistics are computed from
        it before the model is returned.
        """
        model = StatsCollectorModel(self)
        # Test against the None default explicitly instead of relying on the
        # truthiness of the dataset object.
        if dataset is not None:
            # update() is defined on the model; this class defines no update
            # method of its own in the visible code.
            model.update(dataset)
        return model
1
2cd82666b9a7 Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents:
diff changeset
71
192
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
if __name__ == '__main__':
    def my_statscollector():
        """Template showing how a StatsCollector can be put together.

        Declares one input attribute (regularizer) and two input fields
        (nll, class_error), derives three statistics from them, names every
        variable after the Python identifier it is bound to and wraps the
        whole thing in a StatsCollector.
        """
        # Symbolic inputs.
        regularizer = t.scalar()
        nll = t.matrix()
        class_error = t.matrix()
        # Derived statistics.
        # NOTE(review): examplewise_sum / examplewise_mean are looked up on
        # theano.tensor -- confirm that they exist there.
        total_loss = regularizer + t.examplewise_sum(nll)
        avg_nll = t.examplewise_mean(nll)
        avg_class_error = t.examplewise_mean(class_error)
        # Give each variable the name of the identifier it is bound to.
        labelled = (
            ('regularizer', regularizer),
            ('nll', nll),
            ('class_error', class_error),
            ('total_loss', total_loss),
            ('avg_nll', avg_nll),
            ('avg_class_error', avg_class_error),
        )
        for label, variable in labelled:
            variable.name = label
        attributes = [regularizer]
        fields = [nll, class_error]
        statistics = [total_loss, avg_nll, avg_class_error]
        return StatsCollector(attributes, fields, statistics)
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
82
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
83
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
84
1
2cd82666b9a7 Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents:
diff changeset
85
192
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
86 # OLD DESIGN:
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
87 #
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
88 # class StatsCollector(object):
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
89 # """A StatsCollector object is used to record performance statistics during training
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
90 # or testing of a learner. It can be configured to measure different things and
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
91 # accumulate the appropriate statistics. From these statistics it can be interrogated
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
92 # to obtain performance measures of interest (such as maxima, minima, mean, standard
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
93 # deviation, standard error, etc.). Optionally, the observations can be weighted
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
94 # (yielding weighted mean, weighted variance, etc., where applicable). The statistics
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
95 # that are desired can be specified among a list supported by the StatsCollector
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
96 # class or subclass. When some statistics are requested, others become automatically
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
97 # available (e.g., sum or mean)."""
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
98 #
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
99 # default_statistics = [mean,standard_deviation,min,max]
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
100 #
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
101 # __init__(self,n_quantities_observed, statistics=default_statistics):
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
102 # self.n_quantities_observed=n_quantities_observed
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
103 #
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
104 # clear(self):
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
105 # raise NotImplementedError
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
106 #
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
107 # update(self,observations):
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
108 # """The observations is a numpy vector of length n_quantities_observed. Some
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
109 # entries can be 'missing' (with a NaN entry) and will not be counted in the
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
110 # statistics."""
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
111 # raise NotImplementedError
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
112 #
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
113 # __getattr__(self, statistic)
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
114 # """Return a particular statistic, which may be inferred from the collected statistics.
f62a03c9d485 Redesign of StatsCollector
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 1
diff changeset
115 # The argument is a string naming that statistic."""
1
2cd82666b9a7 Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents:
diff changeset
116
2cd82666b9a7 Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents:
diff changeset
117
2cd82666b9a7 Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents:
diff changeset
118
2cd82666b9a7 Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents:
diff changeset
119
2cd82666b9a7 Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents:
diff changeset
120
2cd82666b9a7 Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents:
diff changeset
121