annotate learner.py @ 99:a8da709eb6a9

in ArrayDataSet.__init__ if a columns is an index, we change it to be a list that containt only this index. This way, we remove the special case where the columns is an index for all subsequent call. This was possing trouble with numpy.vstack() called by MinibatchWrapAroundIterator.next
author Frederic Bastien <bastienf@iro.umontreal.ca>
date Tue, 06 May 2008 13:57:36 -0400
parents c4726e19b8ec
children c4916445e025
rev   line source
1
2cd82666b9a7 Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents: 0
diff changeset
1
2cd82666b9a7 Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents: 0
diff changeset
2 from dataset import *
2cd82666b9a7 Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents: 0
diff changeset
3
2cd82666b9a7 Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents: 0
diff changeset
4 class Learner(object):
2cd82666b9a7 Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents: 0
diff changeset
5 """Base class for learning algorithms, provides an interface
2cd82666b9a7 Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents: 0
diff changeset
6 that allows various algorithms to be applicable to generic learning
2cd82666b9a7 Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents: 0
diff changeset
7 algorithms.
2cd82666b9a7 Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents: 0
diff changeset
8
10
80bf5492e571 Rewrote learner.py according to the specs in the wiki for learners.
bengioy@esprit.iro.umontreal.ca
parents: 1
diff changeset
9 A Learner can be seen as a learning algorithm, a function that when
80bf5492e571 Rewrote learner.py according to the specs in the wiki for learners.
bengioy@esprit.iro.umontreal.ca
parents: 1
diff changeset
10 applied to training data returns a learned function, an object that
80bf5492e571 Rewrote learner.py according to the specs in the wiki for learners.
bengioy@esprit.iro.umontreal.ca
parents: 1
diff changeset
11 can be applied to other data and return some output data.
1
2cd82666b9a7 Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents: 0
diff changeset
12 """
2cd82666b9a7 Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents: 0
diff changeset
13
10
80bf5492e571 Rewrote learner.py according to the specs in the wiki for learners.
bengioy@esprit.iro.umontreal.ca
parents: 1
diff changeset
14 def __init__(self):
80bf5492e571 Rewrote learner.py according to the specs in the wiki for learners.
bengioy@esprit.iro.umontreal.ca
parents: 1
diff changeset
15 pass
80bf5492e571 Rewrote learner.py according to the specs in the wiki for learners.
bengioy@esprit.iro.umontreal.ca
parents: 1
diff changeset
16
80bf5492e571 Rewrote learner.py according to the specs in the wiki for learners.
bengioy@esprit.iro.umontreal.ca
parents: 1
diff changeset
17 def forget(self):
80bf5492e571 Rewrote learner.py according to the specs in the wiki for learners.
bengioy@esprit.iro.umontreal.ca
parents: 1
diff changeset
18 """
80bf5492e571 Rewrote learner.py according to the specs in the wiki for learners.
bengioy@esprit.iro.umontreal.ca
parents: 1
diff changeset
19 Reset the state of the learner to a blank slate, before seeing
80bf5492e571 Rewrote learner.py according to the specs in the wiki for learners.
bengioy@esprit.iro.umontreal.ca
parents: 1
diff changeset
20 training data. The operation may be non-deterministic if the
80bf5492e571 Rewrote learner.py according to the specs in the wiki for learners.
bengioy@esprit.iro.umontreal.ca
parents: 1
diff changeset
21 learner has a random number generator that is set to use a
80bf5492e571 Rewrote learner.py according to the specs in the wiki for learners.
bengioy@esprit.iro.umontreal.ca
parents: 1
diff changeset
22 different seed each time it forget() is called.
80bf5492e571 Rewrote learner.py according to the specs in the wiki for learners.
bengioy@esprit.iro.umontreal.ca
parents: 1
diff changeset
23 """
80bf5492e571 Rewrote learner.py according to the specs in the wiki for learners.
bengioy@esprit.iro.umontreal.ca
parents: 1
diff changeset
24 raise NotImplementedError
80bf5492e571 Rewrote learner.py according to the specs in the wiki for learners.
bengioy@esprit.iro.umontreal.ca
parents: 1
diff changeset
25
14
5ede27026e05 Working on gradient_based_learner
bengioy@bengiomac.local
parents: 13
diff changeset
26 def update(self,training_set,train_stats_collector=None):
10
80bf5492e571 Rewrote learner.py according to the specs in the wiki for learners.
bengioy@esprit.iro.umontreal.ca
parents: 1
diff changeset
27 """
80bf5492e571 Rewrote learner.py according to the specs in the wiki for learners.
bengioy@esprit.iro.umontreal.ca
parents: 1
diff changeset
28 Continue training a learner, with the evidence provided by the given training set.
80bf5492e571 Rewrote learner.py according to the specs in the wiki for learners.
bengioy@esprit.iro.umontreal.ca
parents: 1
diff changeset
29 Hence update can be called multiple times. This is particularly useful in the
80bf5492e571 Rewrote learner.py according to the specs in the wiki for learners.
bengioy@esprit.iro.umontreal.ca
parents: 1
diff changeset
30 on-line setting or the sequential (Bayesian or not) settings.
80bf5492e571 Rewrote learner.py according to the specs in the wiki for learners.
bengioy@esprit.iro.umontreal.ca
parents: 1
diff changeset
31 The result is a function that can be applied on data, with the same
80bf5492e571 Rewrote learner.py according to the specs in the wiki for learners.
bengioy@esprit.iro.umontreal.ca
parents: 1
diff changeset
32 semantics of the Learner.use method.
75
90e4c0784d6e Added draft of LinearRegression learner
bengioy@bengiomac.local
parents: 20
diff changeset
33
14
5ede27026e05 Working on gradient_based_learner
bengioy@bengiomac.local
parents: 13
diff changeset
34 The user may optionally provide a training StatsCollector that is used to record
75
90e4c0784d6e Added draft of LinearRegression learner
bengioy@bengiomac.local
parents: 20
diff changeset
35 some statistics of the outputs computed during training. It is update(d) during
90e4c0784d6e Added draft of LinearRegression learner
bengioy@bengiomac.local
parents: 20
diff changeset
36 training.
10
80bf5492e571 Rewrote learner.py according to the specs in the wiki for learners.
bengioy@esprit.iro.umontreal.ca
parents: 1
diff changeset
37 """
13
633453635d51 Starting to work on gradient_based_learner.py
bengioy@bengiomac.local
parents: 10
diff changeset
38 return self.use # default behavior is 'non-adaptive', i.e. update does not do anything
10
80bf5492e571 Rewrote learner.py according to the specs in the wiki for learners.
bengioy@esprit.iro.umontreal.ca
parents: 1
diff changeset
39
80bf5492e571 Rewrote learner.py according to the specs in the wiki for learners.
bengioy@esprit.iro.umontreal.ca
parents: 1
diff changeset
40
20
266c68cb6136 Minor editions, plus adding untested ApplyFunctionDataset for GradientLearner in the works.
bengioy@bengiomac.local
parents: 14
diff changeset
41 def __call__(self,training_set,train_stats_collector=None):
10
80bf5492e571 Rewrote learner.py according to the specs in the wiki for learners.
bengioy@esprit.iro.umontreal.ca
parents: 1
diff changeset
42 """
80bf5492e571 Rewrote learner.py according to the specs in the wiki for learners.
bengioy@esprit.iro.umontreal.ca
parents: 1
diff changeset
43 Train a learner from scratch using the provided training set,
80bf5492e571 Rewrote learner.py according to the specs in the wiki for learners.
bengioy@esprit.iro.umontreal.ca
parents: 1
diff changeset
44 and return the learned function.
80bf5492e571 Rewrote learner.py according to the specs in the wiki for learners.
bengioy@esprit.iro.umontreal.ca
parents: 1
diff changeset
45 """
80bf5492e571 Rewrote learner.py according to the specs in the wiki for learners.
bengioy@esprit.iro.umontreal.ca
parents: 1
diff changeset
46 self.forget()
20
266c68cb6136 Minor editions, plus adding untested ApplyFunctionDataset for GradientLearner in the works.
bengioy@bengiomac.local
parents: 14
diff changeset
47 return self.update(learning_task,train_stats_collector)
1
2cd82666b9a7 Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents: 0
diff changeset
48
20
266c68cb6136 Minor editions, plus adding untested ApplyFunctionDataset for GradientLearner in the works.
bengioy@bengiomac.local
parents: 14
diff changeset
49 def use(self,input_dataset,output_fields=None,copy_inputs=True):
10
80bf5492e571 Rewrote learner.py according to the specs in the wiki for learners.
bengioy@esprit.iro.umontreal.ca
parents: 1
diff changeset
50 """Once a Learner has been trained by one or more call to 'update', it can
1
2cd82666b9a7 Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents: 0
diff changeset
51 be used with one or more calls to 'use'. The argument is a DataSet (possibly
10
80bf5492e571 Rewrote learner.py according to the specs in the wiki for learners.
bengioy@esprit.iro.umontreal.ca
parents: 1
diff changeset
52 containing a single example) and the result is a DataSet of the same length.
1
2cd82666b9a7 Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents: 0
diff changeset
53 If output_fields is specified, it may be use to indicate which fields should
2cd82666b9a7 Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents: 0
diff changeset
54 be constructed in the output DataSet (for example ['output','classification_error']).
20
266c68cb6136 Minor editions, plus adding untested ApplyFunctionDataset for GradientLearner in the works.
bengioy@bengiomac.local
parents: 14
diff changeset
55 Optionally, if copy_inputs, the input fields (of the input_dataset) can be made
75
90e4c0784d6e Added draft of LinearRegression learner
bengioy@bengiomac.local
parents: 20
diff changeset
56 visible in the output DataSet returned by this method.
10
80bf5492e571 Rewrote learner.py according to the specs in the wiki for learners.
bengioy@esprit.iro.umontreal.ca
parents: 1
diff changeset
57 """
1
2cd82666b9a7 Added statscollector and started writing dataset and learner.
bengioy@esprit.iro.umontreal.ca
parents: 0
diff changeset
58 raise NotImplementedError
77
1e2bb5bad636 toying with different ways to implement learners
bengioy@bengiomac.local
parents: 75
diff changeset
59
92
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
60 def attributeNames(self):
78
3499918faa9d In the middle of designing TLearner
bengioy@bengiomac.local
parents: 77
diff changeset
61 """
3499918faa9d In the middle of designing TLearner
bengioy@bengiomac.local
parents: 77
diff changeset
62 A Learner may have attributes that it wishes to export to other objects. To automate
3499918faa9d In the middle of designing TLearner
bengioy@bengiomac.local
parents: 77
diff changeset
63 such export, sub-classes should define here the names (list of strings) of these attributes.
3499918faa9d In the middle of designing TLearner
bengioy@bengiomac.local
parents: 77
diff changeset
64 """
3499918faa9d In the middle of designing TLearner
bengioy@bengiomac.local
parents: 77
diff changeset
65 return []
77
1e2bb5bad636 toying with different ways to implement learners
bengioy@bengiomac.local
parents: 75
diff changeset
66
78
3499918faa9d In the middle of designing TLearner
bengioy@bengiomac.local
parents: 77
diff changeset
67 class TLearner(Learner):
3499918faa9d In the middle of designing TLearner
bengioy@bengiomac.local
parents: 77
diff changeset
68 """
3499918faa9d In the middle of designing TLearner
bengioy@bengiomac.local
parents: 77
diff changeset
69 TLearner is a virtual class of Learners that attempts to factor out of the definition
3499918faa9d In the middle of designing TLearner
bengioy@bengiomac.local
parents: 77
diff changeset
70 of a learner the steps that are common to many implementations of learning algorithms,
3499918faa9d In the middle of designing TLearner
bengioy@bengiomac.local
parents: 77
diff changeset
71 so as to leave only "the equations" to define in particular sub-classes, using Theano.
3499918faa9d In the middle of designing TLearner
bengioy@bengiomac.local
parents: 77
diff changeset
72
3499918faa9d In the middle of designing TLearner
bengioy@bengiomac.local
parents: 77
diff changeset
73 In the default implementations of use and update, it is assumed that the 'use' and 'update' methods
3499918faa9d In the middle of designing TLearner
bengioy@bengiomac.local
parents: 77
diff changeset
74 visit examples in the input dataset sequentially. In the 'use' method only one pass through the dataset is done,
3499918faa9d In the middle of designing TLearner
bengioy@bengiomac.local
parents: 77
diff changeset
75 whereas the sub-learner may wish to iterate over the examples multiple times. Subclasses where this
3499918faa9d In the middle of designing TLearner
bengioy@bengiomac.local
parents: 77
diff changeset
76 basic model is not appropriate can simply redefine update or use.
3499918faa9d In the middle of designing TLearner
bengioy@bengiomac.local
parents: 77
diff changeset
77
3499918faa9d In the middle of designing TLearner
bengioy@bengiomac.local
parents: 77
diff changeset
78 Sub-classes must provide the following functions and functionalities:
3499918faa9d In the middle of designing TLearner
bengioy@bengiomac.local
parents: 77
diff changeset
79 - attributeNames(): defines all the names of attributes which can be used as fields or
3499918faa9d In the middle of designing TLearner
bengioy@bengiomac.local
parents: 77
diff changeset
80 attributes in input/output datasets or in stats collectors.
3499918faa9d In the middle of designing TLearner
bengioy@bengiomac.local
parents: 77
diff changeset
81 All these attributes are expected to be theano.Result objects
3499918faa9d In the middle of designing TLearner
bengioy@bengiomac.local
parents: 77
diff changeset
82 (with a .data property and recognized by theano.Function for compilation).
3499918faa9d In the middle of designing TLearner
bengioy@bengiomac.local
parents: 77
diff changeset
83 The sub-class constructor defines the relations between
3499918faa9d In the middle of designing TLearner
bengioy@bengiomac.local
parents: 77
diff changeset
84 the Theano variables that may be used by 'use' and 'update'
3499918faa9d In the middle of designing TLearner
bengioy@bengiomac.local
parents: 77
diff changeset
85 or by a stats collector.
3499918faa9d In the middle of designing TLearner
bengioy@bengiomac.local
parents: 77
diff changeset
86 - defaultOutputFields(input_fields): return a list of default dataset output fields when
3499918faa9d In the middle of designing TLearner
bengioy@bengiomac.local
parents: 77
diff changeset
87 None are provided by the caller of use.
92
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
88 - update_start(), update_end(), update_minibatch(minibatch): functions
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
89 executed at the beginning, the end, and in the middle
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
90 (for each minibatch) of the update method. This model only
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
91 works for 'online' or one-short learning that requires
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
92 going only once through the training data. For more complicated
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
93 models, more specialized subclasses of TLearner should be used
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
94 or a learning-algorithm specific update method should be defined.
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
95
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
96 The following naming convention is assumed and important.
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
97 Attributes whose names are listed in attributeNames() can be of any type,
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
98 but those that can be referenced as input/output dataset fields or as
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
99 output attributes in 'use' or as input attributes in the stats collector
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
100 should be associated with a Theano Result variable. If the exported attribute
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
101 name is <name>, the corresponding Result name (an internal attribute of
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
102 the TLearner, created in the sub-class constructor) should be _<name>.
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
103 Typically <name> will be numpy ndarray and _<name> will be the corresponding
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
104 Theano Tensor (for symbolic manipulation).
78
3499918faa9d In the middle of designing TLearner
bengioy@bengiomac.local
parents: 77
diff changeset
105 """
92
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
106
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
107 def __init__(self):
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
108 Learner.__init__(self)
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
109
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
110 def _minibatchwise_use_functions(self, input_fields, output_fields, stats_collector):
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
111 """
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
112 Private helper function called by the generic TLearner.use. It returns a function
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
113 that can map the given input fields to the given output fields (along with the
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
114 attributes that the stats collector needs for its computation.
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
115 """
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
116 if not output_fields:
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
117 output_fields = self.defaultOutputFields(input_fields)
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
118 if stats_collector:
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
119 stats_collector_inputs = stats_collector.inputUpdateAttributes()
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
120 for attribute in stats_collector_inputs:
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
121 if attribute not in input_fields:
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
122 output_fields.append(attribute)
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
123 key = (input_fields,output_fields)
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
124 if key not in self.use_functions_dictionary:
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
125 self.use_functions_dictionary[key]=Function(self._names2attributes(input_fields),
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
126 self._names2attributes(output_fields))
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
127 return self.use_functions_dictionary[key]
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
128
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
129 def attributes(self,return_copy=False):
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
130 """
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
131 Return a list with the values of the learner's attributes (or optionally, a deep copy).
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
132 """
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
133 return self.names2attributes(self.attributeNames())
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
134
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
135 def _names2attributes(self,names,return_Result=False, return_copy=False):
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
136 """
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
137 Private helper function that maps a list of attribute names to a list
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
138 of (optionally copies) values or of the Result objects that own these values.
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
139 """
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
140 if return_Result:
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
141 if return_copy:
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
142 return [copy.deepcopy(self.__getattr__(name)) for name in names]
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
143 else:
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
144 return [self.__getattr__(name) for name in names]
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
145 else:
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
146 if return_copy:
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
147 return [copy.deepcopy(self.__getattr__(name).data) for name in names]
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
148 else:
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
149 return [self.__getattr__(name).data for name in names]
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
150
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
151 def use(self,input_dataset,output_fieldnames=None,output_attributes=None,
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
152 test_stats_collector=None,copy_inputs=True):
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
153 """
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
154 The learner tries to compute in the output dataset the output fields specified
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
155 """
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
156 minibatchwise_use_function = _minibatchwise_use_functions(input_dataset.fieldNames(),
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
157 output_fieldnames,
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
158 test_stats_collector)
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
159 virtual_output_dataset = ApplyFunctionDataSet(input_dataset,
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
160 minibatchwise_use_function,
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
161 True,DataSet.numpy_vstack,
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
162 DataSet.numpy_hstack)
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
163 # actually force the computation
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
164 output_dataset = CachedDataSet(virtual_output_dataset,True)
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
165 if copy_inputs:
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
166 output_dataset = input_dataset | output_dataset
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
167 # copy the wanted attributes in the dataset
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
168 if output_attributes:
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
169 assert set(output_attributes) <= set(self.attributeNames())
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
170 output_dataset.setAttributes(output_attributes,
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
171 self._names2attributes(output_attributes,return_copy=True))
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
172 if test_stats_collector:
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
173 test_stats_collector.update(output_dataset)
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
174 output_dataset.setAttributes(test_stats_collector.attributeNames(),
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
175 test_stats_collector.attributes())
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
176 return output_dataset
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
177
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
178 def update_start(self): pass
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
179 def update_end(self): pass
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
180 def update_minibatch(self,minibatch):
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
181 raise AbstractFunction()
78
3499918faa9d In the middle of designing TLearner
bengioy@bengiomac.local
parents: 77
diff changeset
182
92
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
183 def update(self,training_set,train_stats_collector=None):
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
184
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
185 self.update_start()
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
186 for minibatch in training_set.minibatches(self.training_set_input_fields,
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
187 minibatch_size=self.minibatch_size):
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
188 self.update_minibatch(minibatch)
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
189 if train_stats_collector:
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
190 minibatch_set = minibatch.examples()
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
191 minibatch_set.setAttributes(self.attributeNames(),self.attributes())
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
192 train_stats_collector.update(minibatch_set)
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
193 self.update_end()
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
194 return self.use
c4726e19b8ec Finished first draft of TLearner
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents: 78
diff changeset
195