pylearn: changeset 203:80731832c62b
Automated merge with ssh://p-omega1@lgcm.iro.umontreal.ca/tlearn
author   | Frederic Bastien <bastienf@iro.umontreal.ca>
date     | Thu, 15 May 2008 15:21:00 -0400
parents  | cb6b945acf5a (diff) b9950ae5e54b (current diff)
children | c5a7105fa40b 6f55e301c687
files    | dataset.py
diffstat | 10 files changed, 737 insertions(+), 565 deletions(-)
--- a/dataset.py	Thu May 15 13:10:21 2008 -0400
+++ b/dataset.py	Thu May 15 15:21:00 2008 -0400
@@ -26,6 +26,17 @@
         else:
             for name,value in zip(attribute_names,attribute_values):
                 self.__setattr__(name,value)
+
+    def getAttributes(self,attribute_names=None, return_copy=False):
+        """
+        Return all (if attribute_names=None, in the order of attributeNames()) or a specified subset of attributes.
+        """
+        if attribute_names is None:
+            attribute_names = self.attributeNames()
+        if return_copy:
+            return [copy.copy(self.__getattribute__(name)) for name in attribute_names]
+        else:
+            return [self.__getattribute__(name) for name in attribute_names]
 
 class DataSet(AttributesHolder):
 
@@ -207,6 +218,9 @@
         """
         return DataSet.MinibatchToSingleExampleIterator(self.minibatches(None, minibatch_size = 1))
 
+    def __contains__(self, fieldname):
+        return (fieldname in self.fieldNames()) \
+            or (fieldname in self.attributeNames())
 
 class MinibatchWrapAroundIterator(object):
     """
@@ -937,13 +951,14 @@
     values are (N-2)-dimensional objects (i.e. ordinary numbers if N=2).
     """
 
-    def __init__(self, data_array, fields_columns):
+    def __init__(self, data_array, fields_columns, **kwargs):
         """
         Construct an ArrayDataSet from the underlying numpy array (data) and
         a map (fields_columns) from fieldnames to field columns. The columns of a field are specified
         using the standard arguments for indexing/slicing: integer for a column index, slice for an interval of
         columns (with possible stride), or iterable of column indices.
         """
+        ArrayFieldsDataSet.__init__(self, **kwargs)
         self.data=data_array
         self.fields_columns=fields_columns
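
The getAttributes() helper and the __contains__ hook added above let callers read attribute values in bulk and test field/attribute membership with the "in" operator. A minimal sketch, assuming an ArrayDataSet built the same way as in test_mlp.py further down (the field layout is hypothetical):

    import numpy
    import dataset

    ds = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
                                           [0, 1, 1],
                                           [1, 0, 1]]),
                              {'input': slice(2), 'target': 2})

    # __contains__ checks fieldNames() first, then attributeNames()
    print 'input' in ds        # True: 'input' is a field
    print 'target' in ds       # True: 'target' is a field
    print 'lr' in ds           # False: neither a field nor an attribute

    # getAttributes() returns the values of all (or a chosen subset of) attributes
    print ds.getAttributes()
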
--- a/learner.py Thu May 15 13:10:21 2008 -0400 +++ b/learner.py Thu May 15 15:21:00 2008 -0400 @@ -1,424 +1,103 @@ from exceptions import * -from dataset import AttributesHolder,ApplyFunctionDataSet,DataSet,CachedDataSet -import theano -from theano import compile -from theano import tensor as t - -class Learner(AttributesHolder): + + +class LearningAlgorithm(object): """ Base class for learning algorithms, provides an interface that allows various algorithms to be applicable to generic learning - algorithms. + algorithms. It is only given here to define the expected semantics. A L{Learner} can be seen as a learning algorithm, a function that when applied to training data returns a learned function (which is an object that can be applied to other data and return some output data). + + There are two main ways of using a learning algorithms, and some learning + algorithms only support one of them. The first is the way of the standard + machine learning framework, in which a learning algorithm is applied + to a training dataset, + + model = learning_algorithm(training_set) + + resulting in a fully trained model that can be applied to another dataset: + + output_dataset = model(input_dataset) + + Note that the application of a dataset has no side-effect on the model. + In that example, the training set may for example have 'input' and 'target' + fields while the input dataset may have only 'input' (or both 'input' and + 'target') and the output dataset would contain some default output fields defined + by the learning algorithm (e.g. 'output' and 'error'). + + The second way of using a learning algorithm is in the online or + adaptive framework, where the training data are only revealed in pieces + (maybe one example or a batch of example at a time): + + model = learning_algorithm() + + results in a fresh model. The model can be adapted by presenting + it with some training data, + + model.update(some_training_data) + ... + model.update(some_more_training_data) + ... + model.update(yet_more_training_data) + + and at any point one can use the model to perform some computation: + + output_dataset = model(input_dataset) + """ + + def __init__(self): pass + + def __call__(self, training_dataset=None): + """ + Return a LearnerModel, either fresh (if training_dataset is None) or fully trained (otherwise). + """ + raise AbstractFunction() +class LearnerModel(AttributesHolder): + """ + LearnerModel is a base class for models returned by instances of a LearningAlgorithm subclass. + It is only given here to define the expected semantics. + """ def __init__(self): pass - def forget(self): - """ - Reset the state of the learner to a blank slate, before seeing - training data. The operation may be non-deterministic if the - learner has a random number generator that is set to use a - different seed each time it forget() is called. - """ - raise NotImplementedError - def update(self,training_set,train_stats_collector=None): """ Continue training a learner, with the evidence provided by the given training set. - Hence update can be called multiple times. This is particularly useful in the + Hence update can be called multiple times. This is the main method used for training in the on-line setting or the sequential (Bayesian or not) settings. - The result is a function that can be applied on data, with the same - semantics of the Learner.use method. + + This function has as side effect that self(data) will behave differently, + according to the adaptation achieved by update(). 
The user may optionally provide a training L{StatsCollector} that is used to record some statistics of the outputs computed during training. It is update(d) during training. """ - return self.use # default behavior is 'non-adaptive', i.e. update does not do anything - + raise AbstractFunction() - def __call__(self,training_set,train_stats_collector=None): - """ - Train a learner from scratch using the provided training set, - and return the learned function. + def __call__(self,input_dataset,output_fieldnames=None, + test_stats_collector=None,copy_inputs=False, + put_stats_in_output_dataset=True, + output_attributes=[]): """ - self.forget() - return self.update(training_set,train_stats_collector) - - def use(self,input_dataset,output_fieldnames=None, - test_stats_collector=None,copy_inputs=False, - put_stats_in_output_dataset=True, - output_attributes=[]): - """ - Once a L{Learner} has been trained by one or more call to 'update', it can - be used with one or more calls to 'use'. The argument is an input L{DataSet} (possibly + A trained or partially trained L{Model} can be used with + with one or more calls to it. The argument is an input L{DataSet} (possibly containing a single example) and the result is an output L{DataSet} of the same length. If output_fieldnames is specified, it may be use to indicate which fields should be constructed in the output L{DataSet} (for example ['output','classification_error']). - Otherwise, self.defaultOutputFields is called to choose the output fields. + Otherwise, some default output fields are produced (possibly depending on the input + fields available in the input_dataset). Optionally, if copy_inputs, the input fields (of the input_dataset) can be made visible in the output L{DataSet} returned by this method. Optionally, attributes of the learner can be copied in the output dataset, and statistics computed by the stats collector also put in the output dataset. Note the distinction between fields (which are example-wise quantities, e.g. 'input') and attributes (which are not, e.g. 'regularization_term'). - - We provide here a default implementation that does all this using - a sub-class defined method: minibatchwiseUseFunction. - - @todo check if some of the learner attributes are actually SPECIFIED - as attributes of the input_dataset, and if so use their values instead - of the ones in the learner. - - The learner tries to compute in the output dataset the output fields specified. - If None is specified then self.defaultOutputFields(input_dataset.fieldNames()) - is called to determine the output fields. - - Attributes of the learner can also optionally be copied into the output dataset. - If output_attributes is None then all of the attributes in self.AttributeNames() - are copied in the output dataset, but if it is [] (the default), then none are copied. - If a test_stats_collector is provided, then its attributes (test_stats_collector.AttributeNames()) - are also copied into the output dataset attributes. 
- """ - input_fieldnames = input_dataset.fieldNames() - if not output_fieldnames: - output_fieldnames = self.defaultOutputFields(input_fieldnames) - - minibatchwise_use_function = self.minibatchwiseUseFunction(input_fieldnames, - output_fieldnames, - test_stats_collector) - virtual_output_dataset = ApplyFunctionDataSet(input_dataset, - minibatchwise_use_function, - output_fieldnames, - True,DataSet.numpy_vstack, - DataSet.numpy_hstack) - # actually force the computation - output_dataset = CachedDataSet(virtual_output_dataset,True) - if copy_inputs: - output_dataset = input_dataset | output_dataset - # copy the wanted attributes in the dataset - if output_attributes is None: - output_attributes = self.attributeNames() - if output_attributes: - assert set(attribute_names) <= set(self.attributeNames()) - output_dataset.setAttributes(output_attributes, - self.names2attributes(output_attributes,return_copy=True)) - if test_stats_collector: - test_stats_collector.update(output_dataset) - if put_stats_in_output_dataset: - output_dataset.setAttributes(test_stats_collector.attributeNames(), - test_stats_collector.attributes()) - return output_dataset - - def minibatchwiseUseFunction(self, input_fields, output_fields, stats_collector): - """ - Returns a function that can map the given input fields to the given output fields - and to the attributes that the stats collector needs for its computation. - That function is expected to operate on minibatches. - The function returned makes use of the self.useInputAttributes() and - sets the attributes specified by self.useOutputAttributes(). - """ - raise AbstractFunction() - - def attributeNames(self): - """ - A Learner may have attributes that it wishes to export to other objects. To automate - such export, sub-classes should define here the names (list of strings) of these attributes. - - @todo By default, attributeNames looks for all dictionary entries whose name does not start with _. - """ - return [] - - def attributes(self,return_copy=False): - """ - Return a list with the values of the learner's attributes (or optionally, a deep copy). - """ - return self.names2attributes(self.attributeNames(),return_copy) - - def names2attributes(self,names): - """ - Private helper function that maps a list of attribute names to a list - of (optionally copies) values of attributes. - """ - res=[] - for name in names: - assert name in names - res.append(self.__getattribute__(name)) - return res - - def useInputAttributes(self): - """ - A subset of self.attributeNames() which are the names of attributes needed by use() in order - to do its work. - """ - raise AbstractFunction() - - def useOutputAttributes(self): - """ - A subset of self.attributeNames() which are the names of attributes modified/created by use() in order - to do its work. - """ - raise AbstractFunction() - - -class TLearner(Learner): - """ - TLearner is a virtual class of L{Learner}s that attempts to factor - out of the definition of a learner the steps that are common to - many implementations of learning algorithms, so as to leave only - 'the equations' to define in particular sub-classes, using Theano. - - In the default implementations of use and update, it is assumed - that the 'use' and 'update' methods visit examples in the input - dataset sequentially. In the 'use' method only one pass through the - dataset is done, whereas the sub-learner may wish to iterate over - the examples multiple times. Subclasses where this basic model is - not appropriate can simply redefine update or use. 
- - Sub-classes must provide the following functions and functionalities: - - attributeNames(): defines all the names of attributes which can - be used as fields or - attributes in input/output datasets or in - stats collectors. All these attributes - are expected to be theano.Result objects - (with a .data property and recognized by - theano.Function for compilation). The sub-class - constructor defines the relations between the - Theano variables that may be used by 'use' - and 'update' or by a stats collector. - - defaultOutputFields(input_fields): return a list of default - dataset output fields when - None are provided by the caller of use. - The following naming convention is assumed and important. Attributes - whose names are listed in attributeNames() can be of any type, - but those that can be referenced as input/output dataset fields or - as output attributes in 'use' or as input attributes in the stats - collector should be associated with a Theano Result variable. If the - exported attribute name is <name>, the corresponding Result name - (an internal attribute of the TLearner, created in the sub-class - constructor) should be _<name>. Typically <name> will be numpy - ndarray and _<name> will be the corresponding Theano Tensor (for - symbolic manipulation). - - @todo pousser dans Learner toute la poutine qui peut l'etre sans etre - dependant de Theano - """ - - def __init__(self): - Learner.__init__(self) - self.use_functions_dictionary={} - - def defaultOutputFields(self, input_fields): - """ - Return a default list of output field names (to put in the output dataset). - This will be used when None are provided (as output_fields) by the caller of the 'use' method. - This may involve looking at the input_fields (names) available in the - input_dataset. """ raise AbstractFunction() - - def minibatchwiseUseFunction(self, input_fields, output_fields, stats_collector): - """ - Implement minibatchwiseUseFunction by exploiting Theano compilation - and the expression graph defined by a sub-class constructor. - """ - if stats_collector: - stats_collector_inputs = stats_collector.input2UpdateAttributes() - for attribute in stats_collector_inputs: - if attribute not in input_fields: - output_fields.append(attribute) - key = (tuple(input_fields),tuple(output_fields)) - if key not in self.use_functions_dictionary: - use_input_attributes = self.useInputAttributes() - use_output_attributes = self.useOutputAttributes() - complete_f = compile.function(self.names2OpResults(input_fields+use_input_attributes), - self.names2OpResults(output_fields+use_output_attributes)) - def f(*input_field_values): - input_attribute_values = self.names2attributes(use_input_attributes) - results = complete_f(*(list(input_field_values) + input_attribute_values)) - output_field_values = results[0:len(output_fields)] - output_attribute_values = results[len(output_fields):len(results)] - if use_output_attributes: - self.setAttributes(use_output_attributes,output_attribute_values) - return output_field_values - self.use_functions_dictionary[key]=f - return self.use_functions_dictionary[key] - - def names2OpResults(self,names): - """ - Private helper function that maps a list of attribute names to a list - of corresponding Op Results (with the same name but with a '_' prefix). 
- """ - return [self.__getattribute__('_'+name) for name in names] - - -class MinibatchUpdatesTLearner(TLearner): - """ - This adds the following functions to a L{TLearner}: - - updateStart(), updateEnd(), updateMinibatch(minibatch), isLastEpoch(): - functions executed at the beginning, the end, in the middle (for - each minibatch) of the update method, and at the end of each - epoch. This model only works for 'online' or one-shot learning - that requires going only once through the training data. For more - complicated models, more specialized subclasses of TLearner should - be used or a learning-algorithm specific update method should - be defined. - - - a 'parameters' attribute which is a list of parameters - (whose names are specified by the user's subclass with the - parameterAttributes() method) - - """ - - def __init__(self): - TLearner.__init__(self) - self.update_minibatch_function = compile.function(self.names2OpResults(self.updateMinibatchOutputAttributes()+ - self.updateMinibatchInputFields()), - self.names2OpResults(self.updateMinibatchOutputAttributes())) - self.update_end_function = compile.function(self.names2OpResults(self.updateEndInputAttributes()), - self.names2OpResults(self.updateEndOutputAttributes())) - - def allocate(self, minibatch): - """ - This function is called at the beginning of each L{updateMinibatch} - and should be used to check that all required attributes have been - allocated and initialized (usually this function calls forget() - when it has to do an initialization). - """ - raise AbstractFunction() - - def updateMinibatchInputFields(self): - raise AbstractFunction() - - def updateMinibatchInputAttributes(self): - raise AbstractFunction() - - def updateMinibatchOutputAttributes(self): - raise AbstractFunction() - - def updateEndInputAttributes(self): - raise AbstractFunction() - - def updateEndOutputAttributes(self): - raise AbstractFunction() - - def parameterAttributes(self): - raise AbstractFunction() - - def updateStart(self,training_set): - pass - - def updateEnd(self): - self.setAttributes(self.updateEndOutputAttributes(), - self.update_end_function(*self.names2attributes(self.updateEndInputAttributes()))) - self.parameters = self.names2attributes(self.parameterAttributes()) - - def updateMinibatch(self,minibatch): - # make sure all required fields are allocated and initialized - self.allocate(minibatch) - input_attributes = self.names2attributes(self.updateMinibatchInputAttributes()) - input_fields = minibatch(*self.updateMinibatchInputFields()) - self.setAttributes(self.updateMinibatchOutputAttributes(), - # concatenate the attribute values and field values and then apply update fn - self.update_minibatch_function(*(input_attributes+input_fields))) - - def isLastEpoch(self): - """ - This method is called at the end of each epoch (cycling over the training set). - It returns a boolean to indicate if this is the last epoch. - By default just do one epoch. - """ - return True - - def update(self,training_set,train_stats_collector=None): - """ - @todo check if some of the learner attributes are actually SPECIFIED - in as attributes of the training_set. 
- """ - self.updateStart(training_set) - stop=False - if hasattr(self,'_minibatch_size') and self._minibatch_size: - minibatch_size=self._minibatch_size - else: - minibatch_size=min(100,len(training_set)) - while not stop: - if train_stats_collector: - train_stats_collector.forget() # restart stats collectin at the beginning of each epoch - for minibatch in training_set.minibatches(minibatch_size=minibatch_size): - self.updateMinibatch(minibatch) - if train_stats_collector: - minibatch_set = minibatch.examples() - minibatch_set.setAttributes(self.attributeNames(),self.attributes()) - train_stats_collector.update(minibatch_set) - stop = self.isLastEpoch() - self.updateEnd() - return self.use - -class OnlineGradientTLearner(MinibatchUpdatesTLearner): - """ - Specialization of L{MinibatchUpdatesTLearner} in which the minibatch updates - are obtained by performing an online (minibatch-based) gradient step. - - Sub-classes must define the following: - - self._learning_rate (may be changed by the sub-class between epochs or minibatches) - - self.lossAttribute() = name of the loss field - """ - def __init__(self,truly_online=False): - """ - If truly_online then only one pass is made through the training set passed to update(). - - SUBCLASSES SHOULD CALL THIS CONSTRUCTOR ONLY AFTER HAVING DEFINED ALL THEIR THEANO FORMULAS - """ - self.truly_online=truly_online - - # create the formulas for the gradient update - old_params = [self.__getattribute__("_"+name) for name in self.parameterAttributes()] - new_params_names = ["_new_"+name for name in self.parameterAttributes()] - loss = self.__getattribute__("_"+self.lossAttribute()) - self.setAttributes(new_params_names, - [t.add_inplace(param,self._learning_rate*t.grad(loss,param)) - for param in old_params]) - MinibatchUpdatesTLearner.__init__(self) - - - def namesOfAttributesToComputeOutputs(self,output_names): - """ - The output_names are attribute names (not the corresponding Result names, which have leading _). - Return the corresponding input names - """ - all_inputs = t.gof.graph.inputs(self.names2OpResults(output_names)) - # remove constants and leading '_' in name - - return [r.name for r in all_inputs if isinstance(r,theano.Result) and \ - not isinstance(r,theano.Constant) and not isinstance(r,theano.Value)] - #inputs = [] - #for r in all_inputs: - # if isinstance(r,theano.Result) and \ - # not isinstance(r,theano.Constant) and not isinstance(r,theano.Value): - # inputs.append(r.name) - #return inputs - - def isLastEpoch(self): - return self.truly_online - - def updateMinibatchInputAttributes(self): - return self.parameterAttributes() - - def updateMinibatchOutputAttributes(self): - return ["new_"+name for name in self.parameterAttributes()] - - def updateEndInputAttributes(self): - return self.namesOfAttributesToComputeOutputs(self.updateEndOutputAttributes()) - - def useInputAttributes(self): - return self.parameterAttributes() - - def useOutputAttributes(self): - return [] -
--- a/lookup_list.py	Thu May 15 13:10:21 2008 -0400
+++ b/lookup_list.py	Thu May 15 15:21:00 2008 -0400
@@ -111,6 +111,8 @@
         """
         Return a list of values associated with the given names (which must all be keys of the lookup list).
         """
+        if names == self._names:
+            return self._values
         return [self[name] for name in names]
--- a/misc.py	Thu May 15 13:10:21 2008 -0400
+++ b/misc.py	Thu May 15 15:21:00 2008 -0400
@@ -1,3 +1,24 @@
+
+import theano
+
+class Print(theano.Op):
+    def __init__(self,message=""):
+        self.message=message
+        self.view_map={0:[0]}
+
+    def make_node(self,xin):
+        xout = xin.type.make_result()
+        return theano.Apply(op = self, inputs = [xin], outputs=[xout])
+
+    def perform(self,node,inputs,output_storage):
+        xin, = inputs
+        xout, = output_storage
+        xout[0] = xin
+        print self.message,xin
+
+    def grad(self,input,output_gradients):
+        return output_gradients
+
 def unique_elements_list_intersection(list1,list2):
     """
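
The Print op above is an identity op with a printing side effect, intended for debugging graph evaluation. A rough sketch of wiring it into a graph; this assumes the 2008-era theano.function used elsewhere in this changeset, and builds the Apply node explicitly through make_node since this snapshot's Op calling conventions may differ:

    import theano
    from theano import tensor as t
    from misc import Print

    x = t.matrix('x')
    # identical in value to x, but prints "x = <value>" whenever the graph runs
    x_printed = Print("x =").make_node(x).outputs[0]
    y = x_printed * 2.0
    f = theano.function([x], [y])
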
--- a/mlp.py Thu May 15 13:10:21 2008 -0400 +++ b/mlp.py Thu May 15 15:21:00 2008 -0400 @@ -9,6 +9,77 @@ from theano import tensor as t from nnet_ops import * import math +from misc import * + +def function(inputs, outputs, linker='c&py'): + return theano.function(inputs, outputs, unpack_single=False,linker=linker) + +def randshape(*shape): return (numpy.random.rand(*shape) -0.5) * 0.001 + +class ManualNNet(object): + def __init__(self, ninputs, nhid, nclass, lr, nepochs, + linker='c&yp', + hidden_layer=None): + class Vars: + def __init__(self, lr, l2coef=0.0): + lr = t.constant(lr) + l2coef = t.constant(l2coef) + input = t.matrix('input') # n_examples x n_inputs + target = t.ivector('target') # n_examples x 1 + W2 = t.matrix('W2') + b2 = t.vector('b2') + + if hidden_layer: + hid, hid_params, hid_ivals, hid_regularization = hidden_layer(input) + else: + W1 = t.matrix('W1') + b1 = t.vector('b1') + hid = t.tanh(b1 + t.dot(input, W1)) + hid_params = [W1, b1] + hid_regularization = l2coef * t.sum(W1*W1) + hid_ivals = [randshape(ninputs, nhid), randshape(nhid)] + + params = [W2, b2] + hid_params + ivals = [randshape(nhid, nclass), randshape(nclass)]\ + + hid_ivals + nll, predictions = crossentropy_softmax_1hot( b2 + t.dot(hid, W2), target) + regularization = l2coef * t.sum(W2*W2) + hid_regularization + output_class = t.argmax(predictions,1) + loss_01 = t.neq(output_class, target) + g_params = t.grad(nll + regularization, params) + new_params = [t.sub_inplace(p, lr * gp) for p,gp in zip(params, g_params)] + self.__dict__.update(locals()); del self.self + self.nhid = nhid + self.nclass = nclass + self.nepochs = nepochs + self.v = Vars(lr) + self.params = None + + def update(self, trainset): + params = self.v.ivals + update_fn = function( + [self.v.input, self.v.target] + self.v.params, + [self.v.nll] + self.v.new_params) + for i in xrange(self.nepochs): + for input, target in trainset.minibatches(['input', 'target'], + minibatch_size=min(32, len(trainset))): + dummy = update_fn(input, target[:,0], *params) + if 0: print dummy[0] #the nll + return self.use + __call__ = update + + def use(self, dset, + output_fieldnames=['output_class'], + test_stats_collector=None, + copy_inputs=False, + put_stats_in_output_dataset=True, + output_attributes=[]): + inputs = [self.v.input, self.v.target] + self.v.params + fn = function(inputs, [getattr(self.v, name) for name in output_fieldnames]) + target = dset.fields()['target'] if ('target' in dset.fields()) else numpy.zeros((1,1),dtype='int64') + return ApplyFunctionDataSet(dset, + lambda input, target: fn(input, target[:,0], *self.v.ivals), + output_fieldnames) class OneHiddenLayerNNetClassifier(OnlineGradientTLearner): @@ -67,8 +138,7 @@ - 'regularization_term' """ - - def __init__(self,n_hidden,n_classes,learning_rate,max_n_epochs,L2_regularizer=0,init_range=1.,n_inputs=None,minibatch_size=None): + def __init__(self,n_hidden,n_classes,learning_rate,max_n_epochs,L2_regularizer=0,init_range=1.,n_inputs=None,minibatch_size=None,linker='c|py'): self._n_inputs = n_inputs self._n_outputs = n_classes self._n_hidden = n_hidden @@ -79,7 +149,7 @@ self.L2_regularizer = L2_regularizer self._learning_rate = t.scalar('learning_rate') # this is the symbol self._input = t.matrix('input') # n_examples x n_inputs - self._target = t.imatrix('target') # n_examples x 1 + self._target = t.lmatrix('target') # n_examples x 1 self._target_vector = self._target[:,0] self._L2_regularizer = t.scalar('L2_regularizer') self._W1 = t.matrix('W1') @@ -92,7 +162,7 @@ self._output_class = 
t.argmax(self._output,1) self._class_error = t.neq(self._output_class,self._target_vector) self._minibatch_criterion = self._nll + self._regularization_term / t.shape(self._input)[0] - OnlineGradientTLearner.__init__(self) + OnlineGradientTLearner.__init__(self, linker = linker) def attributeNames(self): return ["parameters","b1","W2","b2","W2", "L2_regularizer","regularization_term"] @@ -103,6 +173,9 @@ def updateMinibatchInputFields(self): return ["input","target"] + def updateMinibatchInputAttributes(self): + return OnlineGradientTLearner.updateMinibatchInputAttributes(self)+["L2_regularizer"] + def updateEndOutputAttributes(self): return ["regularization_term"] @@ -115,6 +188,10 @@ output_fields += ["class_error", "nll"] return output_fields + def updateMinibatch(self,minibatch): + MinibatchUpdatesTLearner.updateMinibatch(self,minibatch) + #print self.nll + def allocate(self,minibatch): minibatch_n_inputs = minibatch["input"].shape[1] if not self._n_inputs: @@ -142,135 +219,22 @@ self._n_epochs +=1 return self._n_epochs>=self._max_n_epochs -class MLP(MinibatchUpdatesTLearner): - """ - Implement a feedforward multi-layer perceptron, with or without L1 and/or L2 regularization. - - The predictor parameters are obtained by minibatch/online gradient descent. - Training can proceed sequentially (with multiple calls to update with - different disjoint subsets of the training sets). - - Hyper-parameters: - - L1_regularizer - - L2_regularizer - - neuron_sparsity_regularizer - - initial_learning_rate - - learning_rate_decrease_rate - - n_hidden_per_layer (a list of integers) - - activation_function ("sigmoid","tanh", or "ratio") - - The output/task type (classification, regression, etc.) is obtained by specializing MLP. - - For each (input[t],output[t]) pair in a minibatch,:: - - activation[0] = input_t - for k=1 to n_hidden_layers: - activation[k]=activation_function(b[k]+ W[k]*activation[k-1]) - output_t = output_activation_function(b[n_hidden_layers+1]+W[n_hidden_layers+1]*activation[n_hidden_layers]) - - and the b and W are obtained by minimizing the following by stochastic minibatch gradient descent:: - - L2_regularizer sum_{ijk} W_{kij}^2 + L1_regularizer sum_{kij} |W_{kij}| - + neuron_sparsity_regularizer sum_{ki} |b_{ki} + infinity| - - sum_t log P_{output_model}(target_t | output_t) - - The fields and attributes expected and produced by use and update are the following: - - - Input and output fields (example-wise quantities): - - - 'input' (always expected by use and update) - - 'target' (optionally expected by use and always by update) - - 'output' (optionally produced by use) - - error fields produced by sub-class of MLP - - - optional attributes (optionally expected as input_dataset attributes) - (warning, this may be dangerous, the 'use' method will use those provided in the - input_dataset rather than those learned during 'update'; currently no support - for providing these to update): - - - 'L1_regularizer' - - 'L2_regularizer' - - 'b' - - 'W' - - 'parameters' = [b[1], W[1], b[2], W[2], ...] 
- - 'regularization_term' - - """ - - def attributeNames(self): - return ["parameters","b","W","L1_regularizer","L2_regularizer","neuron_sparsity_regularizer","regularization_term"] - - def useInputAttributes(self): - return ["b","W"] - - def useOutputAttributes(self): - return [] + def debug_updateMinibatch(self,minibatch): + # make sure all required fields are allocated and initialized + self.allocate(minibatch) + input_attributes = self.names2attributes(self.updateMinibatchInputAttributes()) + input_fields = minibatch(*self.updateMinibatchInputFields()) + print 'input attributes', input_attributes + print 'input fields', input_fields + results = self.update_minibatch_function(*(input_attributes+input_fields)) + print 'output attributes', self.updateMinibatchOutputAttributes() + print 'results', results + self.setAttributes(self.updateMinibatchOutputAttributes(), + results) - def updateInputAttributes(self): - return ["b","W","L1_regularizer","L2_regularizer","neuron_sparsity_regularizer"] - - def updateMinibatchInputFields(self): - return ["input","target"] - - def updateMinibatchInputAttributes(self): - return ["b","W"] - - def updateMinibatchOutputAttributes(self): - return ["new_XtX","new_XtY"] - - def updateEndInputAttributes(self): - return ["theta","XtX","XtY"] - - def updateEndOutputAttributes(self): - return ["new_theta","b","W","regularization_term"] # CHECK: WILL b AND W CONTAIN OLD OR NEW THETA? @todo i.e. order of computation = ? + if 0: + print 'n0', self.names2OpResults(self.updateMinibatchOutputAttributes()+ self.updateMinibatchInputFields()) + print 'n1', self.names2OpResults(self.updateMinibatchOutputAttributes()) + print 'n2', self.names2OpResults(self.updateEndInputAttributes()) + print 'n3', self.names2OpResults(self.updateEndOutputAttributes()) - def parameterAttributes(self): - return ["b","W"] - - def defaultOutputFields(self, input_fields): - output_fields = ["output"] - if "target" in input_fields: - output_fields.append("squared_error") - return output_fields - - def __init__(self): - self._input = t.matrix('input') # n_examples x n_inputs - self._target = t.matrix('target') # n_examples x n_outputs - self._L2_regularizer = t.scalar('L2_regularizer') - self._theta = t.matrix('theta') - self._W = self._theta[:,1:] - self._b = self._theta[:,0] - self._XtX = t.matrix('XtX') - self._XtY = t.matrix('XtY') - self._extended_input = t.prepend_one_to_each_row(self._input) - self._output = t.dot(self._input,self._W.T) + self._b # (n_examples , n_outputs) matrix - self._squared_error = t.sum_within_rows(t.sqr(self._output-self._target)) # (n_examples ) vector - self._regularizer = self._L2_regularizer * t.dot(self._W,self._W) - self._new_XtX = add_inplace(self._XtX,t.dot(self._extended_input.T,self._extended_input)) - self._new_XtY = add_inplace(self._XtY,t.dot(self._extended_input.T,self._target)) - self._new_theta = t.solve_inplace(self._theta,self._XtX,self._XtY) - - OneShotTLearner.__init__(self) - - def allocate(self,minibatch): - minibatch_n_inputs = minibatch["input"].shape[1] - minibatch_n_outputs = minibatch["target"].shape[1] - if not self._n_inputs: - self._n_inputs = minibatch_n_inputs - self._n_outputs = minibatch_n_outputs - self.XtX = numpy.zeros((1+self._n_inputs,1+self._n_inputs)) - self.XtY = numpy.zeros((1+self._n_inputs,self._n_outputs)) - self.theta = numpy.zeros((self._n_outputs,1+self._n_inputs)) - self.forget() - elif self._n_inputs!=minibatch_n_inputs or self._n_outputs!=minibatch_n_outputs: - # if the input or target changes dimension on the fly, 
we resize and forget everything - self.forget() - - def forget(self): - if self._n_inputs and self._n_outputs: - self.XtX.resize((1+self.n_inputs,1+self.n_inputs)) - self.XtY.resize((1+self.n_inputs,self.n_outputs)) - self.XtX.data[:,:]=0 - self.XtY.data[:,:]=0 - numpy.diag(self.XtX.data)[1:]=self.L2_regularizer -
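
Both ManualNNet above and OneHiddenLayerNNetClassifier build the same one-hidden-layer classifier symbolically: a tanh hidden layer, an affine output layer, softmax with negative log-likelihood, and an L2 penalty on the weights. A plain-numpy sketch of the forward pass (illustrative only; shapes and the helper name are assumptions) may make the Theano expressions easier to read:

    import numpy

    def forward_pass(input, W1, b1, W2, b2, target):
        # input: (n_examples, n_inputs); target: (n_examples,) integer class labels
        hid = numpy.tanh(b1 + numpy.dot(input, W1))      # hidden layer
        act = b2 + numpy.dot(hid, W2)                    # output activations
        act = act - act.max(axis=1)[:, None]             # for numerical stability
        sm = numpy.exp(act)
        sm = sm / sm.sum(axis=1)[:, None]                # row-wise softmax
        nll = -numpy.log(sm[numpy.arange(len(target)), target])  # per-example loss
        output_class = act.argmax(axis=1)
        return nll, output_class
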
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mlp_factory_approach.py Thu May 15 15:21:00 2008 -0400 @@ -0,0 +1,172 @@ +import copy, sys +import numpy + +import theano +from theano import tensor as t + +from tlearn import dataset, nnet_ops, stopper + +def _randshape(*shape): + return (numpy.random.rand(*shape) -0.5) * 0.001 + +def _cache(d, key, valfn): + #valfn() is only evaluated if key isn't in dictionary d + if key not in d: + d[key] = valfn() + return d[key] + +class _Model(object): + def __init__(self, algo, params): + self.algo = algo + self.params = params + v = algo.v + self.update_fn = algo._fn([v.input, v.target] + v.params, [v.nll] + v.new_params) + self._fn_cache = {} + + def __copy__(self): + return _Model(self.algo, [copy.copy(p) for p in params]) + + def update(self, input_target): + """Update this model from more training data.""" + params = self.params + #TODO: why should we have to unpack target like this? + for input, target in input_target: + self.update_fn(input, target[:,0], *params) + + def __call__(self, testset, fieldnames=['output_class']): + """Apply this model (as a function) to new data""" + #TODO: cache fn between calls + assert 'input' == testset.fieldNames()[0] + assert len(testset.fieldNames()) <= 2 + v = self.algo.v + outputs = [getattr(v, name) for name in fieldnames] + inputs = [v.input] + ([v.target] if 'target' in testset else []) + inputs.extend(v.params) + theano_fn = _cache(self._fn_cache, (tuple(inputs), tuple(outputs)), + lambda: self.algo._fn(inputs, outputs)) + lambda_fn = lambda *args: theano_fn(*(list(args) + self.params)) + return dataset.ApplyFunctionDataSet(testset, lambda_fn, fieldnames) + +class AutonameVars(object): + def __init__(self, dct): + for key, val in dct.items(): + if type(key) is str and hasattr(val, 'name'): + val.name = key + self.__dict__.update(dct) + +class MultiLayerPerceptron(object): + + def __init__(self, ninputs, nhid, nclass, lr, + l2coef=0.0, + linker='c&py', + hidden_layer=None, + early_stopper=None, + validation_portion=0.2, + V_extern=None): + class V_intern(AutonameVars): + def __init__(v_self, lr, l2coef, **kwargs): + lr = t.constant(lr) + l2coef = t.constant(l2coef) + input = t.matrix() # n_examples x n_inputs + target = t.ivector() # len: n_examples + W2, b2 = t.matrix(), t.vector() + + if hidden_layer: + hid, hid_params, hid_ivals, hid_regularization = hidden_layer(input) + else: + W1, b1 = t.matrix(), t.vector() + hid = t.tanh(b1 + t.dot(input, W1)) + hid_params = [W1, b1] + hid_regularization = l2coef * t.sum(W1*W1) + hid_ivals = lambda : [_randshape(ninputs, nhid), _randshape(nhid)] + + params = [W2, b2] + hid_params + activations = b2 + t.dot(hid, W2) + nll, predictions = nnet_ops.crossentropy_softmax_1hot(activations, target) + regularization = l2coef * t.sum(W2*W2) + hid_regularization + output_class = t.argmax(activations,1) + loss_01 = t.neq(output_class, target) + g_params = t.grad(nll + regularization, params) + new_params = [t.sub_inplace(p, lr * gp) for p,gp in zip(params, g_params)] + self.__dict__.update(locals()); del self.self + AutonameVars.__init__(v_self, locals()) + self.nhid = nhid + self.nclass = nclass + self.v = V_intern(**locals()) if V_extern is None else V_extern(**locals()) + self.linker = linker + self.early_stopper = early_stopper if early_stopper is not None else lambda: stopper.NStages(10,1) + self.validation_portion = validation_portion + + def _fn(self, inputs, outputs): + # Caching here would hamper multi-threaded apps + # prefer caching in _Model.__call__ + return 
theano.function(inputs, outputs, unpack_single=False, linker=self.linker) + + def __call__(self, trainset=None, iparams=None): + """Allocate and optionally train a model""" + if iparams is None: + iparams = [_randshape(self.nhid, self.nclass), _randshape(self.nclass)]\ + + self.v.hid_ivals() + rval = _Model(self, iparams) + if trainset: + if len(trainset) == sys.maxint: + raise NotImplementedError('Learning from infinite streams is not supported') + nval = int(self.validation_portion * len(trainset)) + nmin = len(trainset) - nval + assert nmin >= 0 + minset = trainset[:nmin] #real training set for minimizing loss + valset = trainset[nmin:] #validation set for early stopping + best = rval + for stp in self.early_stopper(): + rval.update( + trainset.minibatches(['input', 'target'], minibatch_size=min(32, + len(trainset)))) + if stp.set_score: + stp.score = rval(valset, ['loss_01']) + if (stp.score < stp.best_score): + best = copy.copy(rval) + rval = best + return rval + + +import unittest + +class TestMLP(unittest.TestCase): + def test0(self): + + training_set1 = dataset.ArrayDataSet(numpy.array([[0, 0, 0], + [0, 1, 1], + [1, 0, 1], + [1, 1, 1]]), + {'input':slice(2),'target':2}) + training_set2 = dataset.ArrayDataSet(numpy.array([[0, 0, 0], + [0, 1, 1], + [1, 0, 0], + [1, 1, 1]]), + {'input':slice(2),'target':2}) + test_data = dataset.ArrayDataSet(numpy.array([[0, 0, 0], + [0, 1, 1], + [1, 0, 0], + [1, 1, 1]]), + {'input':slice(2)}) + + learn_algo = MultiLayerPerceptron(2, 10, 2, .1 + , linker='c&py' + , early_stopper = lambda:stopper.NStages(100,1)) + + model1 = learn_algo(training_set1) + + model2 = learn_algo(training_set2) + + n_match = 0 + for o1, o2 in zip(model1(test_data), model2(test_data)): + #print o1 + #print o2 + n_match += (o1 == o2) + + assert n_match == (numpy.sum(training_set1.fields()['target'] == + training_set2.fields()['target'])) + +if __name__ == '__main__': + unittest.main() +
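
AutonameVars above copies a dict (typically locals()) onto the instance and, for string keys whose values carry a .name attribute (as Theano variables do), names the value after its key. A small illustration under that assumption:

    from theano import tensor as t
    from mlp_factory_approach import AutonameVars

    v = AutonameVars({'input': t.matrix(), 'target': t.ivector(), 'lr': 0.01})
    assert v.input.name == 'input' and v.target.name == 'target'
    assert v.lr == 0.01    # plain values are copied through unchanged
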
--- a/nnet_ops.py Thu May 15 13:10:21 2008 -0400 +++ b/nnet_ops.py Thu May 15 15:21:00 2008 -0400 @@ -20,15 +20,15 @@ def grad(self, (x,), (gz,)): y = scalar_sigmoid(x) return [gz * y * (1.0 - y)] - def c_code(self, (x,), (z,), sub): - if 'float' in self.inputs[0].dtype: + def c_code(self, node, name, (x,), (z,), sub): + if node.inputs[0].type in [scalar.float32, scalar.float64]: return """%(z)s = %(x)s < -30.0 ? 0.0 : %(x)s > 30.0 ? 1.0 : 1.0 /(1.0+exp(-%(x)s));""" % locals() - return NotImplemented#Error('only floatingpoint is implemented') + raise NotImplementedError('only floatingpoint is implemented') scalar_sigmoid = ScalarSigmoid(scalar.upgrade_to_float, name='scalar_sigmoid') sigmoid = tensor.Elemwise(scalar_sigmoid, name='sigmoid') @@ -44,15 +44,15 @@ return ScalarSoftplus.static_impl(x) def grad(self, (x,), (gz,)): return [gz * scalar_sigmoid(x)] - def c_code(self, (x,), (z,), sub): - if 'float' in self.inputs[0].dtype: + def c_code(self, name, node, (x,), (z,), sub): + if node.inputs[0].type in [scalar.float32, scalar.float64]: return """%(z)s = %(x)s < -30.0 ? 0.0 : %(x)s > 30.0 ? %(x)s : log1p(exp(%(x)s));""" % locals() - return NotImplemented#Error('only floating point x is implemented') + raise NotImplementedError('only floating point x is implemented') scalar_softplus = ScalarSoftplus(scalar.upgrade_to_float, name='scalar_softplus') softplus = tensor.Elemwise(scalar_softplus, name='softplus') @@ -101,7 +101,7 @@ or x.type.dtype not in ['float32', 'float64']: raise ValueError('b must be 1-d tensor of floats') if y_idx.type.ndim != 1 \ - or y_idx.type.dtype not in ['int32', 'int64']: + or y_idx.type.dtype not in ['int8', 'int16', 'int32', 'int64']: raise ValueError('y_idx must be 1-d tensor of ints') # TODO: Is this correct? It used to be y, not y_idx @@ -109,7 +109,7 @@ y_idx.type.broadcastable).make_result() # nll = Tensor(x.dtype, y.broadcastable) sm = x.type.make_result() - return theano.Apply(self, [x, b, y_idx],[nll, sm]) + return theano.Apply(self, [x, b, y_idx], [nll, sm]) def perform(self, node, input_storage, output_storage): x, b, y_idx = input_storage if b.shape[0] != x.shape[1]: @@ -135,7 +135,7 @@ return dx, db, None def c_headers(self): return ['<iostream>'] - def c_code(self, (x, b, y_idx), (nll, sm), sub): + def c_code(self, node, name, (x, b, y_idx), (nll, sm), sub): # this implementation was lifted from # /u/bergstrj/cvs/bergstrj/src/feb07/nn.cxx @@ -144,6 +144,9 @@ #TODO: set error messages for failures in this code + #TODO: use this to accept float32 and int32: node.inputs[0].type.dtype_specs()[1] + y_idx_type = node.inputs[2].type.dtype_specs()[1] + return """ npy_intp* Nx = %(x)s->dimensions; @@ -172,9 +175,12 @@ PyErr_SetString(PyExc_TypeError, "b not float64"); %(fail)s; } - if (%(y_idx)s->descr->type_num != PyArray_INT64) + if ((%(y_idx)s->descr->type_num != PyArray_INT64) + && (%(y_idx)s->descr->type_num != PyArray_INT32) + && (%(y_idx)s->descr->type_num != PyArray_INT16) + && (%(y_idx)s->descr->type_num != PyArray_INT8)) { - PyErr_SetString(PyExc_TypeError, "y_idx not int64"); + PyErr_SetString(PyExc_TypeError, "y_idx not int8, int16, int32, or int64"); %(fail)s; } if ((%(x)s->dimensions[1] != %(b)s->dimensions[0]) @@ -217,7 +223,7 @@ const double* __restrict__ x_i = (double*)(%(x)s->data + %(x)s->strides[0] * i); const double* __restrict__ b_i = (double*)(%(b)s->data); - const long int y_i = ((long int*)(%(y_idx)s->data + %(y_idx)s->strides[0] * i))[0]; + const %(y_idx_type)s y_i = ((%(y_idx_type)s*)(%(y_idx)s->data + %(y_idx)s->strides[0] * 
i))[0]; double* __restrict__ sm_i = (double*)(%(sm)s->data + %(sm)s->strides[0] * i); double* __restrict__ nll_i = (double*)(%(nll)s->data + %(nll)s->strides[0] * i); @@ -302,16 +308,25 @@ output_storage[0][0] = dx def grad(self, *args): raise NotImplementedError() - def c_code(self, (dnll, sm, y_idx), (dx,), sub): + def c_code(self, node, name, (dnll, sm, y_idx), (dx,), sub): + y_idx_type = node.inputs[2].type.dtype_specs()[1] return """ if ((%(dnll)s->descr->type_num != PyArray_DOUBLE) || (%(sm)s->descr->type_num != PyArray_DOUBLE) - || (%(y_idx)s->descr->type_num != PyArray_INT64)) + ) { PyErr_SetString(PyExc_TypeError, "types should be float64, float64, int64"); %(fail)s; } + if ((%(y_idx)s->descr->type_num != PyArray_INT64) + && (%(y_idx)s->descr->type_num != PyArray_INT32) + && (%(y_idx)s->descr->type_num != PyArray_INT16) + && (%(y_idx)s->descr->type_num != PyArray_INT8)) + { + PyErr_SetString(PyExc_TypeError, "y_idx not int8, int16, int32, or int64"); + %(fail)s; + } if ((%(dnll)s->nd != 1) || (%(sm)s->nd != 2) || (%(y_idx)s->nd != 1)) @@ -341,7 +356,7 @@ { const double dnll_i = ((double*)(%(dnll)s->data + %(dnll)s->strides[0] * i))[0]; - const long int y_i = ((long int*)(%(y_idx)s->data + %(y_idx)s->strides[0] * i))[0]; + const %(y_idx_type)s y_i = ((%(y_idx_type)s*)(%(y_idx)s->data + %(y_idx)s->strides[0] * i))[0]; const double* __restrict__ sm_i = (double*)(%(sm)s->data + %(sm)s->strides[0] * i); npy_intp Ssm = %(sm)s->strides[1]/sizeof(double);
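
The nnet_ops changes above are mostly mechanical: adapt c_code to the newer (node, name, ...) signature, raise NotImplementedError instead of returning NotImplemented, and accept int8/int16/int32/int64 class labels for y_idx rather than only int32/int64. For reference, here is what the modified softmax/NLL op computes, as a plain-numpy sketch (the function name is made up for this sketch; x holds activations, b a bias row, y_idx the integer class labels):

    import numpy

    def softmax_nll_reference(x, b, y_idx):
        act = x + b                                 # (n_examples, n_classes)
        act = act - act.max(axis=1)[:, None]        # numerical stability
        sm = numpy.exp(act)
        sm = sm / sm.sum(axis=1)[:, None]           # row-wise softmax
        nll = -numpy.log(sm[numpy.arange(len(y_idx)), y_idx])
        return nll, sm
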
--- a/statscollector.py Thu May 15 13:10:21 2008 -0400 +++ b/statscollector.py Thu May 15 15:21:00 2008 -0400 @@ -1,34 +1,118 @@ -from numpy import * +# Here is how I see stats collectors: -class StatsCollector(object): - """A StatsCollector object is used to record performance statistics during training - or testing of a learner. It can be configured to measure different things and - accumulate the appropriate statistics. From these statistics it can be interrogated - to obtain performance measures of interest (such as maxima, minima, mean, standard - deviation, standard error, etc.). Optionally, the observations can be weighted - (yielded weighted mean, weighted variance, etc., where applicable). The statistics - that are desired can be specified among a list supported by the StatsCollector - class or subclass. When some statistics are requested, others become automatically - available (e.g., sum or mean).""" +# def my_stats((residue,nll),(regularizer)): +# mse=examplewise_mean(square_norm(residue)) +# training_loss=regularizer+examplewise_sum(nll) +# set_names(locals()) +# return ((residue,nll),(regularizer),(),(mse,training_loss)) +# my_stats_collector = make_stats_collector(my_stats) +# +# where make_stats_collector calls my_stats(examplewise_fields, attributes) to +# construct its update function, and figure out what are the input fields (here "residue" +# and "nll") and input attributes (here "regularizer") it needs, and the output +# attributes that it computes (here "mse" and "training_loss"). Remember that +# fields are examplewise quantities, but attributes are not, in my jargon. +# In the above example, I am highlighting that some operations done in my_stats +# are examplewise and some are not. I am hoping that theano Ops can do these +# kinds of internal side-effect operations (and proper initialization of these hidden +# variables). I expect that a StatsCollector (returned by make_stats_collector) +# knows the following methods: +# stats_collector.input_fieldnames +# stats_collector.input_attribute_names +# stats_collector.output_attribute_names +# stats_collector.update(mini_dataset) +# stats_collector['mse'] +# where mini_dataset has the input_fieldnames() as fields and the input_attribute_names() +# as attributes, and in the resulting dataset the output_attribute_names() are set to the +# proper numeric values. 
- default_statistics = [mean,standard_deviation,min,max] + + +import theano +from theano import tensor as t +from Learner import Learner +from lookup_list import LookupList + +class StatsCollectorModel(AttributesHolder): + def __init__(self,stats_collector): + self.stats_collector = stats_collector + self.outputs = LookupList(stats_collector.output_names,[None for name in stats_collector.output_names]) + # the statistics get initialized here + self.update_function = theano.function(input_attributes+input_fields,output_attributes+output_fields,linker="c|py") + for name,value in self.outputs.items(): + self.__setattribute__(name,value) + def update(self,dataset): + input_fields = dataset.fields()(self.stats_collector.input_field_names) + input_attributes = dataset.getAttributes(self.stats_collector.input_attribute_names) + self.outputs._values = self.update_function(input_attributes+input_fields) + for name,value in self.outputs.items(): + self.__setattribute__(name,value) + def __call__(self): + return self.outputs + def attributeNames(self): + return self.outputs.keys() - __init__(self,n_quantities_observed, statistics=default_statistics): - self.n_quantities_observed=n_quantities_observed +class StatsCollector(AttributesHolder): + + def __init__(self,input_attributes, input_fields, outputs): + self.input_attributes = input_attributes + self.input_fields = input_fields + self.outputs = outputs + self.input_attribute_names = [v.name for v in input_attributes] + self.input_field_names = [v.name for v in input_fields] + self.output_names = [v.name for v in output_attributes] + + def __call__(self,dataset=None): + model = StatsCollectorModel(self) + if dataset: + self.update(dataset) + return model - clear(self): - raise NotImplementedError +if __name__ == '__main__': + def my_statscollector(): + regularizer = t.scalar() + nll = t.matrix() + class_error = t.matrix() + total_loss = regularizer+t.examplewise_sum(nll) + avg_nll = t.examplewise_mean(nll) + avg_class_error = t.examplewise_mean(class_error) + for name,val in locals(): val.name = name + return StatsCollector([regularizer],[nll,class_error],[total_loss,avg_nll,avg_class_error]) + + + - update(self,observations): - """The observations is a numpy vector of length n_quantities_observed. Some - entries can be 'missing' (with a NaN entry) and will not be counted in the - statistics.""" - raise NotImplementedError - - __getattr__(self, statistic) - """Return a particular statistic, which may be inferred from the collected statistics. - The argument is a string naming that statistic.""" +# OLD DESIGN: +# +# class StatsCollector(object): +# """A StatsCollector object is used to record performance statistics during training +# or testing of a learner. It can be configured to measure different things and +# accumulate the appropriate statistics. From these statistics it can be interrogated +# to obtain performance measures of interest (such as maxima, minima, mean, standard +# deviation, standard error, etc.). Optionally, the observations can be weighted +# (yielded weighted mean, weighted variance, etc., where applicable). The statistics +# that are desired can be specified among a list supported by the StatsCollector +# class or subclass. 
When some statistics are requested, others become automatically +# available (e.g., sum or mean).""" +# +# default_statistics = [mean,standard_deviation,min,max] +# +# __init__(self,n_quantities_observed, statistics=default_statistics): +# self.n_quantities_observed=n_quantities_observed +# +# clear(self): +# raise NotImplementedError +# +# update(self,observations): +# """The observations is a numpy vector of length n_quantities_observed. Some +# entries can be 'missing' (with a NaN entry) and will not be counted in the +# statistics.""" +# raise NotImplementedError +# +# __getattr__(self, statistic) +# """Return a particular statistic, which may be inferred from the collected statistics. +# The argument is a string naming that statistic."""
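
The design notes above distinguish fields (example-wise quantities such as 'residue' and 'nll') from attributes (whole-dataset quantities such as 'regularizer'). A plain-numpy rendering of the my_stats example from the comment, with made-up shapes, makes that distinction concrete:

    import numpy

    # fields: one entry per example; attribute: a single value for the whole dataset
    residue = numpy.random.randn(4, 3)    # hypothetical (n_examples, n_outputs) field
    nll = numpy.random.rand(4)            # hypothetical (n_examples,) field
    regularizer = 0.01                    # attribute, not example-wise

    mse = numpy.mean(numpy.sum(residue ** 2, axis=1))   # examplewise_mean(square_norm(residue))
    training_loss = regularizer + numpy.sum(nll)        # regularizer + examplewise_sum(nll)
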
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/stopper.py	Thu May 15 15:21:00 2008 -0400
@@ -0,0 +1,108 @@
+"""Early stopping iterators
+
+The idea here is to supply early-stopping heuristics that can be used in the
+form:
+
+    stopper = SomeEarlyStopper()
+
+    for i in stopper():
+        # train from data
+        if i.set_score:
+            i.score = validation_score
+
+
+So far I only have one heuristic, so maybe this won't scale.
+"""
+
+class Stopper(object):
+
+    def train(self, data, update_rows_fn, update, validate, save=None):
+        """Return the best model trained on data
+
+        Parameters:
+        data - a thing that accepts getitem(<list of int64>), or a tuple of such things
+        update_rows_fn - fn : int --> <list or tensor of int>
+        update - fn: update an internal model from elements of data
+        validate - fn: evaluate an internal model based on elements of data
+        save - fn: return a copy of the internal model
+
+        The body of this function exhausts the <self> iterator, and trains a
+        model using early stopping in the process.
+        """
+
+        best = None
+        for stp in self:
+            i = stp.iter
+
+            # call update on some training set rows
+            t_rows = update_rows_fn(i)
+            if isinstance(data, (tuple, list)):
+                update(*[d[t_rows] for d in data])
+            else:
+                update(data[t_rows])
+
+            if stp.set_score:
+                stp.score = validate()
+                if (stp.score < stp.best_score) and save:
+                    best = save()
+        return best
+
+
+class ICML08Stopper(Stopper):
+    @staticmethod
+    def icml08(ntrain, batchsize):
+        """Some setting similar to what I used for ICML08 submission"""
+        #TODO: what did I actually use? put that in here.
+        return ICML08Stopper(30*ntrain/batchsize,
+                ntrain/batchsize, 0.96, 2.0, 100000000)
+
+    def __init__(self, i_wait, v_int, min_improvement, patience, hard_limit):
+        self.initial_wait = i_wait
+        self.set_score_interval = v_int
+        self.min_improvement = min_improvement
+        self.patience = patience
+        self.hard_limit = hard_limit
+
+        self.best_score = float('inf')
+        self.best_iter = -1
+        self.iter = -1
+
+        self.set_score = False
+        self.score = None
+
+    def __iter__(self):
+        return self
+
+    E_set_score = 'when iter.set_score is True, caller must assign a score to iter.score'
+    def next(self):
+        if self.set_score: #left over from last time
+            if self.score is None:
+                raise Exception(ICML08Stopper.E_set_score)
+            if self.score < (self.best_score * self.min_improvement):
+                (self.best_score, self.best_iter) = (self.score, self.iter)
+            self.score = None #un-set it
+
+
+        starting = self.iter < self.initial_wait
+        waiting = self.iter < (self.patience * self.best_iter)
+        if starting or waiting:
+            # continue to iterate
+            self.iter += 1
+            if self.iter == self.hard_limit:
+                raise StopIteration
+            self.set_score = (self.iter % self.set_score_interval == 0)
+            return self
+
+        raise StopIteration
+
+
+class NStages(ICML08Stopper):
+    """Run for a fixed number of steps, checking validation set every so
+    often."""
+    def __init__(self, hard_limit, v_int):
+        ICML08Stopper.__init__(self, hard_limit, v_int, 1.0, 1.0, hard_limit)
+
+    #TODO: could optimize next() function. Most of what's in ICML08Stopper.next()
+    #is not necessary
+
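
The stoppers above are driven as iterators: on every step you do some training, and whenever step.set_score is true you must assign a validation score before asking for the next step. A minimal hand-driven sketch using NStages (the 0.25 is a placeholder for a real validation error):

    import stopper

    stop = stopper.NStages(10, 2)    # at most 10 steps, ask for a score every 2 steps
    for step in stop:
        # ... update the model on a minibatch here ...
        if step.set_score:
            step.score = 0.25        # hypothetical validation error for this step
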
--- a/test_mlp.py Thu May 15 13:10:21 2008 -0400 +++ b/test_mlp.py Thu May 15 15:21:00 2008 -0400 @@ -1,9 +1,58 @@ from mlp import * import dataset +import nnet_ops + + +from functools import partial +def separator(debugger, i, node, *ths): + print "===================" + +def what(debugger, i, node, *ths): + print "#%i" % i, node + +def parents(debugger, i, node, *ths): + print [input.step for input in node.inputs] + +def input_shapes(debugger, i, node, *ths): + print "input shapes: ", + for r in node.inputs: + if hasattr(r.value, 'shape'): + print r.value.shape, + else: + print "no_shape", + print + +def input_types(debugger, i, node, *ths): + print "input types: ", + for r in node.inputs: + print r.type, + print + +def output_shapes(debugger, i, node, *ths): + print "output shapes:", + for r in node.outputs: + if hasattr(r.value, 'shape'): + print r.value.shape, + else: + print "no_shape", + print + +def output_types(debugger, i, node, *ths): + print "output types:", + for r in node.outputs: + print r.type, + print + def test0(): - nnet = OneHiddenLayerNNetClassifier(10,3,.1,1000) + linker = 'c|py' + #linker = partial(theano.gof.DebugLinker, linkers = [theano.gof.OpWiseCLinker], + # debug_pre = [separator, what, parents, input_types, input_shapes], + # debug_post = [output_shapes, output_types], + # compare_fn = lambda x, y: numpy.all(x == y)) + + nnet = OneHiddenLayerNNetClassifier(10,2,.001,1000, linker = linker) training_set = dataset.ArrayDataSet(numpy.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], @@ -16,5 +65,68 @@ for fieldname in output_ds.fieldNames(): print fieldname+"=",output_ds[fieldname] -test0() +def test1(): + nnet = ManualNNet(2, 10,3,.1,1000) + training_set = dataset.ArrayDataSet(numpy.array([[0, 0, 0], + [0, 1, 1], + [1, 0, 1], + [1, 1, 1]]), + {'input':slice(2),'target':2}) + fprop=nnet(training_set) + + output_ds = fprop(training_set) + + for fieldname in output_ds.fieldNames(): + print fieldname+"=",output_ds[fieldname] + +def test2(): + training_set = dataset.ArrayDataSet(numpy.array([[0, 0, 0], + [0, 1, 1], + [1, 0, 1], + [1, 1, 1]]), + {'input':slice(2),'target':2}) + nin, nhid=2, 10 + def sigm_layer(input): + W1 = t.matrix('W1') + b1 = t.vector('b1') + return (nnet_ops.sigmoid(b1 + t.dot(input, W1)), + [W1, b1], + [(numpy.random.rand(nin, nhid) -0.5) * 0.001, numpy.zeros(nhid)]) + nnet = ManualNNet(nin, nhid, 3, .1, 1000, hidden_layer=sigm_layer) + fprop=nnet(training_set) + + output_ds = fprop(training_set) + for fieldname in output_ds.fieldNames(): + print fieldname+"=",output_ds[fieldname] + +def test_interface_0(): + learner = ManualNNet(2, 10, 3, .1, 1000) + + model = learner(training_set) + + model2 = learner(training_set) # trains model a second time + + learner.update(additional_data) # modifies nnet and model by side-effect + + +def test_interface2_1(): + learn_algo = ManualNNet(2, 10, 3, .1, 1000) + + prior = learn_algo() + + model1 = learn_algo(training_set1) + + model2 = learn_algo(training_set2) + + model2.update(additional_data) + + n_match = 0 + for o1, o2 in zip(model1.use(test_data), model2.use(test_data)): + n_match += (o1 == o2) + + print n_match + +test1() +test2() +