changeset 563:16f91ca016b1

* added NStages as a stopper (moved from hpu/conv) * added an argmax_standalone output to logistic_regression which is independent of the targets, which was needed to compute an output independently of the target * fixed some import discrepancies between pylearn and pylearn_refactor (mostly for datasets) * added testDataset which generates sequential or random data for a given shape
author desjagui@atchoum.iro.umontreal.ca
date Wed, 03 Dec 2008 17:21:05 -0500
parents 96221aa02fcb
children e878003c3009
files pylearn/algorithms/logistic_regression.py pylearn/algorithms/stopper.py pylearn/datasets/MNIST.py pylearn/datasets/shapeset1.py pylearn/datasets/smallNorb.py pylearn/datasets/testDataset.py
diffstat 6 files changed, 60 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/pylearn/algorithms/logistic_regression.py	Mon Dec 01 16:16:21 2008 -0500
+++ b/pylearn/algorithms/logistic_regression.py	Wed Dec 03 17:21:05 2008 -0500
@@ -40,11 +40,15 @@
         #here we actually build the model
         self.linear_output = T.dot(self.input, self.w) + self.b
         if 0:
+            # TODO: pending support for target being a sparse matrix
             self.softmax = nnet.softmax(self.linear_output)
 
             self._max_pr, self.argmax = T.max_and_argmax(self.linear_output)
             self._xent = self.target * T.log(self.softmax)
         else:
+            # TODO: when above is fixed, remove this hack (need an argmax
+            # which is independent of targets)
+            self.argmax_standalone = T.argmax(self.linear_output);
             (self._xent, self.softmax, self._max_pr, self.argmax) =\
                     nnet.crossentropy_softmax_max_and_argmax_1hot(
                     self.linear_output, self.target)
--- a/pylearn/algorithms/stopper.py	Mon Dec 01 16:16:21 2008 -0500
+++ b/pylearn/algorithms/stopper.py	Wed Dec 03 17:21:05 2008 -0500
@@ -122,6 +122,16 @@
 
         raise StopIteration
 
+class NStages(ICML08Stopper):
+    """Run for a fixed number of steps, checking validation set every so
+    often."""
+    def __init__(self, hard_limit, v_int):
+        ICML08Stopper.__init__(self, hard_limit, v_int, 1.0, 1.0, hard_limit)
+
+    #TODO: could optimize next() function. Most of what's in ICML08Stopper.next()
+    #is not necessary
+
+
 @stopper_factory('icml08')
 def icml08_stopper(i_wait, v_int, min_improvement, patience, hard_limit):
     return ICML08Stopper(i_wait, v_int, min_improvement, patience, hard_limit)
--- a/pylearn/datasets/MNIST.py	Mon Dec 01 16:16:21 2008 -0500
+++ b/pylearn/datasets/MNIST.py	Wed Dec 03 17:21:05 2008 -0500
@@ -46,6 +46,7 @@
             y=all_targ[ntrain+nvalid:ntrain+nvalid+ntest])
 
     rval.n_classes = 10
+    rval.img_shape = (28,28)
     return rval
 
 
--- a/pylearn/datasets/shapeset1.py	Mon Dec 01 16:16:21 2008 -0500
+++ b/pylearn/datasets/shapeset1.py	Wed Dec 03 17:21:05 2008 -0500
@@ -7,7 +7,7 @@
 import os
 import numpy
 
-from ..amat import AMat
+from ..io.amat import AMat
 from .config import data_root
 
 def _head(path, n):
--- a/pylearn/datasets/smallNorb.py	Mon Dec 01 16:16:21 2008 -0500
+++ b/pylearn/datasets/smallNorb.py	Wed Dec 03 17:21:05 2008 -0500
@@ -1,6 +1,6 @@
 import os
 import numpy
-from ..filetensor import read
+from ..io.filetensor import read
 from .config import data_root
 
 #Path = '/u/bergstrj/pub/data/smallnorb'
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/datasets/testDataset.py	Wed Dec 03 17:21:05 2008 -0500
@@ -0,0 +1,43 @@
+"""
+Debug dataset factory: generates sequential or random data of a given shape.
+"""
+from __future__ import absolute_import
+
+import os
+import numpy
+
+from ..io.amat import AMat
+from .config import data_root
+from .dataset import dataset_factory, Dataset
+
+VALSEQ, VALRAND = range(2)
+
+@dataset_factory('DEBUG')
+def mnist_factory(variant='', ntrain=10, nvalid=10, ntest=10, \
+        nclass=2, ndim=1, dshape=None, valtype=VALSEQ):
+
+    temp = []
+    [temp.append(5) for i in range(ndim)]
+    dshape = temp if dshape is None else dshape
+
+    rval = Dataset()
+    rval.n_classes = nclass
+    rval.img_shape = dshape
+
+    dsize = numpy.prod(dshape);
+
+    print ntrain, nvalid, ntest, nclass, dshape, valtype
+
+    ntot = ntrain + nvalid + ntest
+    xdata = numpy.arange(ntot*numpy.prod(dshape)).reshape((ntot,dsize)) \
+            if valtype is VALSEQ else \
+            numpy.random.random((ntot,dsize));
+    ydata = numpy.round(numpy.random.random(ntot));
+
+    rval.train = Dataset.Obj(x=xdata[0:ntrain],y=ydata[0:ntrain])
+    rval.valid = Dataset.Obj(x=xdata[ntrain:ntrain+nvalid],\
+                             y=ydata[ntrain:ntrain+nvalid])
+    rval.test =  Dataset.Obj(x=xdata[ntrain+nvalid:ntrain+nvalid+ntest],
+                             y=ydata[ntrain+nvalid:ntrain+nvalid+ntest])
+
+    return rval