changeset 1334:6fd2610c1706

merge
author James Bergstra <bergstrj@iro.umontreal.ca>
date Mon, 18 Oct 2010 14:58:52 -0400
parents c7b2da4e2df6 (current diff) 837768915081 (diff)
children 7c51c0355d86
files pylearn/algorithms/tests/test_mcRBM.py
diffstat 6 files changed, 362 insertions(+), 65 deletions(-) [+]
line wrap: on
line diff
--- a/doc/v2_planning/API_coding_style.txt	Mon Oct 18 14:58:39 2010 -0400
+++ b/doc/v2_planning/API_coding_style.txt	Mon Oct 18 14:58:52 2010 -0400
@@ -655,6 +655,44 @@
 
 .. _Wiki page: http://www.iro.umontreal.ca/~lisa/twiki/bin/view.cgi/Divers/VimPythonRecommendations
 
+Commit message
+==============
+
+    * A one line summary. Try to keep it short, and provide the information
+      that seems most useful to other developers: in particular the goal of
+      a change is more useful than its description (which is always
+      available through the changeset patch log). E.g. say "Improved stability
+      of cost computation" rather than "Replaced log(exp(a) + exp(b)) by
+      a + log(1 + exp(b - a)) in cost computation".
+    * If needed a blank line followed by a more detailed summary
+    * Make a commit for each logical modification
+        * This makes reviews easier to do
+        * This makes debugging easier as we can more easily pinpoint errors in
+          commits with hg bisect
+    * NEVER commit reformatting with functionality changes
+    * Review your change before committing
+        * "hg diff <files>..." to see the diff you have done
+        * "hg record" allows you to select which changes to a file should be
+          committed. To enable it, put into the file ~/.hgrc:
+
+          .. code-block:: bash
+
+              [extensions]
+              hgext.record=
+
+        * hg record / diff force you to review your code, never commit without
+          running one of these two commands first
+    * Write detailed commit messages in the past tense, not present tense.
+        * Good: "Fixed Unicode bug in RSS API."
+        * Bad: "Fixes Unicode bug in RSS API."
+        * Bad: "Fixing Unicode bug in RSS API."
+    * Separate bug fixes from feature changes.
+    * When fixing a ticket, start the message with "Fixed #abc"
+        * Can we make a system that changes the ticket automatically?
+    * When referencing a ticket, start the message with "Refs #abc"
+        * Can we make a system that adds a comment to the ticket automatically?
+
+
 TODO
 ====
 
--- a/pylearn/algorithms/tests/test_mcRBM.py	Mon Oct 18 14:58:39 2010 -0400
+++ b/pylearn/algorithms/tests/test_mcRBM.py	Mon Oct 18 14:58:52 2010 -0400
@@ -519,3 +519,111 @@
         def checkpoint():
             return checkpoint
         run_classif_experiment(checkpoint=checkpoint)
+
+
+
+if 0: # TEST IDEA OUT HERE
+
+
+    class doc_db(dict):
+        # A key->document dictionary.
+        # A "document" is itself a dictionary.
+
+        # A "document" can be a small or large object, but it cannot be partially retrieved.
+
+        # This simple data structure is used in pylearn to cache intermediate results between
+        # several process invocations.
+
+    class UNSPECIFIED(object): pass
+
+    class CtrlObj(object):
+
+        def get(self, key, default_val=UNSPECIFIED, copy=True):
+            # Default to return a COPY because a set() is required to make a change persistent.
+            # Inplace changes that the CtrlObj does not know about (via set) will not be saved.
+            pass
+
+        def get_key(self, val):
+            """Return the key that retrieved `val`.
+            
+            This is useful for specifying cache keys for unhashable (e.g. numpy) objects that
+            happen to be stored in the db.
+            """
+            # if 
+            # lookup whether val is an obj
+            pass
+        def set(self, key, val):
+            pass
+        def delete(self, key):
+            pass
+        def checkpoint(self):
+            pass
+
+        @staticmethod
+        def cache_pickle(pass_ctrl=False):
+            def decorator(f):
+                # cache rval using pickle mechanism
+                def rval(*args, **kwargs):
+                    pass
+                return rval
+            return decorator
+
+        @staticmethod
+        def cache_dict(pass_ctrl=False):
+            def decorator(f):
+                # cache rval dict directly
+                def rval(*args, **kwargs):
+                    pass
+                return rval
+            return decorator
+
+        @staticmethod(f):
+        def cache_numpy(pass_ctrl=False, memmap_thresh=100*1000*1000):
+            def decorator(f):
+                # cache rval dict directly
+                def rval(*args, **kwargs):
+                    pass
+                return rval
+            return decorator
+
+    @CtrlObj.cache_numpy()
+    def get_whitened_dataset(pca_parameters):
+        # do computations
+        return None
+
+    @CtrlObj.cache_pickle(pass_ctrl=True)
+    def train_mcRBM(data, lr, n_hid, ctrl):
+
+        rbm = 45
+        for i in 10000:
+            # do some training
+            rbm += 1
+            ctrl.checkpoint()
+        return rbm
+
+    def run_experiment(args):
+
+        ctrl_obj = CtrlObj.factory(args)
+        # Could use db, or filesystem, or both, etc.
+        # There would be generic ones, but the experimenter should be very aware of what is being
+        # cached where, when, and how.  This is how results are stored and retrieved after all.
+        # Cluster-friendly jobs should not use local files directly, but should store cached
+        # computations and results to such a database.
+        #  Different jobs should avoid using the same keys in the database because coordinating
+        #  writes is difficult, and conflicts will inevitably arise.
+
+        raw_data = get_raw_data(ctrl=ctrl)
+        raw_data_key = ctrl.get_key(raw_data)
+        pca = get_pca(raw_data, max_energy=.05, ctrl=ctrl, 
+                _ctrl_raw_data_key=raw_data_key)
+        whitened_data = get_whitened_dataset(pca_parameters, ctrl=ctrl,
+                _ctrl_data_key=raw_data_key)
+
+        rbm = train_mcRBM(
+                data=whitened_data,
+                lr=0.01,
+                n_hid=100,
+                ctrl=ctrl,
+                _ctrl_data_key=raw_data_key
+                )
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/datasets/caltech.py	Mon Oct 18 14:58:52 2010 -0400
@@ -0,0 +1,46 @@
+"""
+Various routines to load/access the Caltech 101 Silhouettes data.
+"""
+
+import os
+import numpy
+
+from pylearn.io.pmat import PMat
+from pylearn.datasets.config import data_root # config
+from pylearn.datasets.dataset import Dataset
+
+def caltech_silhouette():
+
+    rval = Dataset()
+
+    
+    path = os.path.join(data_root(), 'caltech_silhouettes')
+
+    rval.train = Dataset.Obj(x=numpy.load(os.path.join(path,'train_data.npy')),
+                             y=numpy.load(os.path.join(path,'train_labels.npy')))
+    rval.valid = Dataset.Obj(x=numpy.load(os.path.join(path,'val_data.npy')),
+                             y=numpy.load(os.path.join(path,'val_labels.npy')))
+    rval.test  = Dataset.Obj(x=numpy.load(os.path.join(path,'test_data.npy')),
+                             y=numpy.load(os.path.join(path,'test_labels.npy')))
+
+    rval.n_classes = 101
+    rval.img_shape = (28,28)
+
+    return rval
+
+def caltech_silhouette2():
+
+    rval = Dataset()
+
+    from scipy import io 
+    path = '/data/lisa6/desjagui/caltech101_silhouettes_28_split1.mat'
+
+    data = io.loadmat(open(path,'r'))
+
+    rval.train = Dataset.Obj(x=data['train_data'], y=data['train_labels'])
+    rval.valid = Dataset.Obj(x=data['val_data'],   y=data['val_labels'])
+    rval.test  = Dataset.Obj(x=data['test_data'],  y=data['test_labels'])
+    rval.n_classes = 101
+    rval.img_shape = (28,28)
+
+    return rval
--- a/pylearn/datasets/test_modes.py	Mon Oct 18 14:58:39 2010 -0400
+++ b/pylearn/datasets/test_modes.py	Mon Oct 18 14:58:52 2010 -0400
@@ -99,7 +99,8 @@
 
     def __init__(self, n_modes, img_shape, seed=238904, 
                  min_p=1e-4, max_p=1e-1,
-                 min_w=0., max_w=1.):
+                 min_w=0., max_w=1.,
+                 w = None, p = None):
 
         self.n_modes = n_modes
         self.img_shape = img_shape
@@ -107,9 +108,14 @@
         self.img_size = numpy.prod(img_shape)
 
         # generate random p, w values
-        self.p = min_p + self.rng.rand(n_modes) * (max_p - min_p)
-        w = min_w + self.rng.rand(n_modes) * (max_w - min_w)
+        if p is None:
+            p = min_p + self.rng.rand(n_modes) * (max_p - min_p)
+        self.p = p
+
+        if w is None:
+            w = min_w + self.rng.rand(n_modes) * (max_w - min_w)
         self.w = w / numpy.sum(w)
+
         self.sort_w_idx = numpy.argsort(self.w)
 
         self.modes = self.rng.randint(0,2,size=(n_modes,self.img_size))
--- a/pylearn/formulas/costs.py	Mon Oct 18 14:58:39 2010 -0400
+++ b/pylearn/formulas/costs.py	Mon Oct 18 14:58:52 2010 -0400
@@ -1,10 +1,23 @@
 """
-Common training criteria.
+
+TODO: make sure the stabilization optimizations are done.
+TODO: write tests.
+TODO: check that this works for n-d tensors.
 """
+
+#"""
+#Common training criteria.
+#"""
 import theano
 import theano.tensor as T
+
 from tags import tags
 
+__authors__   = "Frederic Bastien, Nicolas Boulanger-Lewandowski, .."
+__copyright__ = "(c) 2010, Universite de Montreal"
+__license__   = "3-clause BSD License"
+__contact__   = "theano-user <theano-users@googlegroups.com>"
+
 @tags('cost','binary','cross-entropy')
 def binary_crossentropy(output, target):
     """ Compute the crossentropy of binary output wrt binary target.
@@ -16,66 +29,141 @@
     :param output: Binary output or prediction :math:`\in[0,1]`
     :type target: Theano variable
     :param target: Binary target usually :math:`\in\{0,1\}`
+
+    :note: no stabilization optimization needed for a generic output variable
     """
     return -(target * T.log(output) + (1.0 - target) * T.log(1.0 - output))
 
 
+@tags('cost','binary','cross-entropy', 'sigmoid')
+def sigmoid_crossentropy(output, target):
+    """ crossentropy of a sigmoid activation
+
+    .. math::
+                L_{CE} \equiv t\log(\sigma(a)) + (1-t)\log(1-\sigma(a))
+
+    :type output: Theano variable
+    :param output: Output before activation
+    :type target: Theano variable
+    :param target: Target
+
+    :note: no stabilization done. 
+    """
+    return target * (- T.log(1.0 + T.exp(-output))) + (1.0 - target) * (- T.log(1.0 + T.exp(output)))
+
+@tags('cost','binary','cross-entropy', 'tanh')
+def tanh_crossentropy(output, target):
+    """ crossentropy of a tanh activation
+
+    .. math::
+                L_{CE} \equiv t\log(\\frac{1+\\tanh(a)}2) + (1-t)\log(\\frac{1-\\tanh(a)}2)
+
+    :type output: Theano variable
+    :param output: Output before activation
+    :type target: Theano variable
+    :param target: Target
+
+    :note: no stabilization done. 
+    """
+    return sigmoid_crossentropy(2.0*output, target)
+
+@tags('cost','binary','cross-entropy', 'tanh', 'abs')
+def abstanh_crossentropy(output, target):
+    """ crossentropy of a absolute value tanh activation
+
+    .. math::
+                L_{CE} \equiv t\log(\\frac{1+\\tanh(|a|)}2) + (1-t)\log(\\frac{1-\\tanh(|a|)}2)
+
+    :type output: Theano variable
+    :param output: Output before activation
+    :type target: Theano variable
+    :param target: Target
+
+    :note: no stabilization done. 
+    """
+    return tanh_crossentropy(T.abs_(output), target)
+
+@tags('cost','binary','cross-entropy', 'tanh', 'normalized')
+def normtanh_crossentropy(output, target):
+    """ crossentropy of a "normalized" tanh activation (LeCun)
+
+    .. math::
+                L_{CE} \equiv t\log(\\frac{1+\\tanh(0.6666a)}2) + (1-t)\log(\\frac{1-\\tanh(0.6666a)}2)
+
+    :type output: Theano variable
+    :param output: Output before activation
+    :type target: Theano variable
+    :param target: Target
+
+    :note: no stabilization done. 
+    """
+    return tanh_crossentropy(0.6666*output, target)
+
+@tags('cost','binary','cross-entropy', 'tanh', 'normalized', 'abs')
+def absnormtanh_cross_entropy(output, target):
+    """ crossentropy of a "absolute normalized" tanh activation
+
+    .. math::
+                L_{CE} \equiv t\log(\\frac{1+\\tanh(0.6666*|a|)}2) + (1-t)\log(\\frac{1-\\tanh(0.6666*|a|)}2)
+
+    :type output: Theano variable
+    :param output: Output before activation
+    :type target: Theano variable
+    :param target: Target
+
+    :note: no stabilization done. 
+    """
+    return normtanh_crossentropy(T.abs_(output), target)
+
+def cross_entropy(output_act, output, target, act=None):
+    """ Execute the cross entropy with a sum on the last dimension and a mean on the first dimension.
+
+    If act is in 'sigmoid', 'tanh', 'tanhnorm', 'abstanh', 'abstanhnorm' we 
+    call the specialized version.
+    
+    .. math::
+        -mean(sum(target*log(output_act) + (1-target)*log(1-output_act), axis=-1), axis=0)
+
+    :type output_act: Theano variable
+    :param output_act: Output after activation
+    :type output: Theano variable
+    :param output: Output before activation
+    :type target: Theano variable
+    :param target: Target
+    :type act: str or None
+    :param act: The type of activation used
+    """
+    if act in ['sigmoid','tanh','tanhnorm','abstanh','abstanhnorm']:
+        if act == 'sigmoid':
+            return sigmoid_crossentropy(output, target)
+        if act == 'tanh':
+            return tanh_crossentropy(output, target)
+        if act == 'tanhnorm':
+            return normtanh_crossentropy(output, target)
+        if act == 'abstanh':
+            return abstanh_crossentropy(output, target)
+        if act == 'abstanhnorm':
+            return absnormtanh_cross_entropy(output, target)
+    elif act is None:
+        XE = target * T.log(output_act) + (1 - target) * T.log(1 - output_act)
+        return -T.mean(T.sum(XE, axis=-1),axis=0)
+    else:
+        raise Exception("cross_entropy() Expected parameter act to be in ['sigmoid','tanh','tanhnorm','abstanh','abstanhnorm', None]")
+
+def quadratic_cost(output, target):
+    """ The quadratic cost of output against target with a sum on the last dimension and a mean on the first dimension.
+    
+    .. math::
+        mean(sum(sqr(output-target),axis=-1),axis=0)
+
+    :type output: Theano variable
+    :param output: The value that we want to compare against target
+    :type target: Theano variable
+    :param target: The value that we consider correct
+    """
+    return T.mean(T.sum(T.sqr(output - target), axis=-1),axis=0)
+
+
 # This file seems like it has some overlap with theano.tensor.nnet.  Which functions should go
 # in which file?
 
-@tags('cost','binary','cross-entropy', 'sigmoid')
-def sigmoid_crossentropy(output_act, target):
-    """ Stable crossentropy of a sigmoid activation
-
-    .. math::
-                L_{CE} \equiv t\log(\sigma(a)) + (1-t)\log(1-\sigma(a))
-
-    :type output_act: Theano variable
-    :param output: Activation
-    :type target: Theano variable
-    :param target: Binary target usually :math:`\in\{0,1\}`
-    """
-    return target * (- T.log(1.0 + T.exp(-output_act))) + (1.0 - target) * (- T.log(1.0 + T.exp(output_act)))
-
-@tags('cost','binary','cross-entropy', 'tanh')
-def tanh_crossentropy(output_act, target):
-    """ Stable crossentropy of a tanh activation
-
-    .. math::
-                L_{CE} \equiv t\log(\\frac{1+\\tanh(a)}2) + (1-t)\log(\\frac{1-\\tanh(a)}2)
-
-    :type output_act: Theano variable
-    :param output: Activation
-    :type target: Theano variable
-    :param target: Binary target usually :math:`\in\{0,1\}`
-    """
-    return sigmoid_crossentropy(2.0*output_act, target)
-
-@tags('cost','binary','cross-entropy', 'tanh', 'abs')
-def abstanh_crossentropy(output_act, target):
-    """ Stable crossentropy of a absolute value tanh activation
-
-    .. math::
-                L_{CE} \equiv t\log(\\frac{1+\\tanh(|a|)}2) + (1-t)\log(\\frac{1-\\tanh(|a|)}2)
-
-    :type output_act: Theano variable
-    :param output: Activation
-    :type target: Theano variable
-    :param target: Binary target usually :math:`\in\{0,1\}`
-    """
-    return tanh_crossentropy(T.abs_(output_act), target)
-
-@tags('cost','binary','cross-entropy', 'tanh', "normalized")
-def normtanh_crossentropy(output_act, target):
-    """ Stable crossentropy of a "normalized" tanh activation (LeCun)
-
-    .. math::
-                L_{CE} \equiv t\log(\\frac{1+\\tanh(0.6666a)}2) + (1-t)\log(\\frac{1-\\tanh(0.6666a)}2)
-
-    :type output_act: Theano variable
-    :param output: Activation
-    :type target: Theano variable
-    :param target: Binary target usually :math:`\in\{0,1\}`
-    """
-    return tanh_crossentropy(0.6666*output_act, target)
-
--- a/pylearn/formulas/noise.py	Mon Oct 18 14:58:39 2010 -0400
+++ b/pylearn/formulas/noise.py	Mon Oct 18 14:58:52 2010 -0400
@@ -22,7 +22,7 @@
 """
 
 @tags.tags('noise','binomial','salt')
-def binomial_noise(theano_rng,input,noise_lvl):
+def binomial_noise(theano_rng, input, noise_lvl, noise_value=0):
     """
     Return `inp` with randomly-chosen elements set to zero.
 
@@ -32,13 +32,21 @@
     :param input: input
     :type noise_lvl: float
     :param noise_lvl: The probability of setting each element to zero.
+    :type noise_value: Theano scalar variable
+    :param noise_value: The value that we want when there is noise.
     """
     mask = theano_rng.binomial(
-            size = inp.shape,
+            size = input.shape,
             n = 1,
             p =  1 - noise_lvl,
-            dtype=inp.dtype)
-    return mask * input
+            dtype=input.dtype)
+    value = theano.tensor.as_tensor_variable(noise_value)
+    if value.type.ndim!=0:
+        raise Exception('binomial_noise only support scalar noise_value')
+    if noise_value==0:
+        return mask * input
+    else:
+        return mask * input + noise_value*(not mask)
 
 
 @tags.tags('noise','binomial NLP','pepper','salt')
@@ -48,7 +56,10 @@
     :type inp: Theano variable
     :param inp: The input that we want to add noise
     :type noise_lvl: tuple(float,float)
-    :param noise_lvl: The %% of noise for the salt and pepper. Between 0 (no noise) and 1.
+    :param noise_lvl: The probability of changing each element to zero or one. 
+                      (prob of salt, prob of pepper)
+
+    :note: The sum of the prob of salt and prob of pepper should be less than 1.
     """
     assert inp.dtype in ['float32','float64']
     return theano_rng.binomial( size = inp.shape, n = 1, p =  1 - noise_lvl[0], dtype=inp.dtype) * inp \