# HG changeset patch # User James Bergstra # Date 1287428332 14400 # Node ID 6fd2610c1706248c7b9ed351e62f110b161f5dad # Parent c7b2da4e2df67d762aa3ab1fb74399c13b9bb7f5# Parent 83776891508101cb519be9bc350735530b83c7b8 merge diff -r c7b2da4e2df6 -r 6fd2610c1706 doc/v2_planning/API_coding_style.txt --- a/doc/v2_planning/API_coding_style.txt Mon Oct 18 14:58:39 2010 -0400 +++ b/doc/v2_planning/API_coding_style.txt Mon Oct 18 14:58:52 2010 -0400 @@ -655,6 +655,44 @@ .. _Wiki page: http://www.iro.umontreal.ca/~lisa/twiki/bin/view.cgi/Divers/VimPythonRecommendations +Commit message +============== + + * A one line summary. Try to keep it short, and provide the information + that seems most useful to other developers: in particular the goal of + a change is more useful than its description (which is always + available through the changeset patch log). E.g. say "Improved stability + of cost computation" rather than "Replaced log(exp(a) + exp(b)) by + a + log(1 + exp(b - a)) in cost computation". + * If needed, a blank line followed by a more detailed summary + * Make a commit for each logical modification + * This makes reviews easier to do + * This makes debugging easier as we can more easily pinpoint errors in + commits with hg bisect + * NEVER commit reformatting with functionality changes + * Review your change before committing + * "hg diff ..." to see the diff you have done + * "hg record" allows you to select which changes to a file should be + committed. To enable it, put into the file ~/.hgrc: + + .. code-block:: bash + + [extensions] + hgext.record= + + * hg record / diff force you to review your code, never commit without + running one of these two commands first + * Write detailed commit messages in the past tense, not present tense. + * Good: "Fixed Unicode bug in RSS API." + * Bad: "Fixes Unicode bug in RSS API." + * Bad: "Fixing Unicode bug in RSS API." + * Separate bug fixes from feature changes. 
+ * When fixing a ticket, start the message with "Fixed #abc" + * Can make a system to change the ticket? + * When referencing a ticket, start the message with "Refs #abc" + * Can make a system to put a comment to the ticket? + + TODO ==== diff -r c7b2da4e2df6 -r 6fd2610c1706 pylearn/algorithms/tests/test_mcRBM.py --- a/pylearn/algorithms/tests/test_mcRBM.py Mon Oct 18 14:58:39 2010 -0400 +++ b/pylearn/algorithms/tests/test_mcRBM.py Mon Oct 18 14:58:52 2010 -0400 @@ -519,3 +519,111 @@ def checkpoint(): return checkpoint run_classif_experiment(checkpoint=checkpoint) + + + +if 0: # TEST IDEA OUT HERE + + + class doc_db(dict): + # A key->document dictionary. + # A "document" is itself a dictionary. + + # A "document" can be a small or large object, but it cannot be partially retrieved. + + # This simple data structure is used in pylearn to cache intermediate results between + # several process invocations. + + class UNSPECIFIED(object): pass + + class CtrlObj(object): + + def get(self, key, default_val=UNSPECIFIED, copy=True): + # Default to return a COPY because a set() is required to make a change persistent. + # Inplace changes that the CtrlObj does not know about (via set) will not be saved. + pass + + def get_key(self, val): + """Return the key that retrieved `val`. + + This is useful for specifying cache keys for unhashable (e.g. numpy) objects that + happen to be stored in the db. 
+ """ + # if + # lookup whether val is an obj + pass + def set(self, key, val): + pass + def delete(self, key): + pass + def checkpoint(self): + pass + + @staticmethod + def cache_pickle(pass_ctrl=False): + def decorator(f): + # cache rval using pickle mechanism + def rval(*args, **kwargs): + pass + return rval + return decorator + + @staticmethod + def cache_dict(pass_ctrl=False): + def decorator(f): + # cache rval dict directly + def rval(*args, **kwargs): + pass + return rval + return decorator + + @staticmethod(f): + def cache_numpy(pass_ctrl=False, memmap_thresh=100*1000*1000): + def decorator(f): + # cache rval dict directly + def rval(*args, **kwargs): + pass + return rval + return decorator + + @CtrlObj.cache_numpy() + def get_whitened_dataset(pca_parameters): + # do computations + return None + + @CtrlObj.cache_pickle(pass_ctrl=True) + def train_mcRBM(data, lr, n_hid, ctrl): + + rbm = 45 + for i in 10000: + # do some training + rbm += 1 + ctrl.checkpoint() + return rbm + + def run_experiment(args): + + ctrl_obj = CtrlObj.factory(args) + # Could use db, or filesystem, or both, etc. + # There would be generic ones, but the experimenter should be very aware of what is being + # cached where, when, and how. This is how results are stored and retrieved after all. + # Cluster-friendly jobs should not use local files directly, but should store cached + # computations and results to such a database. + # Different jobs should avoid using the same keys in the database because coordinating + # writes is difficult, and conflicts will inevitably arise. 
+ + raw_data = get_raw_data(ctrl=ctrl) + raw_data_key = ctrl.get_key(raw_data) + pca = get_pca(raw_data, max_energy=.05, ctrl=ctrl, + _ctrl_raw_data_key=raw_data_key) + whitened_data = get_whitened_dataset(pca_parameters, ctrl=ctrl, + _ctrl_data_key=raw_data_key) + + rbm = train_mcRBM( + data=whitened_data, + lr=0.01, + n_hid=100, + ctrl=ctrl, + _ctrl_data_key=raw_data_key + ) + diff -r c7b2da4e2df6 -r 6fd2610c1706 pylearn/datasets/caltech.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/datasets/caltech.py Mon Oct 18 14:58:52 2010 -0400 @@ -0,0 +1,46 @@ +""" +Various routines to load/access Caltech 101 Silhouettes data. +""" + +import os +import numpy + +from pylearn.io.pmat import PMat +from pylearn.datasets.config import data_root # config +from pylearn.datasets.dataset import Dataset + +def caltech_silhouette(): + + rval = Dataset() + + + path = os.path.join(data_root(), 'caltech_silhouettes') + + rval.train = Dataset.Obj(x=numpy.load(os.path.join(path,'train_data.npy')), + y=numpy.load(os.path.join(path,'train_labels.npy'))) + rval.valid = Dataset.Obj(x=numpy.load(os.path.join(path,'val_data.npy')), + y=numpy.load(os.path.join(path,'val_labels.npy'))) + rval.test = Dataset.Obj(x=numpy.load(os.path.join(path,'test_data.npy')), + y=numpy.load(os.path.join(path,'test_labels.npy'))) + + rval.n_classes = 101 + rval.img_shape = (28,28) + + return rval + +def caltech_silhouette2(): + + rval = Dataset() + + from scipy import io + path = '/data/lisa6/desjagui/caltech101_silhouettes_28_split1.mat' + + data = io.loadmat(open(path,'r')) + + rval.train = Dataset.Obj(x=data['train_data'], y=data['train_labels']) + rval.valid = Dataset.Obj(x=data['val_data'], y=data['val_labels']) + rval.test = Dataset.Obj(x=data['test_data'], y=data['test_labels']) + rval.n_classes = 101 + rval.img_shape = (28,28) + + return rval diff -r c7b2da4e2df6 -r 6fd2610c1706 pylearn/datasets/test_modes.py --- a/pylearn/datasets/test_modes.py Mon Oct 18 14:58:39 2010 -0400 +++ 
b/pylearn/datasets/test_modes.py Mon Oct 18 14:58:52 2010 -0400 @@ -99,7 +99,8 @@ def __init__(self, n_modes, img_shape, seed=238904, min_p=1e-4, max_p=1e-1, - min_w=0., max_w=1.): + min_w=0., max_w=1., + w = None, p = None): self.n_modes = n_modes self.img_shape = img_shape @@ -107,9 +108,14 @@ self.img_size = numpy.prod(img_shape) # generate random p, w values - self.p = min_p + self.rng.rand(n_modes) * (max_p - min_p) - w = min_w + self.rng.rand(n_modes) * (max_w - min_w) + if p is None: + p = min_p + self.rng.rand(n_modes) * (max_p - min_p) + self.p = p + + if w is None: + w = min_w + self.rng.rand(n_modes) * (max_w - min_w) self.w = w / numpy.sum(w) + self.sort_w_idx = numpy.argsort(self.w) self.modes = self.rng.randint(0,2,size=(n_modes,self.img_size)) diff -r c7b2da4e2df6 -r 6fd2610c1706 pylearn/formulas/costs.py --- a/pylearn/formulas/costs.py Mon Oct 18 14:58:39 2010 -0400 +++ b/pylearn/formulas/costs.py Mon Oct 18 14:58:52 2010 -0400 @@ -1,10 +1,23 @@ """ -Common training criteria. + +TODO: make sure stabilization optimizations are done. +TODO: write tests. +TODO: check that this works for n-d tensors. """ + +#""" +#Common training criteria. +#""" import theano import theano.tensor as T + from tags import tags +__authors__ = "Frederic Bastien, Nicolas Boulanger-Lewandowski, .." +__copyright__ = "(c) 2010, Universite de Montreal" +__license__ = "3-clause BSD License" +__contact__ = "theano-user " + @tags('cost','binary','cross-entropy') def binary_crossentropy(output, target): """ Compute the crossentropy of binary output wrt binary target. 
@@ -16,66 +29,141 @@ :param output: Binary output or prediction :math:`\in[0,1]` :type target: Theano variable :param target: Binary target usually :math:`\in\{0,1\}` + + :note: no stabilization optimization needed for a generic output variable """ return -(target * T.log(output) + (1.0 - target) * T.log(1.0 - output)) +@tags('cost','binary','cross-entropy', 'sigmoid') +def sigmoid_crossentropy(output, target): + """ crossentropy of a sigmoid activation + + .. math:: + L_{CE} \equiv t\log(\sigma(a)) + (1-t)\log(1-\sigma(a)) + + :type output: Theano variable + :param output: Output before activation + :type target: Theano variable + :param target: Target + + :note: no stabilization done. + """ + return target * (- T.log(1.0 + T.exp(-output))) + (1.0 - target) * (- T.log(1.0 + T.exp(output))) + +@tags('cost','binary','cross-entropy', 'tanh') +def tanh_crossentropy(output, target): + """ crossentropy of a tanh activation + + .. math:: + L_{CE} \equiv t\log(\\frac{1+\\tanh(a)}2) + (1-t)\log(\\frac{1-\\tanh(a)}2) + + :type output: Theano variable + :param output: Output before activation + :type target: Theano variable + :param target: Target + + :note: no stabilization done. + """ + return sigmoid_crossentropy(2.0*output, target) + +@tags('cost','binary','cross-entropy', 'tanh', 'abs') +def abstanh_crossentropy(output, target): + """ crossentropy of a absolute value tanh activation + + .. math:: + L_{CE} \equiv t\log(\\frac{1+\\tanh(|a|)}2) + (1-t)\log(\\frac{1-\\tanh(|a|)}2) + + :type output: Theano variable + :param output: Output before activation + :type target: Theano variable + :param target: Target + + :note: no stabilization done. + """ + return tanh_crossentropy(T.abs_(output), target) + +@tags('cost','binary','cross-entropy', 'tanh', 'normalized') +def normtanh_crossentropy(output, target): + """ crossentropy of a "normalized" tanh activation (LeCun) + + .. 
math:: + L_{CE} \equiv t\log(\\frac{1+\\tanh(0.6666a)}2) + (1-t)\log(\\frac{1-\\tanh(0.6666a)}2) + + :type output: Theano variable + :param output: Output before activation + :type target: Theano variable + :param target: Target + + :note: no stabilization done. + """ + return tanh_crossentropy(0.6666*output, target) + +@tags('cost','binary','cross-entropy', 'tanh', 'normalized', 'abs') +def absnormtanh_cross_entropy(output, target): + """ crossentropy of a "absolute normalized" tanh activation + + .. math:: + L_{CE} \equiv t\log(\\frac{1+\\tanh(0.6666*|a|)}2) + (1-t)\log(\\frac{1-\\tanh(0.6666*|a|)}2) + + :type output: Theano variable + :param output: Output before activation + :type target: Theano variable + :param target: Target + + :note: no stabilization done. + """ + return normtanh_crossentropy(T.abs_(output), target) + +def cross_entropy(output_act, output, target, act=None): + """ Execute the cross entropy with a sum on the last dimension and a mean on the first dimension. + + If act is in 'sigmoid', 'tanh', 'tanhnorm', 'abstanh', 'abstanhnorm' we + call the specialized version. + + .. 
math:: + -mean(sum(target*log(output_act) + (1-target)*log(1-output_act), axis=-1), axis=0) + + :type output_act: Theano variable + :param output_act: Output after activation + :type output: Theano variable + :param output: Output before activation + :type target: Theano variable + :param target: Target + :type act: str or None + :param act: The type of activation used + """ + if act in ['sigmoid','tanh','tanhnorm','abstanh','abstanhnorm']: + if act == 'sigmoid': + return sigmoid_crossentropy(output, target) + if act == 'tanh': + return tanh_crossentropy(output, target) + if act == 'tanhnorm': + return normtanh_crossentropy(output, target) + if act == 'abstanh': + return abstanh_crossentropy(output, target) + if act == 'abstanhnorm': + return absnormtanh_cross_entropy(output, target) + elif act is None: + XE = target * T.log(output_act) + (1 - target) * T.log(1 - output_act) + return -T.mean(T.sum(XE, axis=-1),axis=0) + else: + raise Exception("cross_entropy() Expected parameter act to be in ['sigmoid','tanh','tanhnorm','abstanh','abstanhnorm', None]") + +def quadratic_cost(output, target): + """ The quadratic cost of output against target with a sum on the last dimension and a mean on the first dimension. + + .. math:: + mean(sum(sqr(output-target),axis=-1),axis=0) + + :type output: Theano variable + :param output: The value that we want to compare against target + :type target: Theano variable + :param target: The value that we consider correct + """ + return T.mean(T.sum(T.sqr(output - target), axis=-1),axis=0) + + # This file seems like it has some overlap with theano.tensor.nnet. Which functions should go # in which file? -@tags('cost','binary','cross-entropy', 'sigmoid') -def sigmoid_crossentropy(output_act, target): - """ Stable crossentropy of a sigmoid activation - - .. 
math:: - L_{CE} \equiv t\log(\sigma(a)) + (1-t)\log(1-\sigma(a)) - - :type output_act: Theano variable - :param output: Activation - :type target: Theano variable - :param target: Binary target usually :math:`\in\{0,1\}` - """ - return target * (- T.log(1.0 + T.exp(-output_act))) + (1.0 - target) * (- T.log(1.0 + T.exp(output_act))) - -@tags('cost','binary','cross-entropy', 'tanh') -def tanh_crossentropy(output_act, target): - """ Stable crossentropy of a tanh activation - - .. math:: - L_{CE} \equiv t\log(\\frac{1+\\tanh(a)}2) + (1-t)\log(\\frac{1-\\tanh(a)}2) - - :type output_act: Theano variable - :param output: Activation - :type target: Theano variable - :param target: Binary target usually :math:`\in\{0,1\}` - """ - return sigmoid_crossentropy(2.0*output_act, target) - -@tags('cost','binary','cross-entropy', 'tanh', 'abs') -def abstanh_crossentropy(output_act, target): - """ Stable crossentropy of a absolute value tanh activation - - .. math:: - L_{CE} \equiv t\log(\\frac{1+\\tanh(|a|)}2) + (1-t)\log(\\frac{1-\\tanh(|a|)}2) - - :type output_act: Theano variable - :param output: Activation - :type target: Theano variable - :param target: Binary target usually :math:`\in\{0,1\}` - """ - return tanh_crossentropy(T.abs_(output_act), target) - -@tags('cost','binary','cross-entropy', 'tanh', "normalized") -def normtanh_crossentropy(output_act, target): - """ Stable crossentropy of a "normalized" tanh activation (LeCun) - - .. 
math:: - L_{CE} \equiv t\log(\\frac{1+\\tanh(0.6666a)}2) + (1-t)\log(\\frac{1-\\tanh(0.6666a)}2) - - :type output_act: Theano variable - :param output: Activation - :type target: Theano variable - :param target: Binary target usually :math:`\in\{0,1\}` - """ - return tanh_crossentropy(0.6666*output_act, target) - diff -r c7b2da4e2df6 -r 6fd2610c1706 pylearn/formulas/noise.py --- a/pylearn/formulas/noise.py Mon Oct 18 14:58:39 2010 -0400 +++ b/pylearn/formulas/noise.py Mon Oct 18 14:58:52 2010 -0400 @@ -22,7 +22,7 @@ """ @tags.tags('noise','binomial','salt') -def binomial_noise(theano_rng,input,noise_lvl): +def binomial_noise(theano_rng, input, noise_lvl, noise_value=0): """ Return `inp` with randomly-chosen elements set to zero. @@ -32,13 +32,21 @@ :param input: input :type noise_lvl: float :param noise_lvl: The probability of setting each element to zero. + :type noise_value: Theano scalar variable + :param noise_value: The value that we want when there is noise. """ mask = theano_rng.binomial( - size = inp.shape, + size = input.shape, n = 1, p = 1 - noise_lvl, - dtype=inp.dtype) - return mask * input + dtype=input.dtype) + value = theano.tensor.as_tensor_variable(noise_value) + if value.type.ndim!=0: + raise Exception('binomial_noise only support scalar noise_value') + if noise_value==0: + return mask * input + else: + return mask * input + noise_value*(not mask) @tags.tags('noise','binomial NLP','pepper','salt') @@ -48,7 +56,10 @@ :type inp: Theano variable :param inp: The input that we want to add noise :type noise_lvl: tuple(float,float) - :param noise_lvl: The %% of noise for the salt and pepper. Between 0 (no noise) and 1. + :param noise_lvl: The probability of changing each element to zero or one. + (prob of salt, prob of pepper) + + :note: The sum of the prob of salt and prob of pepper should be less than 1. """ assert inp.dtype in ['float32','float64'] return theano_rng.binomial( size = inp.shape, n = 1, p = 1 - noise_lvl[0], dtype=inp.dtype) * inp \