Mercurial > pylearn
changeset 1387:5a76d56be0bf
Merged
author | Olivier Delalleau <delallea@iro> |
---|---|
date | Tue, 14 Dec 2010 14:22:16 -0500 |
parents | e3d02b0a05e3 (current diff) 8b857d141761 (diff) |
children | 0ff6c613cdf0 |
files | |
diffstat | 10 files changed, 292 insertions(+), 135 deletions(-) [+] |
line wrap: on
line diff
--- a/doc/conf.py Tue Dec 14 14:18:41 2010 -0500 +++ b/doc/conf.py Tue Dec 14 14:22:16 2010 -0500 @@ -33,6 +33,11 @@ except ImportError: pass +try: + import numpydoc + extensions.append('numpydoc') +except ImportError: + pass # Add any paths that contain templates here, relative to this directory. templates_path = ['.templates']
--- a/doc/formulas.txt Tue Dec 14 14:18:41 2010 -0500 +++ b/doc/formulas.txt Tue Dec 14 14:22:16 2010 -0500 @@ -20,4 +20,8 @@ .. automodule:: pylearn.formulas.noise :members: +pylearn.formulas.regularization +------------------------------- +.. automodule:: pylearn.formulas.regularization + :members:
--- a/doc/v2_planning/code_review.txt Tue Dec 14 14:18:41 2010 -0500 +++ b/doc/v2_planning/code_review.txt Tue Dec 14 14:22:16 2010 -0500 @@ -10,13 +10,87 @@ TODO ---- -- Install Review Board and try it - - test review of merge - - how work with branch -- Write our proposed politic +- Test the 2 proposed workflow (review board and github) + +Reason for the code review +-------------------------- + +- We want at least 2 people to read all code. That mean we need a reviewer +- This help to find better solution to problem +- This help to train people on our tools and framework +- This give better code and less bug in the end. + - Everybode make mistake... + +Proposed Politic +---------------- + +Their is 2 proposal that we want to test: + +- GitHub fork/merge/merge request for new Pylearn + - When someone ask for a merge request, someone else make the review. +- Review Board post-commit for Theano + - For each commit message, if the author don't want this commit to be reviewed, tell it in the message + - Usefull for experimental repository, not Theano + - Usefull for Official repo when we commit something that is disabled by default and we want to split it in many commits or start using it event if not fully tested. + - Reviewer should check that it is not enabled by default and when enabled it print a warning(once per job execution) + - We check all commit to Theano and Jobman.(Official tools) + - If the official review is not the right person for the task(gpu code,...) + He have the responsability to find the right persone (ask people in the lab, mailing-list, ...) + - The official reviewer should do: + - Review all code(see the check list) and ask expert when needed. + - Should check the check list again all review. + - We choose the reviewer in the theano user of the lab with commit right. + - We make a list of expert by domain of problem.(gpu, optimization, algo,...) + - For the test, the official reviewer work for one day. 
+ - Later when we finish the test, maybe we will want to make that longer(1 week?) + - If some body break the build bot, it is him the reviewer for the next week/days + - Maximum of one week by month. + - If their is big week or during rush that include every body of the lab, we can change more frequently. + - If a commit have problem, it is the original reviewer that should make the follow up. + +We should never be the official reviewer of our own code. When this happen, ask someone else to do it. -Some system that we should check: ---------------------------------- +Check list for review +--------------------- + +- Is their tests and do they test all case? + - Do test cover all case? +- Is their documentation in the code file? + - Do this need doc in the html(public) documentation? +- Is the addition well integrated into our framework +- Is the code well placed in the right files and right place in them? +- Try to don't duplicate code +- Is the code clear/comprehensible +- Are the comment describing what is being done? +- Answer question by de commiter, this can also serve to train people +- Check for typo +- No debug code(print, breakpoint,...) +- If commit message tell to don't review, check that the code is disabled by default and that when enabled print a warning. 
+- Check for conformence to our coding guideline + +Some system that we checked +--------------------------- + +- `Review Board <http://www.reviewboard.org>`_ + - Interesting, but some questions remain (how well it integrates with hg, + notably) + - Some advantages over Google code (comment on multi-line chunks, list of + unreviewed commits, more esthetics, handle many repo, keep assemble easily) + - Fred will install it so we can test it more thoroughly +- `GitHub pull request <https://github.com/blog/712-pull-requests-2-0>`_ + - pre-commit review + +- `Google Code <http://code.google.com/p/support/wiki/CodeReviews>`_ + - Test bench with a clone of Theano at + http://code.google.com/p/theanoclone/ + - post-commit + - no list of not reviewed commit + - no python syntax highlight + - weird comment by line + - diff of merge seam bugged + - Maybe + +- `Kiln <http://fogcreek.com/Kiln/LearnMore.html?section=StartReviewsEffortlessly>`_ - `rietveld <http://code.google.com/p/rietveld/>`_ - Made by Guido van Rossum, seam basic and svn only @@ -26,13 +100,6 @@ - git only - No -- `Review Board <http://www.reviewboard.org>`_ - - Interesting, but some questions remain (how well it integrates with hg, - notably) - - Some advantages over Google code (comment on multi-line chunks, list of - unreviewed commits, more esthetics, handle many repo, keep assemble easily) - - Fred will install it so we can test it more thoroughly - - `Code Striker <http://codestriker.sourceforge.net/>`_ - hg added? David told in May 2009 it can do it easily. - Seems less interesting than Review Board @@ -45,21 +112,16 @@ - Could be integrated with the current ticket system?, not maintained, review code in general, not commit. 
- No +- `track CodeReviewPlugin <http://trac-hacks.org/wiki/CodeReviewPlugin/Concepts>`_ + +- `track ExoWebCodeReviewPlugin <http://trac-hacks.org/wiki/ExoWebCodeReviewPlugin>`_ + - `feature request at assembla <http://feedback.assembla.com/forums/5433-feature-requests/suggestions/253297-add-a-code-review-tool-e-g-reviewboard->`_ - No (we will not wait until the feature is added...) - `JCR <http://jcodereview.sourceforge.net/>`_ - No -- `Google Code <http://code.google.com/>`_ - - Test bench with a clone of Theano at - http://code.google.com/p/theanoclone/ - - post-commit - - no list of not reviewed commit - - no python syntax highlight - - weird comment by line - - diff of merge seam bugged - - Maybe What we could want from our code review --------------------------------------- @@ -96,44 +158,3 @@ We seam to do Over-the-shoulder, email and variant of pair programming from time to time. Some people read rapidly the commit of Theano and Pylearn. -Reason for the code review --------------------------- - -- We want at least 2 people to read all code. That mean we need a reviewer -- This help to find better solution to problem -- This help to train people on our tools and framework. - -Check list for review ---------------------- - -- Is their tests and do they test all case? -- Is their documentation in the file? - - Do this need doc in the html doc? -- Is the addition well integrated into our framework -- Is the code well placed in the right files and right place in them? -- Try to don't duplicate code -- Is the code clear/comprehensible -- Are the comment describing what is being done? -- Answer question by de commiter, this can also serve to train people -- Check for typo -- No debug code(print, breakpoint,...) -- If commit message tell to don't review, check that the code is disabled by default and that when enabled print a warning. 
- -Proposed Politic ----------------- - -- For each commit message, if the author don't want this commit to be reviewed, tell it in the message - - Usefull for experimental repository, not Theano - - Usefull for Official repo when we commit something that is disabled by default and we want to split in many commits or start using event if not fully tested. - - Reviewer should check that the check is not enabled by default and when enabled should print a warning. -- We check all commit to Theano, Pylearn and Jobman.(Official tools) -- We check experimental repos when asked. -- One official reviewer per week. - - He review all code and ask expert when needed. - - Should check the check list again all review. - - We choose the reviewer in the theano user of the lab with commit right. - - On fait une list d'expert par demain de problem(gpu, optimization, algo,...) - - If some body break the build bot, it is him the reviewer for the next week - - Maximum of one week by mount. - - If their is big week or during rush that include every body of the lab, we can change more frequently. - - If a commit have problem, it is the original reviewer that should make the follow up.
--- a/pylearn/datasets/embeddings/parameters.py Tue Dec 14 14:18:41 2010 -0500 +++ b/pylearn/datasets/embeddings/parameters.py Tue Dec 14 14:22:16 2010 -0500 @@ -1,8 +1,8 @@ """ Locations of the embedding data files. """ -WEIGHTSFILE = "/home/fringant2/lisa/data/word_embeddings.collobert-and-weston/lm-weights.txt" -VOCABFILE = "/home/fringant2/lisa/data/word_embeddings.collobert-and-weston/words.asc" +WEIGHTSFILE = "/data/lisa/data/word_embeddings.collobert-and-weston/lm-weights.txt" +VOCABFILE = "/data/lisa/data/word_embeddings.collobert-and-weston/words.asc" #WEIGHTSFILE = "/home/joseph/data/word_embeddings.collobert-and-weston/lm-weights.txt" #VOCABFILE = "/home/joseph/data/word_embeddings.collobert-and-weston/words.asc" NUMBER_OF_WORDS = 30000
--- a/pylearn/datasets/miniblocks.py Tue Dec 14 14:18:41 2010 -0500 +++ b/pylearn/datasets/miniblocks.py Tue Dec 14 14:22:16 2010 -0500 @@ -17,7 +17,7 @@ #from plearn.pyext import pl #data = pl.AutoVMatrix(filename='/u/delallea/LisaPLearn/UserExp/delallea/perso/gen_compare/1DBall_12.amat').getMat() - #data = pl.AutoVMatrix(filename='/home/fringant2/lisa/delallea/python_modules/LeDeepNet/mnist_binarized.pmat').getMat() + #data = pl.AutoVMatrix(filename='/data/lisa/exp/delallea/python_modules/LeDeepNet/mnist_binarized.pmat').getMat() #input = data # Note that the target being returned seems to be a dummy target. So
--- a/pylearn/datasets/smallNorb.py Tue Dec 14 14:18:41 2010 -0500 +++ b/pylearn/datasets/smallNorb.py Tue Dec 14 14:22:16 2010 -0500 @@ -4,7 +4,7 @@ from pylearn.datasets.config import data_root #Path = '/u/bergstrj/pub/data/smallnorb' -#Path = '/home/fringant2/lisa/louradoj/data/smallnorb' +#Path = '/data/lisa/datasmallnorb' #Path = '/home/louradou/data/norb' class Paths(object):
--- a/pylearn/datasets/tinyimages.py Tue Dec 14 14:18:41 2010 -0500 +++ b/pylearn/datasets/tinyimages.py Tue Dec 14 14:22:16 2010 -0500 @@ -9,6 +9,8 @@ import PIL.Image import numpy +import pylearn.io.image_tiling + logger = logging.getLogger('pylearn.datasets.tinyimages') def sorted_listdir(*path): @@ -61,20 +63,39 @@ yield it.next() i +=1 + +def arrange_first_N_into_tiling(R,C, filename): + R=int(R) + C=int(C) + A = numpy.asarray([i.copy() for i,ii in zip(image_generator(), xrange(R*C))], + dtype='float32') + print A.shape + A.shape = (R*C, 32*32,3) + pylearn.io.image_tiling.save_tiled_raster_images( + pylearn.io.image_tiling.tile_raster_images( + (A[:,:,0], A[:,:,1], A[:,:,2], None), + (32,32)), + filename) + + n_images = 1608356 -def main(): - def iter_len(x): - i = 0 - for xx in x: - i += 1 - return i - n_files = iter_len(iterate_over_filenames()) - print 'got %i files' % n_files - assert n_images == n_files +def main(argv=[]): + if argv: + arrange_first_N_into_tiling( argv[0], argv[1], argv[2]) + else: + def iter_len(x): + i = 0 + for xx in x: + i += 1 + return i + n_files = iter_len(iterate_over_filenames()) + print 'got %i files' % n_files + assert n_images == n_files - for p in load_first_N(10): - load_image(os.path.join(*p)) + for p in load_first_N(10): + load_image(os.path.join(*p)) + if __name__ == '__main__': - sys.exit(main()) + sys.exit(main(sys.argv[1:]))
--- a/pylearn/formulas/activations.py Tue Dec 14 14:18:41 2010 -0500 +++ b/pylearn/formulas/activations.py Tue Dec 14 14:22:16 2010 -0500 @@ -24,6 +24,7 @@ function of the input x. .. math:: + \\textrm{sigmoid}(x) = \\frac{1}{1 + e^{-x}} The image of :math:`\\textrm{sigmoid}(x)` is the open interval (0, @@ -31,13 +32,18 @@ point representations, :math:`\\textrm{sigmoid}(x)` will lie in the closed range [0, 1]. - :param x: tensor-like (a Theano variable with type theano.Tensor, - or a value that can be converted to one) :math:`\in - \mathbb{R}^n` + Parameters + ---------- + x : tensor-like + A Theano variable with type theano.Tensor, or a value that can be + converted to one :math:`\in \mathbb{R}^n` - :return: a Theano variable with the same shape as the input, where - the sigmoid function is mapped to each element of the - input x. + Returns + ------- + ret : a Theano variable with the same shape as the input + where the sigmoid function is mapped to each element of the + input `x`. + """ return theano.tensor.nnet.sigmoid(x) @@ -52,6 +58,7 @@ tangent) of the input x. .. math:: + \\textrm{tanh}(x) = \\frac{e^{2x} - 1}{e^{2x} + 1} The image of :math:`\\textrm{tanh}(x)` is the open interval (-1, @@ -59,13 +66,16 @@ point representations, :math:`\\textrm{tanh}(x)` will lie in the closed range [-1, 1]. - :param x: tensor-like (a Theano variable with type theano.Tensor, - or a value that can be converted to one) :math:`\in - \mathbb{R}^n` + Parameters + ---------- + x : tensor-like + A Theano variable with type theano.Tensor, or a value that can be + converted to one :math:`\in \mathbb{R}^n` - :return: a Theano variable with the same shape as the input, where - the tanh function is mapped to each element of the input - x. + Returns + ------- + ret : a Theano variable with the same shape as the input + where the tanh function is mapped to each element of the input `x`. """ return theano.tensor.tanh(x) @@ -81,6 +91,7 @@ TODO: where does 1.759 come from? 
why is it normalized like that? .. math:: + \\textrm{tanh\_normalized}(x) = 1.759\\textrm{ tanh}\left(\\frac{2x}{3}\\right) The image of :math:`\\textrm{tanh\_normalized}(x)` is the open @@ -90,13 +101,17 @@ closed range [-1.759, 1.759]. The exact bound depends on the precision of the floating point representation. - :param x: tensor-like (a Theano variable with type theano.Tensor, - or a value that can be converted to one) :math:`\in - \mathbb{R}^n` + Parameters + ---------- + x : tensor-like + A Theano variable with type theano.Tensor, or a value that can be + converted to one :math:`\in \mathbb{R}^n` - :return: a Theano variable with the same shape as the input, where - the tanh\_normalized function is mapped to each element of - the input x. + Returns + ------- + ret : a Theano variable with the same shape as the input + where the tanh_normalized function is mapped to each element of + the input `x`. """ return 1.759*theano.tensor.tanh(0.6666*x) @@ -111,6 +126,7 @@ hyperbolic tangent of x. .. math:: + \\textrm{abs\_tanh}(x) = |\\textrm{tanh}(x)| The image of :math:`\\textrm{abs\_tanh}(x)` is the interval [0, 1), @@ -118,13 +134,17 @@ point representations, :math:`\\textrm{abs\_tanh}(x)` will lie in the range [0, 1]. - :param x: tensor-like (a Theano variable with type theano.Tensor, - or a value that can be converted to one) :math:`\in - \mathbb{R}^n` + Parameters + ---------- + x : tensor-like + A Theano variable with type theano.Tensor, or a value that can be + converted to one :math:`\in \mathbb{R}^n` - :return: a Theano variable with the same shape as the input, where - the abs_tanh function is mapped to each element of the - input x. + Returns + ------- + ret : a Theano variable with the same shape as the input + where the abs_tanh function is mapped to each element of + the input `x`. """ return theano.tensor.abs_(theano.tensor.tanh(x)) @@ -140,6 +160,7 @@ TODO: where does 1.759 come from? why is it normalized like that? .. 
math:: + \\textrm{abs\_tanh\_normalized}(x) = \left|1.759\\textrm{ tanh}\left(\\frac{2x}{3}\\right)\\right| The image of :math:`\\textrm{abs\_tanh\_normalized}(x)` is the range @@ -149,13 +170,17 @@ approximative closed range [0, 1.759]. The exact upper bound depends on the precision of the floating point representation. - :param x: tensor-like (a Theano variable with type theano.Tensor, - or a value that can be converted to one) :math:`\in - \mathbb{R}^n` + Parameters + ---------- + x: tensor-like + A Theano variable with type theano.Tensor, or a value that can be + converted to one :math:`\in \mathbb{R}^n` - :return: a Theano variable with the same shape as the input, where - the abs_tanh_normalized function is mapped to each - element of the input x. + Returns + ------- + ret: a Theano variable with the same shape as the input + where the abs_tanh_normalized function is mapped to each + element of the input `x`. """ return theano.tensor.abs_(1.759*theano.tensor.tanh(0.6666*x)) @@ -167,13 +192,20 @@ Returns a symbolic variable that computes the softsign of ``input``. .. math:: + f(input) = \\frac{input}{1.0 + |input|} - :type input: tensor-like - :param input: input tensor to which softsign should be applied - :rtype: Theano variable - :return: tensor obtained after applying the softsign function + Parameters + ---------- + input : tensor-like + A Theano variable with type theano.Tensor, or a value that can be + converted to one :math:`\in \mathbb{R}^n` + Returns + ------- + ret : a Theano variable with the same shape as the input + where the softsign function is mapped to each + element of the input `x`. """ return input/(1.0 + tensor.abs_(input)) @@ -186,11 +218,17 @@ .. 
math:: f(input) = \left| \\frac{input}{1.0 +|input|} \\right| - :type input: tensor-like - :param input: input tensor to which softsign should be applied - :rtype: Tensor variable - :return: tensor obtained by taking the absolute value of softsign - of the input + Parameters + ---------- + input : tensor-like + A Theano variable with type theano.Tensor, or a value that can be + converted to one :math:`\in \mathbb{R}^n` + + Returns + ------- + ret : a Theano variable with the same shape as the input + where the absolute value of the softsign function is mapped to each + element of the input `x`. """ return tensor.abs_(input)/(1.0 + tensor.abs_(input)) @@ -202,19 +240,24 @@ and only if it is positive, 0 otherwise. .. math:: + f(input) = \left \lbrace \\begin{array}{l} input \quad \\text{ if } input > 0 \\ 0 \quad \\text{ else } \end{array} \\right \} - :type input: tensor-like - :param input: input tensor to which the rectifier activation function - will be applied - :rtype: Tensor variable - :return: always positive tensor which equals with the input if it is also - positive or to 0 otherwise + Parameters + ---------- + input : tensor-like + A Theano variable with type theano.Tensor, or a value that can be + converted to one :math:`\in \mathbb{R}^n` + Returns + ------- + ret : a Theano variable with the same shape as the input + A tensor always positive whose element equals the inputs if it is also + positive or to 0 otherwise """ return input*(input>=0) @@ -226,12 +269,20 @@ at initialization. .. 
math:: + f(input) = ln \left( 1 + e^{input} \\right) - :type input: tensor-like - :param input: input tensor to which the softplus should be applied - :rtype: Theano variable - :return: tensor obtained by applying softsign on the input + Parameters + ---------- + input : tensor-like + A Theano variable with type theano.Tensor, or a value that can be + converted to one :math:`\in \mathbb{R}^n` + + Returns + ------- + ret : a Theano variable with the same shape as the input + where the softplus function is mapped to each + element of the input `x`. """ return tensor.nnet.softplus(input) @@ -242,15 +293,21 @@ ``input``. .. math:: + f(input) = |input| - :type input: tensor-like - :param input: input tensor - :rtype: Theano variable - :return: tensor that represents the absolute value of the input + Parameters + ---------- + input : tensor-like + A Theano variable with type theano.Tensor, or a value that can be + converted to one :math:`\in \mathbb{R}^n` - - """ + Returns + ------- + ret : a Theano variable with the same shape as the input + where the absolute function is mapped to each + element of the input `x`. + """ return theano.tensor.abs_(input)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/formulas/regularization.py Tue Dec 14 14:22:16 2010 -0500 @@ -0,0 +1,49 @@ + +""" +Different symbolic regularization and sparsity functions. +""" + +import theano +import theano.tensor as T + +from tags import tags + +__authors__ = "Frederic Bastien, Nicolas Boulanger-Lewandowski, .." +__copyright__ = "(c) 2010, Universite de Montreal" +__license__ = "3-clause BSD License" +__contact__ = "theano-user <theano-users@googlegroups.com>" + +@tags('regularization', 'L1') +def l1(x, target = 0, axis_sum = -1, axis_mean = 0): + """ Construct the L1 regularization penalty :math:`\sum|x-target|` + + :type x: Theano variable + :param x: Weights or other variable to regularize + :type target: Theano variable + :param target: Target of x + :type axis_sum: Scalar + :param axis_sum: Axis along which the penalty terms will be summed (e.g. output units) + :type axis_mean: Scalar + :param axis_mean: Axis along which the penalty terms will be averaged (e.g. minibatches) + + :note: no stabilization required + """ + return T.mean(T.sum(T.abs_(x - target), axis_sum), axis_mean) + +@tags('regularization', 'L2') +def l2(x, target = 0, axis_sum = -1, axis_mean = 0): + """ Construct the L2 regularization penalty :math:`\sum(x-target)^2` + + :type x: Theano variable + :param x: Weights or other variable to regularize + :type target: Theano variable + :param target: Target of x + :type axis_sum: Scalar + :param axis_sum: Axis along which the penalty terms will be summed (e.g. output units) + :type axis_mean: Scalar + :param axis_mean: Axis along which the penalty terms will be averaged (e.g. minibatches) + + :note: no stabilization required + """ + return T.mean(T.sum((x - target)**2, axis_sum), axis_mean) +
--- a/pylearn/shared/layers/tests/test_kouh2008.py Tue Dec 14 14:18:41 2010 -0500 +++ b/pylearn/shared/layers/tests/test_kouh2008.py Tue Dec 14 14:22:16 2010 -0500 @@ -46,7 +46,7 @@ fN = f(xval, yval) assert fN < f0 f0 = fN - if 0 == i % 5: print i, 'rval', fN + #if 0 == i % 5: print i, 'rval', fN return fN