changeset 1158:aea510b71386

merge
author Arnaud Bergeron <abergeron@gmail.com>
date Fri, 17 Sep 2010 12:01:32 -0400
parents 9686c0d9689d (current diff) f2105a06201c (diff)
children 8b65a1b27b94 3c2d7c5f0cf7
files
diffstat 20 files changed, 2167 insertions(+), 193 deletions(-) [+]
line wrap: on
line diff
--- a/.hgignore	Fri Sep 17 12:01:12 2010 -0400
+++ b/.hgignore	Fri Sep 17 12:01:32 2010 -0400
@@ -3,3 +3,4 @@
 *.swp
 *.pyc
 core.*
+html
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc/formulas.txt	Fri Sep 17 12:01:32 2010 -0400
@@ -0,0 +1,14 @@
+
+.. contents:: Formulas categories
+
+pylearn.formulas.costs
+-----------------------
+.. automodule:: pylearn.formulas.costs
+    :members:
+
+pylearn.formulas.noise
+-----------------------
+.. automodule:: pylearn.formulas.noise
+    :members:
+ 
+
--- a/doc/index.txt	Fri Sep 17 12:01:12 2010 -0400
+++ b/doc/index.txt	Fri Sep 17 12:01:32 2010 -0400
@@ -24,6 +24,7 @@
 
 For the moment, the following documentation is available.
 
+* `Formulas <formulas.html>`_ -- Built-in math formulas optimized for speed and robustness
 * :doc:`io.SeriesTables module <seriestables>` -- Saves error series and other statistics during training
 * `API <api/>`_ -- The automatically-generated API documentation
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc/v2_planning/API_coding_style.txt	Fri Sep 17 12:01:32 2010 -0400
@@ -0,0 +1,253 @@
+=========================
+ Coding Style Guidelines
+=========================
+
+Main Goals
+==========
+
+    * Code should be compatible with Python 2.4 and above (using 2to3 for
+      conversion to Python 3.x). This may not be possible in the short term
+      for Theano-dependent code.
+
+    * Code should be easy to read, understand and update by developers and
+      users.
+
+    * Code should be well-documented and well-tested.
+
+Python Coding Guidelines
+========================
+
+Official Guidelines
+-------------------
+
+Source Material
+~~~~~~~~~~~~~~~
+
+The four main documents describing our Python coding guidelines are:
+    * `PEP 8 -- Style Guide for Python Code
+      <http://www.python.org/dev/peps/pep-0008>`_
+    * `PEP 257 -- Docstring Conventions
+      <http://www.python.org/dev/peps/pep-0257>`_
+    * `Numpy Docstring Standard
+      <http://projects.scipy.org/numpy/wiki/CodingStyleGuidelines#docstring-standard>`_
+    * `Google Python Style Guide
+      <http://google-styleguide.googlecode.com/svn/trunk/pyguide.html>`_
+
+
+However, there are a few points mentioned in those documents that we decided
+to do differently:
+
+    * Use only one space (not two) after a sentence-ending period in comments.
+
+    * You do not need to add an extra blank line before the closing quotes of
+      a multi-line docstring.
+
+      .. code-block:: python
+
+        # Good.
+        """This is a multi-line docstring.
+
+        Which means it has more than one line.
+        """
+
+        # Bad.
+        """This is a multi-line docstring.
+
+        Which means it has more than one line.
+
+        """
+
+Excerpts
+~~~~~~~~
+
+We emphasize here a few important topics that are found in the official
+guidelines:
+
+    * Avoid using lists if all you care about is iterating on something. Using
+      lists:
+        - uses more memory (and possibly more CPU if the code may break out of
+          the iteration),
+        - can lead to ugly code when converted to Python 3 with 2to3,
+        - can have a different behavior if evaluating elements in the list has
+          side effects (if you want these side effects, make it explicit by
+          assigning the list to some variable before iterating on it).
+
+      +------------------------+------------------------+
+      | Iterative version      |    List version        |
+      +========================+========================+
+      | .. code-block:: python | .. code-block:: python |
+      |                        |                        |
+      |   my_dict.iterkeys     |   my_dict.keys         |
+      |   my_dict.itervalues   |   my_dict.values       |
+      |   my_dict.iteritems    |   my_dict.items        |
+      +------------------------+------------------------+
+      | .. code-block:: python | .. code-block:: python |
+      |                        |                        |
+      |   itertools.ifilter    |   filter               |
+      |   itertools.imap       |   map                  |
+      |   itertools.izip       |   zip                  |
+      +------------------------+------------------------+
+      | .. code-block:: python | .. code-block:: python |
+      |                        |                        |
+      |   xrange               |   range                |
+      +------------------------+------------------------+
+
+      Code example with ``map``:
+
+      .. code-block:: python
+
+        # Good.
+        for f_x in imap(f, x):
+            ...
+        all_f_x = map(f, x)
+        map(f, x)
+        # Bad.
+        for element in map(f, x):
+            ...
+        imap(f, x)
+
+    * Generally prefer list comprehensions to ``map`` / ``filter``, as the former are
+      easier to read.
+
+      .. code-block:: python
+
+        # Good.
+        non_comments = [line.strip() for line in my_file.readlines()
+                                     if not line.startswith('#')]
+        # Bad.
+        non_comments = map(str.strip,
+                           ifilter(lambda line: not line.startswith('#'),
+                                   my_file.readlines()))
+ 
+    * Use ``in`` on container objects instead of using class-specific methods:
+      it is easier to read and may allow you to re-use your code with different
+      container types.
+
+      .. code-block:: python
+
+        # Good.
+        has_key = key in my_dict
+        has_substring = substring in my_string
+        # Bad.
+        has_key = my_dict.has_key(key)
+        has_substring = my_string.find(substring) >= 0
+
+
+Additional Recommendations
+--------------------------
+
+Things you should do even if they are not listed in official guidelines:
+
+    * Avoid backslashes whenever possible. They make it more
+      difficult to edit code, and they are ugly (as well as potentially
+      dangerous if there are trailing white spaces).
+
+      .. code-block:: python
+
+        # Good.
+        if (cond_1 and
+            cond_2 and
+            cond_3):
+            ... 
+        # Bad.
+        if cond_1 and \
+           cond_2 and \
+           cond_3:
+            ...
+
+    * When indenting multi-line statements like lists or function arguments,
+      keep elements of the same level aligned with each other.
+      The position of the first
+      element (on the same line or a new line) should be chosen depending on
+      what is easiest to read (sometimes both can be ok).
+
+      .. code-block:: python
+
+        # Good.
+        for my_very_long_variable_name in [my_foo, my_bar, my_love,
+                                           my_everything]:
+            ...
+        for my_very_long_variable_name in [
+                my_foo, my_bar, my_love, my_everything]:
+            ...
+        # Good iff the list needs to be frequently updated or is easier to
+        # understand when each element is on its own line.
+        for my_very_long_variable_name in [
+                my_foo,
+                my_bar,
+                my_love,
+                my_everything,
+                ]:
+            ...
+        # Good as long as it does not require more than two lines.
+        for my_very_long_variable_name in [my_foo,
+                                           my_bar]:
+            ...
+        # Bad.
+        for my_very_long_variable_name in [my_foo, my_bar, my_love,
+                my_everything]:
+            ...
+        for my_very_long_variable_name in [my_foo,
+                                           my_bar,
+                                           my_love,
+                                           my_everything]:
+            ...
+
+
+The ``logging`` Module vs. the ``warnings`` Module
+==================================================
+
+The ``logging`` Module
+----------------------
+
+A central logging facility for Python capable of logging messages of various
+categories/urgency and choosing with some granularity which messages are
+displayed/suppressed, as well as where they are displayed or written. This
+includes an ``INFO`` level for innocuous status information, a ``WARNING`` level
+for unexpected state that is still recoverable, ``DEBUG`` for detailed
+information which is only really of interest when things are going wrong, etc.
+
+In addition to the `library documentation`_, see this helpful tutorial,
+`Python Logging 101`_.
+
+.. _library documentation: http://docs.python.org/library/logging.html
+.. _Python Logging 101: http://plumberjack.blogspot.com/2009/09/python-logging-101.html
+
+The ``warnings`` Module
+-----------------------
+
+The ``warnings`` module in the standard library and its main interface, the
+``warn()`` function, allows the programmer to issue warnings in situations where
+they wish to alert the user to some condition, but the situation is not
+urgent enough to throw an exception. By default, a warning issued at a given
+line of the code will only be displayed the first time that line is executed.
+By default, warnings are written to ``sys.stderr`` but the ``warnings`` module
+contains flexible facilities for altering the defaults, redirecting, etc.
+
+Which? When?
+------------
+
+It is our feeling that the ``logging`` module's ``WARNING`` level should be used
+to log warnings more meant for *internal*, *developer* consumption, to log situations
+where something unexpected happened that may be indicative of a problem but
+is several layers of abstraction below what a user of the library would
+care about.
+
+By contrast, the ``warnings`` module should be used for warnings intended for user
+consumption, e.g. alerting them that their version of Pylearn is older than
+this plugin requires, so things may not work as expected, or that a given
+function/class/method is slated for deprecation in a coming release (early
+in the library's lifetime, ``DeprecationWarning`` will likely be the most common
+case). The warning message issued through this facility should avoid
+referring to Pylearn internals.
+
+Code Sample
+===========
+
+The following code sample illustrates many of the coding guidelines one should
+follow in Pylearn.
+
+.. code-block:: python
+
+    import os, sys, time
+
--- a/doc/v2_planning/api_optimization.txt	Fri Sep 17 12:01:12 2010 -0400
+++ b/doc/v2_planning/api_optimization.txt	Fri Sep 17 12:01:32 2010 -0400
@@ -23,83 +23,141 @@
 only uses Theano for the implementation.
 
 
-Iterative Interface
--------------------
+Theano Interface
+-----------------
+
+The theano interface to optimization algorithms is to ask for a dictionary of
+updates that can be used in theano.function.  Implementations of iterative
+optimization algorithms should be global functions with a signature like
+'iterative_optimizer'.
 
-def iterative_optimizer(parameters, 
-        cost=None,
-        grads=None,
-        stop=None, 
-        updates=None,
-        **kwargs):
-    """
-    :param parameters: list or tuple of Theano variables (typically shared vars)
-        that we want to optimize iteratively.  If we're minimizing f(x), then
-        together, these variables represent 'x'.
-
-    :param cost: scalar-valued Theano variable that computes an exact or noisy estimate of
-        cost  (what are the conditions on the noise?).  Some algorithms might
-        need an exact cost, some algorithms might ignore the cost if the grads
-        are given.
+    def iterative_optimizer(parameters, 
+            cost=None,
+            gradients=None,
+            stop=None, 
+            updates=None,
+            **kwargs):
+        """
+        :param parameters: list or tuple of Theano variables 
+            that we want to optimize iteratively.  If we're minimizing f(x), then
+            together, these variables represent 'x'.  Typically these are shared
+            variables and their values are the initial values for the minimization
+            algorithm.
 
-    :param grads: list or tuple of Theano variables representing the gradients on
-        the corresponding parameters.  These default to tensor.grad(cost,
-        parameters).
+        :param cost: scalar-valued Theano variable that computes an exact or noisy estimate of
+            cost  (what are the conditions on the noise?).  Some algorithms might
+            need an exact cost, some algorithms might ignore the cost if the
+            gradients are given.
 
-    :param stop: a shared variable (scalar integer) that (if provided) will be
-        updated to say when the iterative minimization algorithm has finished
-        (1) or requires more iterations (0).
+        :param gradients: list or tuple of Theano variables representing the gradients on
+            the corresponding parameters.  These default to tensor.grad(cost,
+            parameters).
 
-    :param updates: a dictionary to update with the (var, new_value) items
-        associated with the iterative algorithm.  The default is a new empty
-        dictionary.  A KeyError is raised in case of key collisions.
+        :param stop: a shared variable (scalar integer) that (if provided) will be
+            updated to say when the iterative minimization algorithm has finished
+            (1) or requires more iterations (0).
 
-    :param kwargs: algorithm-dependent arguments
+        :param updates: a dictionary to update with the (var, new_value) items
+            associated with the iterative algorithm.  The default is a new empty
+            dictionary.  A KeyError is raised in case of key collisions.
 
-    :returns: a dictionary mapping each parameter to an expression that it
-       should take in order to carry out the optimization procedure.
+        :param kwargs: algorithm-dependent arguments
 
-       If all the parameters are shared variables, then this dictionary may be
-       passed as the ``updates`` argument to theano.function.
+        :returns: a dictionary mapping each parameter to an expression that it
+           should take in order to carry out the optimization procedure.
 
-       There may be more key,value pairs in the dictionary corresponding to
-       internal variables that are part of the optimization algorithm.
+           If all the parameters are shared variables, then this dictionary may be
+           passed as the ``updates`` argument to theano.function.
 
-    """
+           There may be more key,value pairs in the dictionary corresponding to
+           internal variables that are part of the optimization algorithm.
+
+        """
 
 
-One-shot Interface
-------------------
+Numpy Interface
+---------------
+
+The numpy interface to optimization algorithms is supposed to mimic
+scipy's.  Its arguments are numpy arrays, and functions that manipulate numpy
+arrays.
 
-def minimize(x0, f, df, opt_algo, **kwargs):
-    """
-    Return a point x_new that minimizes function `f` with derivative `df`.
+    def minimize(x0, f, df, opt_algo, **kwargs):
+        """
+        Return a point x_new with the same type as x0 that minimizes function `f`
+        with derivative `df`.
+
+        This is supposed to provide an interface similar to scipy's minimize
+        routines, or MATLAB's.
+
+        :type x0: numpy ndarray or list of numpy ndarrays.
+        :param x0: starting point for minimization
 
-    This is supposed to provide an interface similar to scipy's minimize
-    routines, or MATLAB's.
+        :type f: python callable mapping something like x0 to a scalar
+        :param f: function to minimize
+
+        :type df: python callable mapping something like x0 to the derivative of f at that point
+        :param df: derivative of `f`
+
+        :param opt_algo: one of the functions that implements the
+        `iterative_optimizer` interface.
 
-    :type x0: numpy ndarray
-    :param x0: starting point for minimization
+        :param kwargs: passed through to `opt_algo`
+
+        """
+
 
-    :type f: python callable mapping something like x0 to a scalar
-    :param f: function to minimize
+There is also a numpy-based wrapper to the iterative algorithms.
+This can be more useful than minimize() because it doesn't hog program
+control.  Technically minimize() is probably implemented using this
+minimize_iterator interface.
 
-    :type df: python callable mapping something like x0 to the derivative of f at that point
-    :param df: derivative of `f`
+    class minimize_iterator(object):
+        """
+        Attributes
+         - x  - the current best estimate of the minimum
+         - f  - the function being minimized
+         - df - f's derivative function
+         - opt_algo - the optimization algorithm at work (a serializable, callable
+           object with the signature of iterative_optimizer above).
 
-    :param opt_algo: one of the functions that implements the
-    `iterative_optimizer` interface.
+        """
+        def __init__(self, x0, f, df, opt_algo, **kwargs):
+            """Initialize state (arguments as in minimize())
+            """
+        def __iter__(self): 
+            return self
+        def next(self):
+            """Take a step of minimization and return self raises StopIteration when
+            the algorithm is finished with minimization
 
-    :param kwargs: passed through to `opt_algo`
+            """
+
 
-    """
+Examples
+--------
 
-OD: Could it be more convenient for x0 to be a list?
+Simple stochastic gradient descent could be called like this:
+
+    sgd([p], gradients=[g], step_size=.1) 
+
+and this would return
+
+    {p:p-.1*g}
+
+
+Simple stochastic gradient descent with extra updates:
 
-OD: Why make a difference between iterative and one-shot versions? A one-shot
-    algorithm can be seen as an iterative one that stops after its first
-    iteration. The difference I see between the two interfaces proposed here
-    is mostly that one relies on Theano while the other one does not, but
-    hopefully a non-Theano one can be created by simply wrapping around the
-    Theano one.
+    sgd([p], gradients=[g], updates={a:b}, step_size=.1) 
+
+will return 
+
+    {a:b, p:p-.1*g}
+
 
+If the parameters collide with keys in a given updates dictionary an exception
+will be raised:
+
+    sgd([p], gradients=[g], updates={p:b}, step_size=.1) 
+    
+will raise a KeyError.
--- a/doc/v2_planning/architecture.txt	Fri Sep 17 12:01:12 2010 -0400
+++ b/doc/v2_planning/architecture.txt	Fri Sep 17 12:01:32 2010 -0400
@@ -3,13 +3,13 @@
 ====================
 
 
-Basic Design Approach
-=====================
+SE + VM Approach
+=================
 
-I propose that the basic design of the library follow the Symbolic Expression
-(SE) structure + virtual machine (VM) pattern that worked for Theano.
+One avenue for the basic design of the library is to follow the Symbolic
+Expression (SE) structure + virtual machine (VM) pattern that worked for Theano.
 
-So the main things for the library to provide would be:
+The main things for the library to provide would be:
 
 - a few VMs, some of which can run programs in parallel across processors,
   hosts, and networks [R6,R8];
@@ -57,4 +57,87 @@
 just not essential to choose an API that will guarantee a match, or indeed to
 choose any explicit API at all.
 
+YB: I agree that lambdas are more flexible, but from the user's point of
+view it is really important to know what can swap with what, so that they
+can easily plug-and-play. So even if informal, something in the spirit
+of an API must be described somewhere, and components should declare
+either formally or through comments what functionality 'type' 
+they can take on.
 
+Encapsulation vs. linearity
+---------------------------
+
+A while ago, the Apstat crew went to fight "encapsulation" to propose instead
+a more "linearized" approach to experiment design. I must admit I didn't
+really understand the deep motivations behind this, and after practicing both
+styles (encapsulation for PLearn / Theano, linearity @ ARL / Ubisoft), I still
+don't. I do find, however, some not-so-deep-but-still-significant advantages
+to the linear version, which hopefully can be made clear (along with a
+clarification of what the h*** I am talking about) in the following example:
+
+   * Linear version:
+    my_experiment = pipeline([
+        data,
+        filter_samples,
+        PCA,
+        k_fold_split,
+        neural_net,
+        evaluation,
+    ])
+
+   * Encapsulated version:
+    my_experiment = evaluation(
+        data=PCA(filter_samples(data)),
+        split=k_fold_split,
+        model=neural_net)
+
+What I like in the linear version is it is much more easily human-readable
+(once you know what it means): you just follow the flow of the experiment by
+reading through a single list.
+On the other hand, the encapsulated version requires some deeper analysis to
+understand what is going on and in which order.
+Also, commenting out parts of the processing is simpler in the first case (it
+takes a single # in front of an element).
+However, linearity tends to break when the experiment is actually not linear,
+i.e. the graph of object dependencies is more complex (*).
+
+I'm just bringing this up because it may be nice to be able to provide the
+user with the most intuitive way to design experiments. I actually don't think
+those approaches are mutually exclusive, and it could be possible for the
+underlying system to use the more flexible / powerful encapsulated
+representation, while having the option to write simple scripts in a form that
+is easier to understand and manipulate.
+
+It could also be worth discussing this issue with Xavier / Christian /
+Nicolas.
+
+(*) Note that I cheated a bit in my example above: the graph from the
+encapsulated version is not a simple chain, so it is not obvious how to
+convert it into the pipeline given in the linear version. It's still possible
+though, but this is probably not the place to get into the details.
+
+RP comment : The way I see it, you could always have everything using the
+encapsulation paradigm ( which as you pointed out is a bit more powerful) and
+then have linear shortcuts ( functions that take a list of functions and some
+inputs and apply them in some order). You will not be able to have a one case
+cover all pipeline function, but I think it is sufficient to offer such
+options (linear functions) for a few widely used cases ..
+
+
+Jobman Compatibility Approach
+=============================
+
+One basic approach for the library is to provide a set of components that are
+compatible with remote execution.  The emphasis could be not so much on
+standardizing the roles and APIs of components, so much as ensuring that they
+can be glued together and supports parallel execution on one or more CPUs or
+clusters.
+
+In this approach we would provide a proxy for asynchronous execution
+(e.g. "pylearn.call(fn, args, kwargs, backend=default_backend)"), which would
+come with constraints on what fn, args, and kwargs can be.  Specifically, they
+must be picklable, and there are benefits (e.g. automatic function call caching)
+associated with them being hashable as well.
+
+
+
--- a/doc/v2_planning/coding_style.txt	Fri Sep 17 12:01:12 2010 -0400
+++ b/doc/v2_planning/coding_style.txt	Fri Sep 17 12:01:32 2010 -0400
@@ -8,7 +8,139 @@
 - David
 - Olivier D [leader]
 
+Open for public debate
+----------------------
 
+    * Avoid contractions in code comments (particularly in
+      documentation): "We do not add blue to red because it does not look good"
+      rather than "We don't add blue to red because it doesn't look good".
+      OD: I mostly find it to be cleaner (been used to it while writing
+          scientific articles too).
+      JB: +1
+
+   * Imperative vs. third-person comments.
+        # Return the sum of elements in x.  <-- imperative
+        # Returns the sum of elements in x. <-- third-person
+     OD: I am used to the imperative form and like it better only because it
+         typically saves one letter (the 's') and is easier to conjugate.
+     JB: What about being compatible with markup formats that have a :returns:
+         tag?
+     OD: That'd make sense. However, when I wrote the above I hadn't looked
+         closely at PEP257 yet, and I just noticed the following official
+         recommendation for one-line docstrings in it:
+            The docstring is a phrase ending in a period. It prescribes the
+            function or method's effect as a command ("Do this", "Return that"), not as a
+            description; e.g. don't write "Returns the pathname ...".
+         Anyone knows which style is most popular in the open-source
+         community?
+
+    * OD: I like always doing the following when subclassing
+      a class A:
+        class B(A):
+            def __init__(self, b_arg_1, b_arg_2, **kw):
+                super(B, self).__init__(**kw)
+                ...
+      The point here is that the constructor always allows for extra keyword
+      arguments (except for the class at the very top of the hierarchy), which
+      are automatically passed to the parent class.
+      Pros:
+        - You do not need to repeat the parent class arguments whenever you
+          write a new subclass.
+        - Whenever you add an argument to the parent class, all child classes
+          can benefit from it without modifying their code.
+      Cons:
+        - One needs to look at the parent classes to see what these arguments
+          are.
+        - You cannot use a **kw argument in your constructor for your own
+          selfish purpose.
+        - I have no clue whether one could do this with multiple inheritance.
+        - More?
+      Question: Should we encourage this in Pylearn?
+
+      JB: +0.5
+
+Closed for public debate
+------------------------
+
+   * Use imports for packages and modules only. I.e. avoid
+        from foo import *
+        from foo import Bar
+     OD: Overall I agree with this. However we probably want to allow some
+        exceptions, like:
+            from itertools import imap, izip
+        Also, some people may want to have shortcuts like
+            from theano import tensor as T
+        but I would prefer to forbid this. It is handy when trying stuff in
+        the interactive interpreter, but in real code it can easily get messy
+        when you want to copy / paste different pieces of code and they use
+        different conventions. Typing tensor.* is a bit longer, but a lot more
+        portable.
+     JB: I thought that these are nice:
+         - "from foo import Bar" 
+         - "from foo import Bar, Blah"
+        What's wrong with them?  They keep the code listing short and readable.
+        I would discourage these forms when symbols 'Bar' and 'Blah' are
+        ambiguous, in which case the parent module prefix serves to disambiguate
+        them in the code.
+        I agree that the "import A as B" form should be discouraged in general,
+        because that's just confusing and makes code less grep-friendly.
+     OD: I agree that "from foo import Bar, Blah" is sometimes convenient
+        (typically when you re-use Bar / Blah many times in the same file),
+        and would vote in favor of accepting it when it is appropriate.
+        This guideline was taken from Google's coding recommendation:
+            "from foo import * or from foo import Bar is very nasty and can
+             lead to serious maintenance issues because it makes it hard to find
+             module dependencies."
+     OD: Decision was taken in committee's meeting to allow
+            from foo import Bar, Blah
+         when imported stuff is re-used multiple times in the same file, and
+         there is no ambiguity.
+
+   * Imports should usually be on separate lines.
+     OD: I would add an exception, saying it is ok to group multiple imports
+        from the standard library on a single line, e.g.
+            import os, sys, time
+        I just don't see much benefit in putting them on separate lines (for
+        third-party imports I agree it is best to keep them separate, as it
+        makes dependencies clearer, and diffs look better when someone adds /
+        removes an import).  Does anyone see a good reason to keep standard
+        library imports on different lines?
+     JB: what does 'usually' mean here? The guideline seems vacuous.
+     OD: Sorry my fault, I did not quote the whole guideline from PEP8. The
+         'usually' was because of what followed:
+            it's okay to say this though:
+                from subprocess import Popen, PIPE
+         (which btw contradicts Google's recommendation mentioned previously)
+     OD: Decision was taken in committee's meeting to allow multiple imports
+         on the same line for standard library modules (only).
+
+    * The BDFL recommends inserting a blank line between the
+      last paragraph in a multi-line docstring and its closing quotes, placing
+      the closing quotes on a line by themselves. This way, Emacs'
+      fill-paragraph command can be used on it.
+      OD: I think it is ugly and I have not seen it used much. Any Emacs
+          user believes it is a must?
+      OD: Decision was taken in committee's meeting to drop this
+          recommendation.
+
+    * JB: How should we combine capitalization and underscores to name classes
+          and functions related to an algorithm like 'SGD' or a model like 'RBM'
+          whose common name is capitalized?  Case in point: How should I name a
+          Hybrid Monte Carlo Sampler?  Should I use the common HMC abbreviation?
+      OD: This one is answered by PEP8 (search HTTPServerError in it).
+          You should use:
+            RBMClassName
+            rbm_function_name
+          As far as using abbreviations is concerned:
+            All identifiers in the Python standard library (...) SHOULD use
+            English words wherever feasible (in many cases, abbreviations and
+            technical terms are used which aren't English).
+          so I guess HMC is ok when using Hybrid Monte Carlo is considered to
+          make some names too long.
+
+
+Note about warnings
+-------------------
 
 Fred: This is a refactored thing from James email of what we should put in message
 that we send to the user:
@@ -19,28 +151,29 @@
 Existing Python coding style specifications and guidelines
 ----------------------------------------------------------
 
-    * http://www.python.org/dev/peps/pep-0008/ Style Guide for Python Code
-    * http://www.python.org/dev/peps/pep-0257/ Docstring Conventions 
-    * http://google-styleguide.googlecode.com/svn/trunk/pyguide.html Google Python Style Guide
-    * http://www.voidspace.org.uk/python/articles/python_style_guide.shtml
-    * http://python.net/~goodger/projects/pycon/2007/idiomatic/handout.html
-    * http://www.cs.caltech.edu/courses/cs11/material/python/misc/python_style_guide.html
-    * http://barry.warsaw.us/software/STYLEGUIDE.txt
-    * http://self.maluke.com/style
-    * http://chandlerproject.org/Projects/ChandlerCodingStyleGuidelines
-    * http://lists.osafoundation.org/pipermail/dev/2003-March/000479.html
-    * http://learnpython.pbworks.com/PythonTricks
-    * http://eikke.com/how-not-to-write-python-code/
-    * http://jaynes.colorado.edu/PythonGuidelines.html
-    * http://docs.djangoproject.com/en/dev/internals/contributing/#coding-style
-    * http://projects.scipy.org/numpy/wiki/CodingStyleGuidelines 
+  * Must-read
+    * Official Python coding style guide: http://www.python.org/dev/peps/pep-0008
+    * Official docstring conventions: http://www.python.org/dev/peps/pep-0257
+    * Google Python Style Guide: http://google-styleguide.googlecode.com/svn/trunk/pyguide.html
+  * Interesting
+    * Code Like a Pythonista: http://python.net/~goodger/projects/pycon/2007/idiomatic/handout.html
+    * Numpy notes on conversion to Python 3: http://projects.scipy.org/numpy/browser/trunk/doc/Py3K.txt
+  * Can skip
+    * Python style for university class: http://www.cs.caltech.edu/courses/cs11/material/python/misc/python_style_guide.html
+    * Mailman coding style: http://barry.warsaw.us/software/STYLEGUIDE.txt
+    * Some company coding style: http://self.maluke.com/style
+    * Chandler coding style: http://chandlerproject.org/Projects/ChandlerCodingStyleGuidelines
+    * Outdated recommendations: http://lists.osafoundation.org/pipermail/dev/2003-March/000479.html
+    * Mostly some beginners tips: http://learnpython.pbworks.com/PythonTricks
+    * More beginners tips: http://eikke.com/how-not-to-write-python-code/
+    * Cogent coding guidelines: http://jaynes.colorado.edu/PythonGuidelines.html
+    * Django coding guidelines: http://docs.djangoproject.com/en/dev/internals/contributing/#coding-style
+    * Numpy documentation style guidelines: http://projects.scipy.org/numpy/wiki/CodingStyleGuidelines 
+    * Some random guy guidelines (nothing special): http://www.voidspace.org.uk/python/articles/python_style_guide.shtml
 
 We will probably want to take PEP-8 as starting point, and read what other
 people think about it / how other coding guidelines differ from it.
 
-Dumi: we should also try to find tools that automate these
-processes: pylint, pyflakes, pychecker, pythontidy
-
 OD: Things about PEP 8 I don't like (but it may be just me):
 
    * If necessary, you can add an extra pair of parentheses around an
@@ -61,27 +194,7 @@
     or less been wiped out by HTML's convention of ignoring extra 
     whitespace: see http://en.wikipedia.org/wiki/Sentence_spacing for
     more detail. I think it's okay to drop this convention in source code.)
-
-   * Imports should usually be on separate lines
-    --> Can be a lot of lines wasted for no obvious benefit. I think this is
-        mostly useful when you import different modules from different places,
-        but I would say that for instance for standard modules it would be
-        better to import them all on a single line (doing multiple lines only
-        if there are too many of them), e.g. prefer:
-            import os, sys, time
-        to
-            import os
-            import sys
-            import time
-        However, I agree about separating imports between standard lib / 3rd
-        party, e.g. prefer:
-            import os, sys, time
-            import numpy, scipy
-        to
-            import numpy, os, scipy, sys, time
-        (Personal note: preferably order imports by alphabetical order, makes
-         it easier to quickly see if a specific module is already imported,
-         and avoids duplicated imports)
+    OD: Cool, thanks, I guess we can drop it then.
 
     * Missing in PEP 8:
         - How to indent multi-line statements? E.g. do we want
@@ -101,12 +214,6 @@
           be to go with 2 when it can fit on two lines, and 3 otherwise. Same
           with lists.
 
-    * From PEP 257: The BDFL [3] recommends inserting a blank line between the
-      last paragraph in a multi-line docstring and its closing quotes, placing
-      the closing quotes on a line by themselves. This way, Emacs'
-      fill-paragraph command can be used on it.
-     --> I have nothing against Emacs, but this is ugly!
-
 Documentation
 -------------
 
@@ -136,16 +243,13 @@
 Use RST with Sphinx.
 Task: Provide specific examples on how to document a class, method, and some
 specific classes like Op (DE). Modify the theano documentation to include that.
+OD: May want to check out
+    http://projects.scipy.org/numpy/wiki/CodingStyleGuidelines
 
    * Python versions to be supported
 Support 2.4 (because some of the clusters are still running 2.4) and write
 code that can be converted to 3.x with 2to3 in a straightforward way.
 Task: Write to-do's and to-not-do's to avoid compatibility issues. (OD)
-(DWF: Pauli Virtanen and others have put together extensive
-documentation in the process of porting NumPy to Py3K, see his notes at
-http://projects.scipy.org/numpy/browser/trunk/doc/Py3K.txt -- this is
-the most complete resource for complicated combinations of Python and C).
- 
 
    * C coding style
 How to write C code (in particular for Numpy / Cuda), and how to mix C and
@@ -162,6 +266,8 @@
    * Automatized code verification
 Use pychecker & friends to make sure everything is fine.
 Task: Look into the various options available (DE)
+Result: See sections 'Tools to help us out' and 'Automating and enforcing coding
+style'
 
    * Tests
 Force people to write tests. Automatic email reminder of code lines not
@@ -170,6 +276,7 @@
 automatically warn the user when he is using untested stuff (and to remind
 ourselves we should add a test).
 Task: See feasibility. (OD)
+Result: See section 'Enforcing strict testing policy'.
 
    * VIM / Emacs plugins / config files
 To enforce good coding style automatically.
@@ -181,79 +288,9 @@
 
 Have a sample code that showcases everything one should comply to.
 
-Some coding guidlines (work-in-progress from OD)
-------------------------------------------------
-
-   * Avoid using lists if all you care about is iterating on something. Using
-     lists:
-        - uses more memory (and possibly more CPU if the code may break out of
-          the iteration)
-        - can lead to ugly code when converted to Python 3 with 2to3
-        - can have a different behavior if evaluating elements in the list has
-          side effects (if you want these side effects, make it explicit by
-          assigning the list to some variable before iterating on it)
-    
-    Iterative version       List version
-    my_dict.iterkeys()      my_dict.keys()
-    my_dict.itervalues()    my_dict.values()
-    my_dict.iteritems()     my_dict.items()
-    itertools.imap          map
-    itertools.ifilter       filter
-    itertools.izip          zip
-    xrange                  range
-    
-    * Use `in` on container objects instead of using class-specific methods.
-      It is easier to read and may allow you to use your code with different
-      container types.
-
-    Yes                         No
-    ---                         --
-    key in my_dict              my_dict.has_key(key)
-    sub_string in my_string     my_string.find(sub_string) >= 0
-
-    * (Point to debate) Avoid contractions in code comments (particularly in
-      documentation): "We do not add blue to red because it does not look
-      good" rather than "We don't add blue to red because it doesn't look
-      good". I mostly find it to be cleaner (been used to it while writing
-      scientific articles too).
+Some coding guidelines (work-in-progress from OD)
+-------------------------------------------------
 
-   * (Point to debate) Imperative vs. third-person comments. I am used to the
-     imperative form and like it better only because it typically saves one
-     letter (the 's'): "Return the sum of elements in x" rather than
-     "Returns the sum of elements in x".
-
-    * (Point to debate) I like always doing the following when subclassing
-      a class A:
-        class B(A):
-            def __init__(self, b_arg_1, b_arg_2, **kw):
-                super(B, self).__init__(**kw)
-                ...
-      The point here is that the constructor always allow for extra keyword
-      arguments (except for the class at the very top of the hierarchy), which
-      are automatically passed to the parent class.
-      Pros:
-        - You do not need to repeat the parent class arguments whenever you
-          write a new subclass.
-        - Whenever you add an argument to the parent class, all child classes
-          can benefit from it without modifying their code.
-      Cons:
-        - One needs to look at the parent classes to see what these arguments
-          are.
-        - You cannot use a **kw argument in your constructor for your own
-          selfish purpose.
-        - I have no clue whether one could do this with multiple inheritance.
-        - More?
-      Question: Should we encourage this in Pylearn?
-
-   * Generally prefer list comprehensions to map / filter, as the former are
-     easier to read.
-    Yes:
-        non_comments = [line.strip() for line in my_file.readlines()
-                                     if not line.startswith('#')]
-    No:
-        non_comments = map(str.strip,
-                           filter(lambda line: not line.startswith('#'),
-                                  my_file.readlines()))
     
     * Use the `key` argument instead of `cmp` when sorting (for Python 3
       compatibility).
@@ -272,6 +309,12 @@
 
     * Code indent must be done with four blank characters (not with tabs).
 
+    * Limit lines to 79 characters.
+
+    * Comments should start with a capital letter (unless the first word is a
+      code identifier) and end with a period (very short inline comments may
+      ignore this rule).
+
     * Whenever you read / write binary files, specify it in the mode ('rb' for
       reading, 'wb' for writing). This is important for cross-platform and
       Python 3 compatibility (e.g. when pickling / unpickling objects).
@@ -290,9 +333,251 @@
         raise MyException(args)
       where MyException inherits from Exception.
 
+    * Imports should be listed in alphabetical order. It makes it easier to
+      verify that something is imported, and avoids duplicated imports.
+
+    * Use absolute imports only. This is compatible across a wider range of
+      Python versions, and avoids confusion about what is being
+      imported.
+
+    * Use a leading underscore '_' for internal attributes / methods,
+      but avoid the double underscore '__' unless you know what you are
+      doing.
+
+    * A script's only top-level code should be something like:
+        if __name__ == '__main__':
+            sys.exit(main())
+
+    * No conditional expression (not supported in Python 2.4). These are
+      expressions of the form
+        x = y if condition else z
+
+    * Use either "try ... except" or "try ... finally", but do not mix
+      "except" with "finally" (which is not supported in Python 2.4).
+      You can make a try... except inside a try... finally if you need both.
+
+    * Do not use the `all` and `any` builtin functions (they are not supported
+      in Python 2.4).
+      You can use numpy.{all,any} instead of import theano.gof.python25 that 
+      define all and any.
+      OD: I think we should have something like pylearn.compat.{all,any}.
+          numpy.{all,any} are meant to be used on arrays only.
+      OD: As agreed during committee's meeting, we will use
+          theano.gof.python25
+
+    * Do not use the `hashlib` module (not supported in Python 2.4).
+      You can do as in theano.gof.cc:
+      ..code::
+	if sys.version_info[:2] >= (2,5):
+	    import hashlib
+    	    def hash_from_code(msg):
+                return hashlib.md5(msg).hexdigest()
+        else:
+	    import md5
+	    def hash_from_code(msg):
+	        return md5.new(msg).hexdigest()
+    OD: Yep, we could probably come up with such a wrapper in a pylearn.compat
+        module.
+
+    * Do not use mutable arguments as default values. Instead, use a helper
+      function:
+      Yes:
+        def f(array=None):
+            array = pylearn.if_none(array, [])
+      No:
+        def f(array=[]):
+            # Dangerous if `array` is modified down the road.
+
 Mercurial commits
 -----------------
 
    * How to write good commit messages?
+    OD: Check Django's guidelines (link above)
    * Standardize the merge commit text (what is the message from fetch?)
 
+During committee's meeting, Fred mentioned a bug with Assembla links for
+multi-line commits.
+
+Tools to help us out
+---------------------
+
+Dumi:
+
+  * pylint: highly configurable and very popular tool, similar in spirit to lint
+  for C. Can specify a config file, customize/disable warnings and errors, hook
+  it to vim/emacs and include coding style conventions in the check too. A nice
+  feature is that you can include a comment like "# pylint: disable-msg=C0103"
+  into a file and disable a message locally. This is nice and dangerous at the
+  same time. Another cool feature is incremental checking with caching of
+  results, which also allows tracking of progress.
+
+  * pyflakes: pylint alternative that is supposedly faster, but is I think more
+  limited in the number of things it is good at: "PyFlakes will tell you when
+  you have forgotten an import, mistyped a variable name, defined two functions
+  with the same name, shadowed a variable from another scope, imported a module
+  twice, or two different modules with the same name, and so on.". Most reviews
+  found online praise the speed, but note that pylint is clearly superior in
+  every other respect.
+
+  * pychecker: it actually *imports* each module (not sure if pylint does this).
+  It seems that pylint = pychecker + coding style and that pylint is more
+  popular.
+
+  * pep8: if all you care is about obeying PEP-8:
+  http://pypi.python.org/pypi/pep8 (includes the actual PEP-8 snippets with the
+  errors found, which is neat). Otherwise, pylint seems like a superset of this. 
+
+  * http://www.doughellmann.com/articles/pythonmagazine/completely-different/2008-03-linters/index.html
+  - article from 2008 comparing pylint, pychecker, and pyflakes. The conclusion
+    is to use pylint, more or less.
+ 
+I say we stick with pylint for now as it provides a great degree of flexibility
+in a single mature package.
+
+  * vim + pylint: http://www.vim.org/scripts/script.php?script_id=891
+  * emacs + pylint: http://www.emacswiki.org/emacs/PythonProgrammingInEmacs#toc5
+
+Automating and enforcing coding style
+-------------------------------------
+
+Ideally, we would like to have a uniform approach to this, where everyone tests
+against the same tool(s) and uses the same list of disabled warnings etc.
+
+Dumi: there are several ways of approaching this, independently of the tools used:
+
+   * Create a precommit hook for mercurial, which runs the tool(s) of choice and
+   generates warnings or aborts the commit process. This hook is a simple Python
+   module (well, as simple as we want it to be), which we can include into
+   everyone's hgrc, in the precommit.pylint variable, for instance. An example
+   is http://github.com/jrburke/dvcs_jslint/blob/master/dvcs_jslint.js. The
+   advantage of this approach is that the load is distributed and
+   errors/warnings are caught client-side, before the commit.
+
+   * Another client-side option is to have editor plugins for the various style
+   checkers: vim and emacs can access pylint pretty easily for instance.
+
+   * Instead of doing this client-side, one can do things server-side. On
+   Assembla, this means using their Webhooks
+   (http://www.assembla.com/spaces/demostuff/webhook_tool), since HTTP-based
+   hooks that we would need to tie with our buildbot server (whichever server we
+   choose that to be).
+
+   * I (DE) prefer starting with the client-side approach, as it is easier to
+   implement, has no single point of failure and is deployable fast. We could
+   have a "batch" script that runs our lint tools in conjunction with hg
+   annotate and sends hate-mail once a week to offenders who have somehow
+   slipped things through the cracks. Also on the server-side we could run
+   time-consuming checking (though how such checks would differ from tests is
+   unclear).
+
+Note that:
+
+  * I haven't found anything ready-made online, so we need to write these
+  hooks ourselves.
+  * I think we should make it so that it is not possible to commit things if
+  pylint reports an actual error.
+
+Type checking
+-------------
+
+(Suggested by Francois Savard)
+
+vu que vous êtes en train de vous occuper de l'aspect coding style, je
+mentionne ceci, à faire ce que vous en voulez: j'aime bien éviter des
+erreurs sur l'ordre de mes paramètres, sur les assumptions sur les
+paramètres etc. en faisant des argument check. Ça remplace un peu le
+static type checking des langages genre Java.
+
+En Python y'a une façon élégante de définir ses propres typecheckers,
+value checkers etc. et ensuite les passer en paramètre à un décorateur de
+fonction:
+
+http://code.activestate.com/recipes/454322-type-checking-decorator/
+
+(Juste un exemple, vu que les checks peuvent être plus élaborés, inclure
+des value checks (>0 etc.), être flexibles pour ne pas demander que ce
+soit un type fixe mais plutôt que ça réponde à certaines contraintes (que
+ça "ressemble" à un float, p. ex.). J'avais développé une lib pour faire
+qqch du genre en Javascript).
+
+Je ne sais pas si vous comptiez parler de ça, et si ça vaut la peine, mais
+personnellement je préfère du code à des commentaires qui peuvent être out
+of sync avec le contenu d'une méthode. Si vous croyez que ça vaut la peine,
+vous pourriez p-e définir des type/value-checkers standards pour éviter que
+tout le monde redéfinissent les siens à sa façon.
+
+OD: This was discussed in committee's meeting. We agreed to provide ways to do
+this, but not to enforce its usage.
+
+Consistent inf / nan
+--------------------
+
+OD: Use numpy.inf and numpy.nan rather than float('inf') / float('nan')?
+(should be slightly more efficient even if efficiency usually doesn't matter
+here - the main goal would be for everyone to use the same inf / nan to make
+the code consistent).
+OD: Approved during committee's meeting.
+
+Enforcing strict testing policy
+-------------------------------
+
+The `coverage` third-party module provides a way to gather code coverage
+statistics in the test suite. `nosetests` has a plugin that can be activated
+with the --with-coverage option to use this module.
+It is possible to know which lines specifically lack coverage. However, we
+will probably want to post-process this data to do more than a simple report
+(which no one will care about). This could be done either by parsing nosetests'
+coverage output, or modifying its coverage plugin, or writing our own version
+of it. The main goal would be to identify who is responsible for writing lines
+that are not currently covered (using 'hg annotate'), in order to send email
+notifications.
+
+We should aim at 100% code coverage in tests. This is realistic because
+`coverage` offers ways to ignore coverage for lines we explicitly do not want
+to cover (typically debug code, or AssertionError / NotImplementedError that
+are not supposed to be triggered during normal usage).
+We may need to do some advanced processing though to e.g. collect results from
+multiple build bots, if for instance some bot is running tests without GPU
+support, and another one is taking care of the GPU tests.
+
+Code that should be tested but for which no test is currently written would
+also require some decorator / helper function that would trigger a warning at
+run-time (only once / execution). This could be enforced by adopting a
+different policy about lack-of-coverage notification emails, depending on
+whether or not the warning is present:
+- if there is no warning, daily email notification (ADD A WARNING!!!)
+- if there is a warning, weekly email notification (ADD A TEST!!!)
+
+Meeting 2010/09/16
+------------------
+
+Tasks to be performed by tomorrow:
+    * OD:
+        * Write down summary of Python coding style recommendations
+        * Start a file that showcases those guidelines
+    * DWF:
+        * Look into recommendations on how to document a class, method, ...
+        * Write recommendations on when to use logging vs. warning
+        * Make public some configuration files / plugins for vim
+        * Come up with official common file header (license in particular)
+
+Suggested per-file boilerplate
+------------------------------
+
+"""Module docstring as the first line, as usual."""
+
+__authors__ = "Olivier Delalleau, Frederic Bastien, David Warde-Farley"
+__copyright__ = "(c) 2010, Université de Montréal"
+__license__ = "3-clause BSD License"
+__contact__ = "Name Of Current Guardian of this file <email@address>"
+
+We could also pull Mercurial revision info and put it in __version__, this
+seems to be common.
+
+Editor setup
+------------
+
+(DWF:) Some enhanced configuration files for Vim that I've put a little bit
+of work into modifying in some cases can be found at:
+
+http://www.iro.umontreal.ca/~lisa/twiki/bin/view.cgi/Divers/VimPythonRecommendations
--- a/doc/v2_planning/dataset.txt	Fri Sep 17 12:01:12 2010 -0400
+++ b/doc/v2_planning/dataset.txt	Fri Sep 17 12:01:32 2010 -0400
@@ -175,6 +175,22 @@
 COMMENTS
 ~~~~~~~~
 
+JB asks:  How about asking datasets to also provide a visualization mechanism
+for showing / playing individual examples  from the dataset, but also other
+external objects that are similar to dataset examples (e.g. filters from a
+weight matrix that filters images).  This doesn't have to be complicated, and it
+can be shared between datasets that exist in one modality (e.g. image datasets
+can all use an image-rendering method)
+
+OD replies: Besides being able to display data without prior knowledge of the
+kind of data inside a dataset, is there any reason to put this within the
+dataset class? If not, it seems to me it may be more appropriate to have a way
+for the dataset to describe the kind of data it holds, and keep the
+visualization code separate from the dataset itself. It would make it easier
+in particular to try different visualization systems, and description of the
+data may turn out to be useful for other reasons (however, it also means we'd
+need to come up with a good way to describe data, which could prove
+difficult).
 
 JB asks: What may be passed as argument to the functions in Dataset, and what
 can be expected in return?  Are there side effects (e.g. on the state of the
@@ -258,6 +274,11 @@
 use numpy arrays (for numeric data) or lists (for anything else) to store
 mini-batches' data. So I vote for 'no'.
 
+YB: I agree that a mini-batch should definitely be safely assumed
+to fit in memory. That makes it at least in principle semantically
+different from a dataset. But barring that restriction, it might
+share of the properties of a dataset.
+
 A dataset is a learner
 ~~~~~~~~~~~~~~~~~~~~~~
 
@@ -324,3 +345,55 @@
 understanding of it, but my feeling is that you need your learner to be
 written in a specific way to achieve this, in which case it may be up to the
 learner to take its input data and store it into a shared variable.
+
+RP comment: Yes, the dataset object alone can not handle this, the issue is somewhere 
+between the dataset and the learner. Or in other words, everytime you change
+the data you need to recompile your theano function. So the learner can not
+only get data from the dataset, it needs to get a shared variable. The learner
+should also be aware when the dataset is changed, to recompile its internal 
+functions. I'm not sure which is the best way to do this. My personal feeling
+is that the dataset should be part of the learner. The learner should provide
+a function use_dataset ( or replace_dataset). When this function is called,
+all the theano functions in the learner get recompiled based on shared
+variables that the dataset object provides. It sort of fits very well in the 
+framework that I have in mind, which was spattered around in the learner.txt
+and some of my previous emails. I think it shares a lot with James concepts, 
+since it follows quite closely the concepts behind Theano.
+
+OD asks: Ok, so why would the dataset have to be responsible for providing a
+shared variable? Why wouldn't the learner just create this shared variable
+internally and copy into it the data provided by the dataset?
+
+RP replies: Sure, the learner could take care of all this. Note though that the
+learner should take care to divide the dataset into chunks that fit in the 
+GPU memory ( in case of a large dataset) and then take care of updating the 
+shared variables according to the current chunk. Personally I feel like all
+this data division, management and so on should be done by the dataset. 
+It feels more natural that way. For example assume you have a dataset that
+is composed of a time series and some static data ( carre-tech heart beat
+data is a good example). The static data is small enough so that you could 
+always store on the GPU, and you would only need to split the time series. 
+For the learner to do this ( since it gets the same interface from any 
+dataset object) would be like and if <this case> then, while for the 
+dataset is just a different class. But I'm happy to have all this GPU stuff
+send to the learner as well if everybody else believe that is better. 
+
+FB comment: I don't understand why you would need to recompile the theano function.
+There are two cases: the data is in a shared variable. You can directly change the data
+in the shared variable without recompiling the theano fct. The second case is when 
+the dataset is in an ordinary theano variable. In that case, the first step in the 
+theano fct will be to transfer the dataset to the gpu before computation. If the data
+change at each call, that will be as efficient as changing the data manually every time
+in the shared variable.
+
+AB: I have an idea about this which kind of fits in the "building a
+theano op" thing that we talked about at the last meeting.
+
+We can just build a theano Op that wraps dataset objects and takes
+care of the details of transferring data to the GPU or otherwise.
+
+I have a prototype interface/implementation in the shared_dataset.py
+file in this directory.
+
+OD: I like AB's approach.
+
--- a/doc/v2_planning/main_plan.txt	Fri Sep 17 12:01:12 2010 -0400
+++ b/doc/v2_planning/main_plan.txt	Fri Sep 17 12:01:32 2010 -0400
@@ -235,7 +235,7 @@
 separate file.
 
 Indexing Convention
-~~~~~~~~~~~~~~~~~~~
+===================
 
 Something to decide on - Fortran-style or C-style indexing.  Although we have
 often used c-style indexing in the past (for efficiency in c!) this is no
--- a/doc/v2_planning/optimization.txt	Fri Sep 17 12:01:12 2010 -0400
+++ b/doc/v2_planning/optimization.txt	Fri Sep 17 12:01:32 2010 -0400
@@ -46,10 +46,82 @@
 
 
 
-Proposal for API
-================
+Discussion
+==========
+
+OD asks: Could it be more convenient for x0 to be a list?
+ 
+JB replies: Yes, but that's not the interface used by other minimize()
+routines (e.g. in scipy).  Maybe another list-based interface is required?
+
+OD replies: I think most people would prefer to use a list-based interface, so
+    they don't have to manually pack / unpack multiple arrays of parameters. So I
+    would vote in favor or having both (where the main reason to also provide a
+    non-list interface would be to allow one to easily switch e.g. to scipy's
+    minimize). 
+    I would guess the reason scipy's interface is like this is because it makes
+    it easier for the optimization algorithm. However, this does not really
+    matter if we are just wrapping a theano-based algorithm (that already has
+    to handle multiple parameters), and avoiding useless data copies on each call
+    to f / df can only help speed-wise.
+JB replies: Done, I added possibility that x0 is list of ndarrays to the api
+doc.
+
+
+
+OD asks: Why make a difference between iterative and one-shot versions? A one-shot
+    algorithm can be seen as an iterative one that stops after its first
+    iteration. The difference I see between the two interfaces proposed here
+    is mostly that one relies on Theano while the other one does not, but
+    hopefully a non-Theano one can be created by simply wrapping around the
+    Theano one.
+
+JB replies: Right, it would make more sense to distinguish them by the fact that
+one works on Theano objects, and the other on general Python callable functions.
+There is room for an iterative numpy interface, but I didn't make it yet.  Would
+that answer your question?
+
+OD replies and asks: Partly. Do we really need a non-iterative interface?
+
+OD: I wish we could get closer to each other the Theano and Numpy interfaces.
+It would be nice if we could do something like:
 
-See api_optimization.txt.
+    # Theano version.
+    updates = sgd([p], gradients=[g], stop=stop, step_size=.1)
+    sgd_step = theano.function([input_var, target_var], [], updates=updates)
+    while not stop.value:
+        input, target = training_iter.next()
+        sgd_step(input, target)
+
+    # Numpy version (you can replace *.value by regular numpy arrays).
+    sgd_step = sgd([p.value], gradients=g_func, stop=stop.value, step_size=.1)
+    while not stop.value:
+        input, target = training_iter.next()
+        sgd_step(input, target)
+
+where sgd would look something like:
 
-OD: Do we really need a different file? If yes, maybe create a subdirectory to
-    be able to easily find all files related to optimization?
+    class sgd(...):
+        def __init__(self, parameters, cost=None, gradients=None, stop=None,
+                     step_size=None):
+            # Allow for extra arguments to be provided in self.__call__, that
+            # are forwarded to the underlying gradients function.
+            self.gradients = lambda *lst, **kw: gradients(*(parameters + lst),
+                                                          **kw)
+            ...
+
+        def __call__(*lst, **kw):
+            grads = self.gradients(*lst, **kw)
+            for param, grad in izip(self.parameters, grads):
+                param -= self.step_size * grad
+
+Then a wrapper to provide a scipy-like interface could be:
+
+    def minimize(x0, f, df, algo, **kw):
+        stop = numpy.array(0, dtype=numpy.int8)
+        algo_step = eval(algo)([x0], cost=f, gradients=lambda x: (df(x), ),
+                               stop=stop, **kw)
+        while not stop:
+            algo_step()
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc/v2_planning/plugin.py	Fri Sep 17 12:01:32 2010 -0400
@@ -0,0 +1,314 @@
+
+import time
+from collections import defaultdict, deque
+from copy import copy
+
+inf = float('inf')
+
+#############
+### EVENT ###
+#############
+
+class Event(object):
+
+    def __init__(self, type, **attributes):
+        self.type = type
+        self.__dict__.update(attributes)
+        self.attributes = dict(type = type, **attributes)
+
+    def match(self, other):
+        if isinstance(other, Matcher):
+            return other(self)
+        else:
+            oattr = other.attributes
+            for k, v in self.attributes.iteritems():
+                if k in oattr:
+                    v2 = oattr[k]
+                    if isinstance(v2, Matcher):
+                        if not v2(v): return False
+                    else:
+                        if v != v2: return False
+            return True
+
+    def __str__(self):
+        return "Event(%s)" % ", ".join("%s=%s" % (k, v) for k, v in self.attributes.iteritems())
+
+class Matcher(object):
+
+    def __call__(self, object):
+        raise NotImplementedError("Implement this!")
+
+class FnMatcher(Matcher):
+
+    def __init__(self, function):
+        self.function = function
+
+    def __call__(self, object):
+        return self.function(object)
+
+all_events = FnMatcher(lambda _: True)
+
+
+
+################
+### SCHEDULE ###
+################
+
+class Schedule(Matcher):
+    def __add__(self, i):
+        return OffsetSchedule(self, i)
+    def __or__(self, s):
+        return UnionSchedule(self, to_schedule(s))
+    def __and__(self, s):
+        return IntersectionSchedule(self, to_schedule(s))
+    def __sub__(self, i):
+        return OffsetSchedule(self, -i)
+    def __ror__(self, s):
+        return UnionSchedule(to_schedule(s), self)
+    def __rand__(self, s):
+        return IntersectionSchedule(to_schedule(s), self)
+    def __invert__(self):
+        return NegatedSchedule(self)
+
+def to_schedule(x):
+    if x in (None, False):
+        return never
+    if x is True:
+        return always
+    elif isinstance(x, (list, tuple)):
+        return reduce(UnionSchedule, x)
+    else:
+        return x
+
+
+class ScheduleMix(Schedule):
+    __n__ = None
+    def __init__(self, *subschedules):
+        assert (not self.__n__) or len(subschedules) == self.__n__
+        self.subschedules = map(to_schedule, subschedules)
+
+class UnionSchedule(ScheduleMix):
+    def __call__(self, time):
+        return any(s(time) for s in self.subschedules)
+
+class IntersectionSchedule(ScheduleMix):
+    def __call__(self, time):
+        return all(s(time) for s in self.subschedules)
+
+class DifferenceSchedule(ScheduleMix):
+    __n__ = 2
+    def __call__(self, time):
+        return self.subschedules[0](time) and not self.subschedules[1](time)
+
+class NegatedSchedule(ScheduleMix):
+    __n__ = 1
+    def __call__(self, time):
+        return not self.subschedules[0](time)
+
+class OffsetSchedule(Schedule):
+    def __init__(self, schedule, offset):
+        self.schedule = schedule
+        self.offset = offset
+    def __call__(self, time):
+        if isinstance(time, int):
+            return self.schedule(time - self.offset)
+        else:
+            t1, t2 = time
+            return self.schedule((t1 - self.offset, t2 - self.offset))
+
+
+class AlwaysSchedule(Schedule):
+    def __call__(self, time):
+        return True
+
+always = AlwaysSchedule()
+never = ~always
+
+class IntervalSchedule(Schedule):
+    def __init__(self, step, repeat = inf):
+        self.step = step
+        self.upper_bound = step * (repeat - 1)
+    def __call__(self, time):
+        if isinstance(time, int):
+            if time < 0 or time > self.upper_bound:
+                return False
+            return time % self.step == 0
+        else:
+            t1, t2 = time
+            if t2 < 0 or t1 > self.upper_bound:
+                return False
+            diff = t2 - t1
+            t1m = t1 % self.step
+            t2m = t2 % self.step
+            return (diff >= self.step
+                    or t1m == 0
+                    or t2m == 0
+                    or t1m > t2m)
+
+each = lambda step, repeat = inf: each0(step, repeat) + step
+each0 = IntervalSchedule
+
+
+class RangeSchedule(Schedule):
+    def __init__(self, low = None, high = None):
+        self.low = low or -inf
+        self.high = high or inf
+    def __call__(self, time):
+        if isinstance(time, int):
+            return self.low <= time <= self.high
+        else:
+            t1, t2 = time
+            return self.low <= t1 <= self.high \
+                or self.low <= t2 <= self.high
+
+inrange = RangeSchedule    
+
+
+class ListSchedule(Schedule):
+    def __init__(self, *schedules):
+        self.schedules = schedules
+    def __call__(self, time):
+        if isinstance(time, int):
+            return time in self.schedules
+        else:
+            for t in self.schedules:
+                if t1 <= t <= t2:
+                    return True
+        return False
+
+at = ListSchedule
+
+
+##############
+### PLUGIN ###
+##############
+
+class Plugin(object):
+
+    def attach(self, scheduler):
+        c = copy(self)
+        c.scheduler = scheduler
+        return c
+
+    def __call__(self, event):
+        raise NotImplementedError("Implement this!")
+
+    def fire(self, type, **attributes):
+        event = Event(type, issuer = self, **attributes)
+        self.scheduler.queue(event)
+
+class FnPlugin(Plugin):
+
+    def __init__(self, function):
+        self.function = function
+
+    def __call__(self, event):
+        return self.function(self, event)
+
+class DispatchPlugin(Plugin):
+
+    def __call__(self, event):
+        getattr(self, "on_" + event.type, self.generic)(event)
+
+    def generic(self, event):
+        return
+
+
+#################
+### SCHEDULER ###
+#################
+
+class Scheduler(object):
+
+    def __init__(self):
+        self.plugins = []
+        self.categorized = defaultdict(list)
+        self.event_queue = deque()
+
+    def __call__(self):
+        i = 0
+        evq = self.event_queue
+        self.queue(Event("begin", issuer = self))
+        while True:
+            self.queue(Event("tick", issuer = self, time = i))
+            while evq:
+                event = evq.popleft()
+                candidates = self.categorized[event.type] + self.categorized[None]
+                for event_template, plugin in candidates:
+                    if event.match(event_template):
+                        plugin(event) # note: the plugin might queue more events
+                if event.type == "terminate":
+                    return
+            i += 1
+
+    def schedule_plugin(self, event_template, plugin):
+        plugin = plugin.attach(self)
+        if isinstance(event_template, Matcher) or isinstance(event_template.type, Matcher):
+            # These plugins may execute upon any event type
+            self.categorized[None].append((event_template, plugin))
+        else:
+            self.categorized[event_template.type].append((event_template, plugin))
+        self.plugins.append((event_template, plugin))
+
+    def queue(self, event):
+        self.event_queue.append(event)
+
+
+
+
+@FnPlugin
+def printer(self, event):
+    print event
+
+@FnPlugin
+def stopper(self, event):
+    self.fire("terminate")
+
+@FnPlugin
+def byebye(self, event):
+    print "bye bye!"
+
+
+@FnPlugin
+def waiter(self, event):
+    time.sleep(0.1)
+
+# @FnPlugin
+# def timer(self, event):
+#     if not hasattr(self, 'previous'):
+#         self.beginning = time.time()
+#         self.previous = 0
+#     now = time.time() - self.beginning
+#     inow = int(now)
+#     if inow > self.previous:
+#         self.fire("second", time = inow)
+#     self.previous = now
+
+class Timer(DispatchPlugin):
+
+    def on_begin(self, event):
+        self.beginning = time.time()
+        self.previous = 0
+
+    def on_tick(self, event):
+        now = time.time() - self.beginning
+        inow = int(now)
+        if inow > self.previous:
+            self.fire("second", time = inow)
+        self.previous = now
+
+
+
+sch = Scheduler()
+
+
+sch.schedule_plugin(all_events, Timer())
+sch.schedule_plugin(Event("tick"), waiter) # this means: execute the waiter plugin (a delay) on every "tick" event. Is it confusing to use Event(...)?
+sch.schedule_plugin(Event("second"), printer)
+
+# sch.schedule_plugin(all_events, printer)
+
+sch.schedule_plugin(Event("tick", time = at(100)), stopper)
+sch.schedule_plugin(Event("terminate"), byebye)
+
+sch()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc/v2_planning/plugin.txt	Fri Sep 17 12:01:32 2010 -0400
@@ -0,0 +1,181 @@
+
+======================================
+Plugin system for iterative algorithms
+======================================
+
+I would like to propose a plugin system for iterative algorithms in
+Pylearn. Basically, it would be useful to be able to sandwich
+arbitrary behavior in-between two training iterations of an algorithm
+(whenever applicable). I believe many mechanisms are best implemented
+this way: early stopping, saving checkpoints, tracking statistics,
+real time visualization, remote control of the process, or even
+interlacing the training of several models and making them interact
+with each other.
+
+So here is the proposal: essentially, a plugin would be a (schedule,
+timeline, function) tuple.
+
+Schedule
+========
+
+The schedule is some function that takes two "times", t1 and t2, and
+returns True if the plugin should be run in-between these times. The
+indices refer to a "timeline" unit described below (e.g. "real time" or
+"iterations"). The reason why we check a time range [t1, t2] rather than
+some discrete time t is that we do not necessarily want to schedule plugins
+on iteration numbers. For instance, we could want to run a plugin every
+second, or every minute, and then [t1, t2] would be the start time and end
+time of the last iteration - and then we run the plugin whenever a new
+second started in that range (but still on training iteration
+boundaries). Alternatively, we could want to run a plugin every n examples
+seen - but if we use mini-batches, the nth example might be square in the
+middle of a batch.
+
+I've implemented a somewhat elaborate schedule system. `each(10)`
+produces a schedule that returns true whenever a multiple of 10 is in
+the time range. `at(17, 153)` produces one that returns true when 17
+or 153 is in the time range. Schedules can be combined and negated,
+e.g. `each(10) & ~at(20, 30)` (execute at each 10, except at 20 and
+30). So that gives a lot of flexibility as to when you want to do
+things.
+
+Timeline
+========
+
+This would be a string indicating on what "timeline" the schedule is
+supposed to operate. For instance, there could be a "real time"
+timeline, an "algorithm time" timeline, an "iterations" timeline, a
+"number of examples" timeline, and so on. This means you can schedule
+some action to be executed every actual second, or every second of
+training time (ignoring time spent executing plugins), or every
+discrete iteration, or every n examples processed. This might be a
+bloat feature (it was an afterthought to my original design, anyway),
+but I think that there are circumstances where each of these options
+is the best one.
+
+Function
+========
+
+The plugin function would receive some object containing the time
+range, a flag indicating whether the training has started, a flag
+indicating whether the training is done (which they can set in order
+to stop training), as well as anything pertinent about the model.
+
+Implementation
+==============
+
+I have implemented the feature in plugin.py, in this directory. Simply
+run python plugin.py to test it.
+
+
+
+===============
+Revised version
+===============
+
+Taking into account ideas thrown around during the September 16
+meeting I (OB) have made the following modifications to my original
+proposal:
+
+Event objects
+=============
+
+In the revised framework, an Event is a generic object which can
+contain any attributes you want, with one privileged attribute, the
+'type' attribute, which is a string. I expect the following attributes
+to be used widely:
+
+* type: this is a string describing the abstract semantics of this
+  event ("tick", "second", "millisecond", "batch", etc.)
+
+* issuer: a pointer to the plugin that issued this event. This allows
+  for fine grained filtering in the case where several plugins can
+  fire the same event type
+
+* time: an integer or float index on an abstract timeline. For
+  instance, the "tick" event would have a "time" field, which would be
+  increased by one every time the event is fired. Pretty much all
+  recurrent events should include this.
+
+* data: some data associated to the event. presumably it doesn't have
+  to be named "data", and more than one data field could be given.
+
+The basic idea is that it should be possible to say: "I want this
+plugin to be executed every tenth time an event of this type is fired
+by this plugin", or any subset of these conditions.
+
+Matching events
+===============
+
+When registering a plugin, you specify a sort of "abstract event" that
+an event must "match" in order to be fed to the plugin. This can be
+done by simply instantiating an event with the fields you want to
+match. I think examples would explain best my idea
+(sch.schedule_plugin = add a plugin to the scheduler):
+
+# Print the error on every parameter update (learner given in the event)
+sch.schedule_plugin(Event("parameter_update"), PrintError())
+# Print the reconstruction error of daa0 whenever it does a parameter update
+sch.schedule_plugin(Event("parameter_update", issuer = daa0), PrintReconstructionError())
+# Save the learner every 10 minutes
+sch.schedule_plugin(Event("minute", time = each(10)), Save(learner))
+
+The events given as first argument to schedule_plugin are not real
+events: they are "template events" meant to be *matched* against the
+real events that will be fired. If the terminology is confusing, it
+would not be a problem to use another class with a better name (for
+example, On("minute", time = each(10)) could be clearer than
+Event(...), I don't know).
+
+Note that fields in these Event objects can be a special kind of
+object, a Matcher, which allows to filter events based on arbitrary
+conditions. My Schedule objects (each, at, etc.) now inherit from
+Matcher. You could easily have a matcher that allows you to match
+issuers that are instances of a certain class, or matches every single
+event (I have an example of the latter in plugin.py).
+
+Plugins
+=======
+
+The plugin class would have the following methods:
+
+* attach(scheduler): tell the plugin that it is being scheduled by the
+  scheduler, store the scheduler in self. The method can return self,
+  or a copy of itself.
+
+* fire(type, **attributes): adds Event(type, issuer = self, **attributes)
+  to the event queue of self.scheduler
+
+Scheduler
+=========
+
+A Scheduler would have a schedule_plugin(event_template, plugin)
+method to add plugins, a queue(event) method to queue a new event, and
+it would be callable.
+
+My current version proceeds as follows:
+
+* Fire Event("begin"). Somewhat equivalent to "tick" at time 0, but I
+  find it cleaner to have a special event to mark the beginning of the
+  event loop.
+* Infinite loop
+  * Fire Event("tick", time = <iteration#>)
+  * Loop until the queue is empty
+    * Pop event, execute all plugins that respond to it
+    * Check if event.type == "terminate". If so, stop.
+
+Varia
+=====
+
+I've made a very simple implementation of a DispatchPlugin which, upon
+reception of an event, dispatches it to its "on_<event.type>" method
+(or calls a fallback). It seems nice. However, in order for it to work
+reliably, it has to be registered on all events, and I'm not sure it
+can scale well to more complex problems where the source of events is
+important.
+
+Implementation
+==============
+
+See plugin.py.
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc/v2_planning/plugin_RP.py	Fri Sep 17 12:01:32 2010 -0400
@@ -0,0 +1,161 @@
+'''
+=================================================
+Plugin system for iterative algorithms, Version B
+=================================================
+
+After the meeting (September 16) we sort of stumbled on 
+two possible versions of the plug-in system. This represents
+the second version. It suffered a few changes after seeing 
+Olivier's code and talking to him.
+
+Concept
+=======
+
+The basic idea behind this version is not to have a list of all 
+possible events, but rather have plugin register to events.By 
+specifying what plugin listens to which event produced by what 
+plugin you define a sort of dependency graph. Structuring things
+in such a graph might make the script more intuitive when reading.
+
+I will first go through pseudo-code for two examples and then enumerate
+my insights and concepts on the matter
+
+
+Example : Producer - Consumer that Guillaume described
+======================================================
+
+
+.. code-block::
+'''
+    sch = Schedular()
+
+    @FnPlugin(sch)
+    def producer(self,event):
+        self.fire('stuff', value = 'some text')
+
+    @FnPlugin(sch)
+    def consumer(self,event):
+        print event.value
+
+    @FnPlugin(sch)
+    def prod_consumer(self,event):
+        print event.value
+        self.fire('stuff2', value = 'stuff')
+
+    producer.act( on = Event('begin'), when = once() )
+    producer.act( on = Event('stuff'), when = always() )
+    consumer.act( on = Event('stuff'), when = always() )
+    prod_consumer.act( on = Event('stuff'), when = always() )
+
+    sch.run()
+
+
+
+'''
+Example : Logistic regression
+=============================
+
+Task description
+----------------
+
+Apply a logistic regression network to some dataset. Use early stopping.
+Save the weights every time a new best score is obtained. Print training score 
+after each epoch.
+
+
+Possible script
+---------------
+
+Notes : This would look the same for any other architecture that does not
+imply pre-training ( i.e. deep networks). For example the mlp.
+
+.. code-block::
+'''
+
+sched = Schedular()
+
+# Data / Model Building : 
+# I skipped over how to design this part
+# though I have some ideas
+real_train_data, real_valid_data = load_mnist()
+model = logreg()
+
+# Main Plugins ( already provided in the library ); 
+# These wrappers also register the plugin
+train_data = create_data_plugin( sched, data = real_train_data)
+valid_data = create_data_plugin( sched, data = real_valid_data)
+train_model    = create_train_model(sched, model = model)
+validate_model = create_valid_model(sched, model = model, data = valid_data)
+early_stopper  = create_early_stopper(sched)
+
+
+# On the fly plugins ( print random stuff); the main difference from my 
+# FnPlugin from Olivier's version is that it also registers the plugin in sched
+@FnPlugin(sched)
+def print_error(self, event):
+    if event.type == Event('begin'):
+        self.value = []
+    elif event.type == train_model.error():
+        self.value += [event.value]
+    else event.type == train_data.eod():
+        print 'Error :', numpy.mean(self.value)
+
+@FnPlugin(sched)
+def save_model(self, event):
+    if event.type == early_stopper.new_best_error():
+        cPickle.dump(model.parameters(), open('best_params.pkl','wb'))
+
+
+# Create the dependency graph describing what does what 
+train_model.act(on = train_data.batch(), when = always())
+validate_model.act(on = train_model.done(), when = every(n=10000)) 
+early_stopper.act(on = validate_model.error(), when = always())
+print_error.act( on = train_model.error(), when = always() )
+print_error.act( on = train_data.eod(), when = always() )
+save_model.act( on = eraly_stopper.new_best_errot(), when = always() )
+
+# Run the entire thing
+sched.run()
+
+
+'''
+Notes
+=====
+
+ * I think we should have a FnPlugin decorator ( exactly like Olivier's) just
+ that also attaches the newly created plugin to the scheduler. This way you 
+ can create plugins on the fly ( as long as they are simple functions that
+ print stuff, or compute simple statistics ).
+ * I added a method act to a Plugin. You use that to create the dependency
+ graph ( it could also be named listen to be more plugin like interface)
+ * Plugins are obtained in 3 ways  :
+     - by wrapping a dataset / model or something similar
+     - by a function that constructs it from nothing
+     - by decorating a function
+   In all cases I would suggest then when creating them you should provide
+   the schedular as well, and the constructor also registers the plugin
+
+ * The plugin concept works well as long as the plugins are a bit towards
+ heavy duty computation, disregarding printing plugins and such. If you have
+ many small plugins this system might only introduce an overhead. I would 
+ argue that using theano is restricted to each plugin. Therefore I would
+ strongly suggest that the architecture to be done outside the schedular
+ with a different approach.
+
+ * I would suggest that the framework to be used only for the training loop
+ (after you get the adapt function, compute error function) so is more about
+ the meta-learner, hyper-learner learner level.
+
+ * A general remark that I guess everyone will agree on. We should make 
+ sure that implementing a new plugin is as easy/simple as possible. We 
+ have to hide all the complexity in the schedular ( it is the part of the 
+ code we will not need or we would rarely need to work on). 
+
+ * I have not gone into how to implement the different components, but 
+ following Olivier's code I think that part would be more or less straight
+ forward. 
+
+ '''
+
+
+'''
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc/v2_planning/plugin_architecture_GD.txt	Fri Sep 17 12:01:32 2010 -0400
@@ -0,0 +1,267 @@
+Overview
+========
+
+The "central authority" (CA) is the glue which takes care of interfacing plugins
+with one another. It has 3 basic roles:
+* it maintains a list of "registered" or "active" plugins
+* it receives and queues the various messages sent by the plugins
+* dispatches the messages to the recipient, based on various "events"
+
+Events can take different forms:
+* the CA can trigger various events based on running time
+* can be linked to messages emitted by the various plugins. Events can be
+  triggered based on the frequency of such messages.
+* Once an event is triggered, it is relayed to the appropriate "recipient
+  plugin(s)"
+
+It is the responsibility of each plugin to inform the CA of which "events" it
+cares about.
+
+
+Generic Pseudo-code
+===================
+
+I'll try to write this in pseudo-python as best I can. I'll do this in
+traditional OOP, as this is what I'm more comfortable with. I'll leave it up to
+James and OB to python-ize this :)
+
+
+class MessageX(Message):
+    """
+    A message is basically a data container. This could very well be replaced by
+    a generic Python object.
+    """
+
+class Plugin(object):
+    """
+    The base plugin object doesn't do much. It contains a reference to the CA
+    (upon plugin being registered with the CA), provides boilerplate code
+    for storing which "events" this plugin is susceptible to, as well as code
+    for registering callback functions for the various messages.
+    """
+    
+    CA = None        # to be initialized upon plugin registration
+    active_msg = {}  # dictionary of messages this plugin is susceptible to
+    callbacks  = {}  # mapping of message class names --> callback function
+
+    def listen(msg_class, interval):
+        """
+        :param msg_class: reference to the "message" class we are interested in.
+                          These messages will be forwarded to this plugin, when
+                          the trigger condition is met.
+        :param interval: integer. Forward the message to this plugin every 'interval'
+                         such messages.
+        """
+        self.active_msg[msg_class] = interval
+
+
+    def check_trigger(msg_class, time):
+        """
+        Checks whether or not the "trigger" condition associated with message of
+        class 'msg_class' is satisfied or not. This could be the default
+        behavior, and be overridden by the various plugins.
+        """
+        return time % self.active_msg[msg_class] == 0
+
+
+    def handler(msg_class, callback):
+        """
+        Decorator which registers a callback function for the given message
+        type.
+        
+        NOTE: I don't think what I wrote would work as a Python decorator. I am
+        not sure how to handle decoraters with multiple parameters (one
+        explicit, and the other as the reference to the function). I'm pretty
+        sure James or OB could figure it out though !
+        
+        :params msg_class: reference to the message class for which we are
+                           registering a callback function
+        :params callback : reference to which function to call for a given message
+        """
+
+        self.callbacks[msg_class] = callback
+
+
+    def execute(self, message):
+        """
+        Boiler-plate code which executes the right callback function, for the
+        given message type.
+        """
+        for (msg_class, callback) in self.callbacks.iteritems():
+            if message.__class__ == msg_class:
+                callback(message)
+
+
+class ProducerPlugin(Plugin):
+
+    def dostuff():
+        """
+        A typical "producer" plugin. It basically performs an arbitrary action
+        and asks the CA to forward the results (in the form of a message) to
+        other plugins.
+        """
+
+        # iteratively do stuff and relay messages to other plugins
+        while(condition):
+
+            msga =         # do something
+            ca.send(msga)  # ask CA to forward to other plugins
+
+
+class ConsumerPlugin(Plugin):
+
+    @handler(MessageA)
+    def func(msga):
+        """
+        A consumer or "passive plugin" (eg. logger, etc). This function is
+        registered as being the callback function for MessageA objects.
+        """
+        # do something with message A
+
+
+class ConsumerProducerPlugin(Plugin):
+
+    @handler(MessageA)
+    def func(msga):
+        """
+        Example of a consumer / producer plugin. It receives MessageA messages,
+        processes the data, then asks the CA to send a new message (MessageB) as
+        the result of its computation. The CA will automatically forward to all
+        interested parties.
+
+        :param msga: MessageA instance
+        """
+
+        data = dostuff(msga)   # process message
+        msgb = MessageB(data)  # generate new message for other plugins
+        ca.send(msgb)          # ask CA to forward to other plugins
+
+
+
+class CentralAuthority(object):
+
+    active_plugins = []  # contains a list of registered plugins
+
+    mailmain = {}        # dictionary which contains, for each message class, a
+                         # list of plugins interested in this message
+
+    event_count = {}     # dictionary of "event" counts for various messages
+
+    def register(plugin):
+        """
+        Registers the plugin and adds it as a listener for the various messages
+        it is interested in.
+        :param plugin: plugin instance which we want to "activate"
+        """
+    
+        # each plugin must have a reference to the CA
+        plugin.ca = self
+        
+        # maintain list of active plugins
+        active_plugins.append(plugin)
+
+        # remember which messages this plugin cares about
+        for msg in plugin.active_msg.keys():
+            self.mailman[msg].append(plugin)
+            self.event_count[msg] = 0
+
+    def send(msg):
+        """
+        This function relays the message to the appropriate plugins, based on
+        their "trigger" condition. It also keeps track of the number of times
+        this event was raised.
+
+        :param msg: message instance
+        """
+        
+        event_count[msg.__class__] += 1
+
+        # for all plugins interested in this message ...
+        for plugin in self.mailman[msg.__class__]:
+
+            # check if trigger condition is met
+            if plugin.check_trigger(msg, self.event_count[msg.__class__]):
+                
+                # have the plugin execute the message
+                plugin.execute(msg)
+
+
+    def run(self):
+        """
+        This would be the main loop of the program. I won't go into details
+        because its still somewhat blurry in my head :) But basically, the CA
+        could be configured to send out its own messages, independently from all
+        other plugins.
+        
+        These could be "synchronous" messages such as: "5 seconds have passed",
+        or others such as "save state we are about to get killed".
+       
+        NOTE: seems like this would almost have to live in its own thread ...
+        """
+
+        # the following would be parametrized obviously
+        while(True):
+            msg = ElapsedTimeMessage(5)
+            self.send(msg)
+            sleep(5)
+
+
+
+Putting it all-together
+=======================
+
+
+def main():
+
+    ca = CentralAuthority()
+
+    producer = ProducerPlugin()
+    ca.register(producer)
+
+    consumer = ConsumerPlugin()
+    consumer.listen(MessageB, 1)
+    ca.register(consumer))
+
+    other = ConsumerProducerPlugin()
+    other.listen(MessageB, 10)
+    ca.register(other)
+
+    # this is the function call which gets the ball rolling
+    producer.dostuff()
+
+
+DISCUSSION: blocking vs. non-blocking
+=====================================
+
+In the above example, I used "blocking" sends. However, it is not clear that this
+is the best option. 
+
+In the example, the producer basically acts as the main loop. It relinquishes
+control of the main loop when the CA decides to forward the message to other
+plugins. Control will only be returned once the cascade of send/receives
+initiated with MessageA is complete (all subplugins have processed MessageA and
+any messages sent as a side-effect have also been processed). 
+
+This definitely imposes constraints on what the plugins can do, and how they do
+it. For the type of single-processor / linear jobs we tend to run, this might be
+enough (??).
+
+The good news is that going forward, the above plugin architecture can also
+scale to distributed systems, by changing the sends to be non-blocking. Plugins
+could then live on different machines and process data as they see fit.
+Synchronization would be enforced by way of messages. In the above, the "main
+producer" would thus become a consumer/producer who listens for "done processing
+MessageA" messages and produces a new MessageA as a result.
+
+On single-processor systems, the synchronization overhead might be too costly
+however. That is something we would have to investigate. On the plus side
+however, our plugins would be "future proof" and lend themselves well to the
+type of "massively parallel jobs" we wish to run (i.e. meta-learners, etc.)
+
+
+
+Logistic Regression
+===================
+
+
+TO COME SOON (?)
--- a/doc/v2_planning/requirements.txt	Fri Sep 17 12:01:12 2010 -0400
+++ b/doc/v2_planning/requirements.txt	Fri Sep 17 12:01:32 2010 -0400
@@ -77,3 +77,12 @@
     hyper-parameters, and want to easily be able to recover the full "processing
     pipeline" that performs best, and use it on real/test data later.
 
+OD comments: Note that R9 and R13 may conflict with each other. Some
+optimizations performed by R9 may modify the input "symbolic graph" in such a
+way that extracting the required components for "production purpose" (R13)
+could be made more difficult (or even impossible). Imagine for instance that
+the graph is modified to take advantage of the fact that k-fold validation can
+be performed efficiently internally by some specific algorithm. Then it may
+not be obvious anymore how to remove the k-fold split in the saved model you
+want to use in production.
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc/v2_planning/shared_dataset.py	Fri Sep 17 12:01:32 2010 -0400
@@ -0,0 +1,47 @@
+import theano
+
+# This is not final and may not even run for now.  It is just to give
+# a feeling of what the interface could look like.
+
+def shared_dataset(dataset, mem_size):
+    if dataset.total_size > mem_size:
+        return OnlineDataset(dataset)
+    else:
+        return MemoryDataset(dataset)
+
+class MemoryDataset(theano.Op):
+    def __init__(self, dataset):
+        self.input = theano.shared(dataset.input)
+        self.output = theano.shared(dataset.output)
+        self.batch_size = dataset.batch_size
+
+    def make_node(self, idx):
+        idx_ = theano.as_tensor_variable(idx)
+        return theano.Apply(self,
+                            inputs = [idx_],
+                            outputs = [self.input.type(), 
+                                       self.output.type()])
+
+    def perform(self, node, inputs, output_storage):
+        idx, = inputs
+        output_storage[0][0] = self.input[idx*self.batch_size:(idx+1)*self.batch_size]
+        output_storage[1][0] = self.output[idx*self.batch_size:(idx+1)*self.batch_size]
+
+class OnlineDataset(theano.Op):
+    def __init__(self, dataset):
+        self.dataset = dataset
+
+    def make_node(self, idx):
+        idx_ = theano.as_tensor_variable(idx)
+        return theano.Apply(self,
+                            inputs = [idx_],
+                            outputs = [theano.tensor.fmatrix(), 
+                                       theano.tensor.fmatrix()])
+                            # fix this so its not fmatrix(), 
+                            # but whatever the dataset outputs
+
+    def perform(self, node, inputs, output_storage):
+        idx, = inputs
+        b = self.dataset.get_batch(idx)
+        output_storage[0][0] = b.input
+        output_storage[1][0] = b.output
--- a/doc/v2_planning/use_cases.txt	Fri Sep 17 12:01:12 2010 -0400
+++ b/doc/v2_planning/use_cases.txt	Fri Sep 17 12:01:32 2010 -0400
@@ -66,6 +66,7 @@
             classification_accuracy(
                 examples=MNIST.validation_dataset,
                 function=as_classifier('learner_obj'))),
+
         step_fn = vm_lambda(('learner_obj',),
             sgd_step_fn(
                 parameters = vm_getattr('learner_obj', 'params'),
@@ -96,6 +97,29 @@
 - there are no APIs for things which are not passed as arguments (i.e. the logic
   of the whole program is not exposed via some uber-API).
 
+OD comments: I didn't have time to look closely at the details, but overall I
+like the general feel of it. At least I'd expect us to need something like
+that to be able to handle the multiple use cases we want to support. I must
+say I'm a bit worried though that it could become scary pretty fast to the
+newcomer, with 'lambda functions' and 'virtual machines'.
+Anyway, one point I would like to comment on is the line that creates the
+linear classifier. I hope that, as much as possible, we can avoid the need to
+specify dataset dimensions / number of classes in algorithm constructors. I
+regularly had issues in PLearn with the fact we had for instance to give the
+number of inputs when creating a neural network. I much prefer when this kind
+of thing can be figured out at runtime:
+    - Any parameter you can get rid of is a significant gain in
+      user-friendliness.
+    - It's not always easy to know in advance e.g. the dimension of your input
+      dataset. Imagine for instance this dataset is obtained in a first step
+      by going through a PCA whose number of output dimensions is set so as to
+      keep 90% of the variance.
+    - It seems to me it fits better the idea of a symbolic graph: my intuition
+      (that may be very different from what you actually have in mind) is to
+      see an experiment as a symbolic graph, which you instantiate when you
+      provide the input data. One advantage of this point of view is it makes
+      it natural to re-use the same block components on various datasets /
+      splits, something we often want to do.
 
 K-fold cross validation of a classifier
 ---------------------------------------
@@ -113,7 +137,7 @@
             initial_model=alloc_model('param1', 'param2'),
             burnin=100,
             score_fn = vm_lambda(('learner_obj',),
-                graph=classification_error(
+                classification_error(
                     function=as_classifier('learner_obj'),
                     dataset=MNIST.subset(validation_set))),
             step_fn = vm_lambda(('learner_obj',),
@@ -145,7 +169,7 @@
 extending the symbolic program, and calling the extended function.
 
     vm.call(
-        [pylearn.min(model.weights) for model in trained_models], 
+        [pylearn.min(pylearn_getattr(model, 'weights')) for model in trained_models], 
         param1=1, param2=2)
 
 If this is run after the previous calls:
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/formulas/costs.py	Fri Sep 17 12:01:32 2010 -0400
@@ -0,0 +1,22 @@
+"""
+This script defines a few often used cost functions.
+"""
+import theano
+import theano.tensor as T
+from tags import tags
+
+@tags('cost','binary','cross-entropy')
+def binary_crossentropy(output, target):
+    """ Compute the crossentropy of binary output wrt binary target.
+
+    .. math::
+                L_{CE} \equiv -\left( t\log(o) + (1-t)\log(1-o) \right)
+
+    :type output: Theano variable
+    :param output: Binary output or prediction :math:`\in[0,1]`
+    :type target: Theano variable
+    :param target: Binary target usually :math:`\in\{0,1\}`
+    """
+    return -(target * T.log(output) + (1.0 - target) * T.log(1.0 - output))
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/formulas/noise.py	Fri Sep 17 12:01:32 2010 -0400
@@ -0,0 +1,56 @@
+"""
+
+This script defines the different symbolic noise functions.
+The noise contract is simple: noise_lvl is a symbolic variable going from 0 to 1.
+0: no change.
+1: maximum noise.
+"""
+import theano
+import theano.tensor as T
+from tags import tags
+s="""
+* A LaTeX mathematical description of the formulas (for picture representation in generated documentation)
+* Tags (for searching):
+   * a list of lower-level functions used
+   * category (name of the submodule itself)
+* Tell if we did some work to make it more numerically stable. Does Theano do the optimization needed?
+* Tell if the gradient is numerically stable. Does Theano do the optimization needed?
+* Tell if it works on GPU or not, or if this is unknown
+* Tell alternate names
+* Tell the domain and range of the input/output (the range should use the English notation of including or excluding)
+"""
+
+@tags('noise','binomial','salt')
+def binomial_noise(theano_rng,inp,noise_lvl):
+    """ This adds binomial noise to inp. Only the salt part of salt-and-pepper noise.
+
+    :type inp: Theano Variable
+    :param inp: The input to which we want to add noise
+    :type noise_lvl: float
+    :param noise_lvl: The % of noise. Between 0 (no noise) and 1.
+    """
+    return theano_rng.binomial( size = inp.shape, n = 1, p =  1 - noise_lvl, dtype=theano.config.floatX) * inp
+
+
+@tags('noise','binomial NLP','pepper','salt')
+def pepper_and_salt_noise(theano_rng,inp,noise_lvl):
+    """ This adds salt-and-pepper noise to inp
+    
+    :type inp: Theano Variable
+    :param inp: The input to which we want to add noise
+    :type noise_lvl: tuple(float,float)
+    :param noise_lvl: The % of noise for the salt and the pepper. Between 0 (no noise) and 1.
+    """
+    return theano_rng.binomial( size = inp.shape, n = 1, p =  1 - noise_lvl[0], dtype=theano.config.floatX) * inp \
+                        + (inp==0) * theano_rng.binomial( size = inp.shape, n = 1, p =  noise_lvl[1], dtype=theano.config.floatX)
+
+@tags('noise','gauss','gaussian')
+def gaussian_noise(theano_rng,inp,noise_lvl):
+    """ This adds gaussian noise to inp
+
+    :type inp: Theano Variable
+    :param inp: The input to which we want to add noise
+    :type noise_lvl: float
+    :param noise_lvl: The standard deviation of the gaussian.
+    """
+    return theano_rng.normal( size = inp.shape, std = noise_lvl, dtype=theano.config.floatX) + inp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/formulas/tags.py	Fri Sep 17 12:01:32 2010 -0400
@@ -0,0 +1,53 @@
+
+from collections import defaultdict
+
+tags_db = defaultdict(set)
+
+def tags(*_tags):
+    tags = set()
+    def add_tag(tag):
+        if isinstance(tag, (list, tuple)):
+            map(add_tag, tag)
+        elif isinstance(tag, (str, unicode)):
+            for word in tag.split(" "):
+                tags.add(word)
+            tags.add(tag)
+        else:
+            raise TypeError("Tags should be strings or lists/tuples of strings. Got: %s, of type %s" % (tag, type(tag)))
+    map(add_tag, _tags)
+    tags = tuple(sorted(tags))
+    def decorator(function):
+        function.tags = tags
+        function.__doc__ += "\n\nTags: %s" % ", ".join(tags)
+        for tag in tags:
+            tags_db[tag].add(function)
+        return function
+    return decorator
+
+def search(*tags):
+    return reduce(set.__and__, [tags_db[tag] for tag in tags])
+
+
+if __name__ == '__main__':
+    common_tags = ['c', 'd']
+
+    @tags(common_tags, 'a', 'b', 'long tag')
+    def f(a,b):
+        ''' function f returns a+b '''
+        return a+b
+    
+    @tags(common_tags, 'x')
+    def g(a,b):
+        ''' function g returns a-b '''
+        return a-b
+    
+    @tags('c', 'x', 'y', 'z')
+    def h(a,b):
+        ''' function h returns a*b '''
+        return a*b
+
+
+
+    print f.__doc__
+    print [x.__name__ for x in search('c', 'd')]
+    print [x.__name__ for x in search('x')]