changeset 1174:fe6c25eb1e37

merge
author pascanur
date Fri, 17 Sep 2010 16:13:58 -0400
parents a0f178bc9052 (current diff) fab72f424ee0 (diff)
children 805e7c369fd1
files doc/v2_planning/API_coding_style.txt doc/v2_planning/coding_style.txt doc/v2_planning/learn_meeting.py doc/v2_planning/plugin_RP.py
diffstat 9 files changed, 494 insertions(+), 271 deletions(-) [+]
line wrap: on
line diff
--- a/.hgignore	Fri Sep 17 16:12:33 2010 -0400
+++ b/.hgignore	Fri Sep 17 16:13:58 2010 -0400
@@ -2,5 +2,6 @@
 *~
 *.swp
 *.pyc
+*.orig
 core.*
 html
\ No newline at end of file
--- a/doc/v2_planning/API_coding_style.txt	Fri Sep 17 16:12:33 2010 -0400
+++ b/doc/v2_planning/API_coding_style.txt	Fri Sep 17 16:13:58 2010 -0400
@@ -2,6 +2,10 @@
  Coding Style Guidelines
 =========================
 
+Note: until the Pylearn documentation is properly compiled, you can view
+the HTML version of this document `here
+<http://www.iro.umontreal.ca/~delallea/tmp/coding_style/html/API_coding_style.html>`_.
+
 Main Goals
 ==========
 
@@ -58,20 +62,81 @@
 
         """
 
+    * Standard library imports can (and should) be on the same line, to avoid
+      wasting space on straightforward imports:
+
+      .. code-block:: python
+
+        # Good.
+        import os, sys, time
+        # Good when it does not fit on a single line.
+        import std_lib_module_1, std_lib_module_2, std_lib_module_3
+        import std_lib_module_4, std_lib_module_5, std_lib_module_6
+        # Bad.
+        import os
+        import sys
+        import time
+
+    * Importing class / functions from a module is allowed when these are
+      used multiple times, and no ambiguity is possible.
+
+      .. code-block:: python
+
+        # Good when Bar and Blah are used many times.
+        from foo import Bar, Blah
+        do_something_with(Bar(), Blah(), Bar(), Blah(), Bar(), Blah())
+        # Good in most situations.
+        import foo
+        do_something_with(foo.Bar(), foo.Blah())
+        # Bad.
+        from foo import *
+        from numpy import any   # Potential ambiguity with __builtin__.any
+
 Excerpts
 ~~~~~~~~
 
 We emphasize here a few important topics that are found in the official
 guidelines:
 
+    * Only use ASCII characters in code files.
+
+    * Code indent must be done with four blank characters (no tabs).
+
+    * Limit lines to 79 characters.
+
+    * Naming conventions: ``ClassName``, ``TOP_LEVEL_CONSTANT``,
+      ``everything_else``.
+
+    * Comments should start with a capital letter (unless the first word is a
+      code identifier) and end with a period (short inline comments may skip
+      the period at the end).
+
+    * Imports should be listed in alphabetical order. It makes it easier to
+      verify that something is imported, and avoids duplicated imports.
+
+    * Use absolute imports only. This is compatible across a wider range of
+      Python versions, and avoids confusion about what is being
+      imported.
+
+    * Avoid renaming imported modules. This makes code more difficult to
+      re-use, and is not grep-friendly.
+
+      .. code-block:: python
+
+        # Good.
+        from theano import tensor
+        # Bad.
+        from theano import tensor as T
+
     * Avoid using lists if all you care about is iterating on something. Using
       lists:
-        - uses more memory (and possibly more CPU if the code may break out of
-          the iteration),
-        - can lead to ugly code when converted to Python 3 with 2to3,
-        - can have a different behavior if evaluating elements in the list has
-          side effects (if you want these side effects, make it explicit by
-          assigning the list to some variable before iterating on it).
+
+         - uses more memory (and possibly more CPU if the code may break out of
+           the iteration),
+         - can lead to ugly code when converted to Python 3 with 2to3,
+         - can have a different behavior if evaluating elements in the list has
+           side effects (if you want these side effects, make it explicit by
+           assigning the list to some variable before iterating on it).
 
       +------------------------+------------------------+
       | Iterative version      |    List version        |
@@ -101,7 +166,7 @@
         for f_x in imap(f, x):
             ...
         all_f_x = map(f, x)
-        map(f, x)
+        map(f, x)   # f has some side effect.
         # Bad.
         for element in map(f, x):
             ...
@@ -133,12 +198,107 @@
         has_key = my_dict.has_key(key)
         has_substring = my_string.find(substring) >= 0
 
+    * Do not use mutable arguments as default values. Instead, use a helper
+      function (conditional expressions are forbidden at this point, see
+      below).
+
+      .. code-block:: python
+
+        # Good.
+        def f(array=None):
+            array = pylearn.if_none(array, [])
+            ...
+        # Bad.
+        def f(array=[]): # Dangerous if `array` is modified down the road.
+            ...
+
+    * Use a leading underscore '_' in names of internal attributes / methods,
+      but avoid the double underscore '__' unless you know what you are
+      doing.
+
 
 Additional Recommendations
 --------------------------
 
 Things you should do even if they are not listed in official guidelines:
 
+    * All Python code files should start like this:
+
+      .. code-block:: python
+
+        """Module docstring as the first line, as usual."""
+
+        __authors__ = "Olivier Delalleau, Frederic Bastien, David Warde-Farley"
+        __copyright__ = "(c) 2010, Universite de Montreal"
+        __license__ = "3-clause BSD License"
+        __contact__ = "Name Of Current Guardian of this file <email@address>"
+
+    * Use ``//`` for integer division and ``/ float(...)`` if you want the
+      floating point operation (for readability and compatibility across all
+      versions of Python).
+
+      .. code-block:: python
+
+        # Good.
+        n_samples_per_split = n_samples // n_splits
+        mean_x = sum(x) / float(len(x))
+        # Bad.
+        n_samples_per_split = n_samples / n_splits
+        mean_x = sum(x) / len(x)
+
+    * Always raise an exception with ``raise MyException(args)`` where ``MyException``
+      inherits from ``Exception``. This is required for compatibility across
+      all versions of Python.
+
+      .. code-block:: python
+
+        # Good.
+        raise NotImplementedError('The Pylearn team is too lazy.')
+        # Bad.
+        raise NotImplementedError, 'The Pylearn team is too lazy.'
+        raise 'The Pylearn team is too lazy to implement this.'
+
+    * Use either ``try ... except`` or ``try ... finally``, but do not mix
+      ``except`` with ``finally`` (which is not supported in Python 2.4).
+      You can however embed one into the other to mimic the ``try ... except ...
+      finally`` behavior.
+
+      .. code-block:: python
+    
+        # Good.
+        try:
+            try:
+                something_that_may_fail()
+            except SomeError:
+                do_something_if_it_failed()
+        finally:
+            always_do_this_regardless_of_what_happened()
+        # Bad.
+        try:
+            something_that_may_fail()
+        except SomeError:
+            do_something_if_it_failed()
+        finally:
+            always_do_this_regardless_of_what_happened()
+
+    * No conditional expression (not supported in Python 2.4). These are
+      expressions of the form ``x = y if condition else z``.
+
+    * Do not use the ``all`` and ``any`` builtin functions (they are not supported
+      in Python 2.4). Instead, import them from ``theano.gof.python25`` (or
+      use ``numpy.all`` / ``numpy.any`` for array data).
+
+    * Do not use the ``hashlib`` module (not supported in Python 2.4). We will
+      probably provide a wrapper around it to be compatible with all Python
+      versions.
+
+    * Use ``numpy.inf`` and ``numpy.nan`` rather than
+      ``float('inf')`` / ``float('nan')`` (should be slightly more efficient even
+      if efficiency is typically not an issue here, the main goal being code
+      consistency). Also, always use ``numpy.isinf`` / ``numpy.isnan`` to
+      test infinite / NaN values. This is important because ``numpy.nan !=
+      float('nan')``.
+
     * Avoid backslashes whenever possible. They make it more
       difficult to edit code, and they are ugly (as well as potentially
       dangerous if there are trailing white spaces).
@@ -195,6 +355,50 @@
                                            my_everything]:
             ...
 
+    * Use the ``key`` argument instead of ``cmp`` when sorting (for Python 3
+      compatibility).
+
+      .. code-block:: python
+
+        # Good.
+        my_list.sort(key=abs)
+        # Bad.
+        my_list.sort(cmp=lambda x, y: cmp(abs(x), abs(y)))
+
+    * Whenever you read / write binary files, specify it in the mode ('rb' for
+      reading, 'wb' for writing). This is important for cross-platform and
+      Python 3 compatibility (e.g. when pickling / unpickling objects).
+
+      .. code-block:: python
+
+        # Good.
+        cPickle.dump(obj, open('my_obj.pkl', 'wb'), protocol=-1)
+        # Bad.
+        cPickle.dump(obj, open('my_obj.pkl', 'w'), protocol=-1)
+
+    * Avoid tuple parameter unpacking as it can lead to very ugly code when
+      converting to Python 3.
+
+      .. code-block:: python
+
+        # Good.
+        def f(x, y_z):
+            y, z = y_z
+            ...
+        # Bad.
+        def f(x, (y, z)):
+            ...
+
+    * Only use ``cPickle``, not ``pickle`` (except for debugging purpose since
+      error messages from ``pickle`` are sometimes easier to understand).
+
+    * A script's only top-level code should be something like:
+
+      .. code-block:: python
+
+        if __name__ == '__main__':
+            sys.exit(main())
+
 
 The ``logging`` Module vs. the ``warning`` Module
 =================================================
@@ -246,10 +450,73 @@
 Code Sample
 ===========
 
-The following code sample illustrates many of the coding guidelines one should
-follow in Pylearn.
+The following code sample illustrates some of the coding guidelines one should
+follow in Pylearn. This is still a work-in-progress.
 
 .. code-block:: python
 
+    #! /usr/bin/env python
+
+    """Sample code. There may still be mistakes / missing elements."""
+
+    __authors__ = "Olivier Delalleau"
+    __copyright__ = "(c) 2010, Universite de Montreal"
+    __license__ = "3-clause BSD License"
+    __contact__ = "Olivier Delalleau <delallea@iro>"
+
+    # Standard library imports are on a single line.
     import os, sys, time
 
+    # Third-party imports come after standard library imports, and there is
+    # only one import per line. Imports are sorted lexicographically.
+    import numpy
+    import scipy
+    import theano
+    # Put 'from' imports below.
+    from numpy import argmax
+    from theano import tensor
+    
+    # Application-specific imports come last.
+    from pylearn import dataset
+    from pylearn.optimization import minimize
+
+    def print_files_in(directory):
+        """Print the first line of each file in given directory."""
+        # TODO To be continued...
+
+    def main():
+        if len(sys.argv) != 2:
+            # Note: conventions on how to display script documentation and
+            # parse arguments are still to-be-determined.
+            print("""\
+    Usage: %s <directory>
+    Print first line of each file in given directory (in alphabetic order)."""
+                  % os.path.basename(sys.argv[0]))
+            return 1
+        print_files_in(sys.argv[1])
+        return 0
+
+    # Top-level executable code should be minimal.
+    if __name__ == '__main__':
+        sys.exit(main())
+    
+
+Automatic Code Verification
+===========================
+
+Tools will be available to make it easier to automatically ensure that code
+committed to Pylearn complies with the above specifications. This work is not
+finalized yet, but David started a `Wiki page`_ with helpful configuration
+tips for Vim.
+
+.. _Wiki page: http://www.iro.umontreal.ca/~lisa/twiki/bin/view.cgi/Divers/VimPythonRecommendations
+
+TODO
+====
+
+Things still missing from this document, being discussed in coding_style.txt:
+    - Proper style for C code and Mercurial commits
+    - Enforcing 100% test coverage of the code base
+    - Providing ways to add type checking for function arguments
+    - Conventions for script usage documentation and argument parsing
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc/v2_planning/API_formulas.txt	Fri Sep 17 16:13:58 2010 -0400
@@ -0,0 +1,96 @@
+.. _v2planning_formulas:
+
+Math formulas API
+=================
+
+Why we need a formulas API
+--------------------------
+
+There are a few reasons why having a library of mathematical formulas for Theano is a good idea:
+
+* Some formulas need special handling for the GPU.
+   * Sometimes we need to cast to floatX...
+* Some formulas have numerical stability problems.
+* The gradients of some formulas have numerical stability problems. (This happens more frequently than the previous case.)
+   * If Theano does not always apply a given stability optimization, we could do it manually in the formulas.
+* Some formulas are complex to implement and take many tries to get right.
+* The library can mimic the hierarchy of other libraries to ease migration to Theano.
+
+Having a library helps in that we solve those problems only once.
+
+What is a formula
+-----------------
+
+We define formulas as something that does not have a state. They are implemented as
+Python functions that take Theano variables as input and output Theano
+variables. If you want state, look at what the other committees will do.
+
+Formulas documentation
+----------------------
+
+We must respect what the coding committee has set for the docstring of the file and of the function.
+
+* A LaTeX mathematical description of the formula (for picture representation in generated documentation)
+* Tags (for searching):
+   * a list of the lower-level functions used
+   * category (name of the submodule itself)
+* Tell whether we did some work to make it more numerically stable. Does Theano do the optimization needed?
+* Tell whether the gradient is numerically stable. Does Theano do the optimization needed?
+* Tell whether it works / does not work / is unknown on the GPU.
+* Tell alternate names.
+* Tell the domain and range of the input/output (ranges should use the English notation for including or excluding bounds)
+
+Proposed hierarchy
+------------------
+
+Here is the proposed hierarchy for formulas:
+
+* pylearn.formulas.costs: generic / common cost functions, e.g. various cross-entropies, squared error, 
+  abs. error, various sparsity penalties (L1, Student)
+* pylearn.formulas.regularization: formulas for regularization
+* pylearn.formulas.linear: formulas for linear classifier, linear regression, factor analysis, PCA
+* pylearn.formulas.nnet: formulas for building layers of various kinds, various activation functions,
+  layers which could be plugged with various costs & penalties, and stacked
+* pylearn.formulas.ae: formulas for auto-encoders and denoising auto-encoder variants
+* pylearn.formulas.noise: formulas for corruption processes
+* pylearn.formulas.rbm: energies, free energies, conditional distributions, Gibbs sampling
+* pylearn.formulas.trees: formulas for decision trees
+* pylearn.formulas.boosting: formulas for boosting variants
+* pylearn.formulas.maths for other math formulas
+* pylearn.formulas.scipy.stats: example to implement the same interface as existing lib
+
+etc.
+
+Example
+-------
+.. code-block:: python
+
+        """
+        This script defines a few often used cost functions.
+        """
+        import theano
+        from theano import tensor
+        from tags import tags
+
+        @tags('cost','binary','cross-entropy')
+        def binary_crossentropy(output, target):
+            """ Compute the crossentropy of binary output wrt binary target.
+
+            .. math::
+                L_{CE} \equiv -\left(t\log(o) + (1-t)\log(1-o)\right)
+
+            :type output: Theano variable
+            :param output: Binary output or prediction :math:`\in[0,1]`
+            :type target: Theano variable
+            :param target: Binary target usually :math:`\in\{0,1\}`
+            """
+            return -(target * tensor.log(output) + (1.0 - target) * tensor.log(1.0 - output))
+
+
+TODO 
+----
+* Define a list of search tags to start with.
+* Add to the HTML page a list of the tags and a list of the functions associated with each of them.
+* Move existing formulas to pylearn as examples and add other basic ones.
+* theano.tensor.nnet will probably be copied to pylearn.formulas.nnet and deprecated.
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc/v2_planning/API_learner.txt	Fri Sep 17 16:13:58 2010 -0400
@@ -0,0 +1,95 @@
+# A list of "task types"
+
+'''
+ List of tasks types:
+  Attributes
+
+   sequential
+   spatial
+   structured
+   semi-supervised
+   missing-values
+
+
+  Supervised (x,y)
+
+   classification
+   regression
+   probabilistic classification
+   ranking
+   conditional density estimation
+   collaborative filtering
+   ordinal regression ?= ranking 
+
+  Unsupervised (x)
+
+   de-noising
+   feature learning ( transformation ) PCA, DAA
+   density estimation
+   inference
+
+  Other
+
+   generation (sampling)
+   structure learning ???
+
+
+Notes on metrics & statistics:
+   - some are applied to an example, others on a batch
+   - most statistics are on the dataset
+'''
+
+
+class Learner(Object):
+    '''
+    Takes data as inputs, and learns a prediction function (or several).
+
+    A learner is parametrized by hyper-parameters, which can be set from the
+    outside (a "client" from Learner, that can be a HyperLearner, a
+    Tester,...).
+
+    The data can be given all at once as a data set, or incrementally.
+    Some learners need to be fully trained in one step, whereas others can be
+    trained incrementally.
+
+    The question of statistics collection during training remains open.
+    '''
+    #def use_dataset(dataset)
+
+    # return a dictionary of hyperparameters names(keys)
+    # and value(values) 
+    def get_hyper_parameters()
+    def set_hyper_parameters(dictionary)
+
+
+    
+    
+    # Ver B
+    def eval(dataset)
+    def predict(dataset)
+
+    # Trainable
+    def train(dataset)   # train until completion
+
+    # Incremental
+    def use_dataset(dataset)
+    def adapt(n_steps =1)
+    def has_converged()
+
+    # 
+
+
+# Some example cases
+
+class HyperLearner(Learner):
+
+    ### def get_hyper_parameter_distribution(name)
+    def set_hyper_parameters_distribution(dictionary)
+
+
+def bagging(learner_factory):
+    for i in range(N):
+        learner_i = learner_factory.new()
+        # todo: get dataset_i ??
+        learner_i.use_dataset(dataset_i)
+        learner_i.train()
--- a/doc/v2_planning/coding_style.txt	Fri Sep 17 16:12:33 2010 -0400
+++ b/doc/v2_planning/coding_style.txt	Fri Sep 17 16:13:58 2010 -0400
@@ -11,6 +11,15 @@
 Open for public debate
 ----------------------
 
+    * File header:
+        - Do we put the accents in 'Universite de Montreal'?
+            OD: No (restricting code to ASCII characters is much safer)
+        - Do we put the Mercurial version number in each file?
+            OD: No (useless in my experience, if it's a release the version
+                number can be provided in the README for instance, and in
+                addition Mercurial IDs cannot be easily compared to figure
+                out which of two versions is most recent)
+
     * Avoid contractions in code comments (particularly in
       documentation): "We do not add blue to red because it does not look good"
       rather than "We don't add blue to red because it doesn't look good".
@@ -288,86 +297,13 @@
 
 Have a sample code that showcases everything one should comply to.
 
-Some coding guidelines (work-in-progress from OD)
--------------------------------------------------
-
-    
-    * Use the `key` argument instead of `cmp` when sorting (for Python 3
-      compatibility).
-    Yes:
-        my_list.sort(key=abs)
-    No:
-        my_list.sort(cmp=lambda x, y: cmp(abs(x), abs(y)))
-
-    * Use // for integer division (for readability and Python 3 compatibility).
-    Yes:
-        n_samples_per_split = n_samples // n_splits
-    No:
-        n_samples_per_split = n_samples / n_splits
-
-    * Only use ASCII characters in code files.
-
-    * Code indent must be done with four blank characters (not with tabs).
-
-    * Limit lines to 79 characters.
-
-    * Comments should start with a capital letter (unless the first word is a
-      code identifier) and end with a period (very short inline comments may
-      ignore this rule).
-
-    * Whenever you read / write binary files, specify it in the mode ('rb' for
-      reading, 'wb' for writing). This is important for cross-platform and
-      Python 3 compatibility (e.g. when pickling / unpickling objects).
-
-    * Avoid tuple parameter unpacking to avoid very ugly code when converting
-      to Python 3.
-    Yes:
-        def f(x, y_z):
-            y, z = y_z
-    No:
-        def f(x, (y, z))
+Fred's suggestion to solve issue with hashlib not available in Python 2.4:
+--------------------------------------------------------------------------
 
-    * Only use cPickle, not pickle.
-
-    * Always raise exception with
-        raise MyException(args)
-      where MyException inherits from Exception.
-
-    * Imports should be listed in alphabetical order. It makes it easier to
-      verify that something is imported, and avoids duplicated imports.
-
-    * Use absolute imports only. This is compatible across a wider range of
-      Python versions, and avoids confusion about what is being
-      imported.
-
-    * Use a leading underscore '_' for internal attributes / methods,
-      but avoid the double underscore '__' unless you know what you are
-      doing.
-
-    * A script's only top-level code should be something like:
-        if __name__ == '__main__':
-            sys.exit(main())
+You can do as in theano.gof.cc:
 
-    * No conditional expression (not supported in Python 2.4). These are
-      expressions of the form
-        x = y if condition else z
-
-    * Use either "try ... except" or "try ... finally", but do not mix
-      "except" with "finally" (which is not supported in Python 2.4).
-      You can make a try... except inside a try... finally if you need both.
+    .. code::
 
-    * Do not use the `all` and `any` builtin functions (they are not supported
-      in Python 2.4).
-      You can use numpy.{all,any} instead of import theano.gof.python25 that 
-      define all and any.
-      OD: I think we should have something like pylearn.compat.{all,any}.
-          numpy.{all,any} are meant to be used on arrays only.
-      OD: As agreed during committee's meeting, we will use
-          theano.gof.python25
-
-    * Do not use the `hashlib` module (not supported in Python 2.4).
-      You can do as in theano.gof.cc:
-      ..code::
 	if sys.version_info[:2] >= (2,5):
 	    import hashlib
     	    def hash_from_code(msg):
@@ -376,17 +312,7 @@
 	    import md5
 	    def hash_from_code(msg):
 	        return md5.new(msg).hexdigest()
-    OD: Yep, we could probably come up with such a wrapper in a pylearn.compat
-        module.
 
-    * Do not use mutable arguments as default values. Instead, use a helper
-      function:
-      Yes:
-        def f(array=None):
-            array = pylearn.if_none(array, [])
-      No:
-        def f(array=[]):
-            # Dangerous if `array` is modified down the road.
 
 Mercurial commits
 -----------------
@@ -509,15 +435,6 @@
 OD: This was discussed in committee's meeting. We agreed to provide ways to do
 this, but not to enforce its usage.
 
-Consistent inf / nan
---------------------
-
-OD: Use numpy.inf and numpy.nan rather than float('inf') / float('nan')?
-(should be slightly more efficient even if efficiency usually doesn't matter
-here - the main goal would be for everyone to use the same inf / nan to make
-the code consistent).
-OD: Approved during committee's meeting.
-
 Enforcing strict testing policy
 -------------------------------
 
@@ -561,23 +478,9 @@
         * Make public some configuration files / plugins for vim
         * Come up with official common file header (license in particular)
 
-Suggested per-file boilerplate
-------------------------------
-
-"""Module docstring as the first line, as usual."""
-
-__authors__ = "Olivier Delalleau, Frederic Bastien, David Warde-Farley"
-__copyright__ = "(c) 2010, Université de Montréal"
-__license__ = "3-clause BSD License"
-__contact__ = "Name Of Current Guardian of this file <email@address>"
+Script usage documentation
+--------------------------
 
-We could also pull Mercurial revision info and put it in __version__, this
-seems to be common.
+OD: It would be nice to have some standardized way of parsing a script's
+arguments and displaying the script usage doc to the user.
 
-Editor setup
-------------
-
-(DWF:) Some enhanced configuration files for Vim that I've put a little bit
-of work into modifying in some cases can be found at:
-
-http://www.iro.umontreal.ca/~lisa/twiki/bin/view.cgi/Divers/VimPythonRecommendations
--- a/doc/v2_planning/formulas.txt	Fri Sep 17 16:12:33 2010 -0400
+++ b/doc/v2_planning/formulas.txt	Fri Sep 17 16:13:58 2010 -0400
@@ -9,47 +9,6 @@
 - Olivier B.
 - Nicolas
 
-TODO 
-----
-* define a list of search tag to start with
-* propose an interface(many inputs, outputs, doc style, hierrache, to search, html output?)
-* find existing repositories with files for formulas.
-* move existing formulas to pylearn as examples and add other basics ones.
-** theano.tensor.nnet will probably be copied to pylearn.formulas.nnet and depricated.
-
-Why we need formulas
---------------------
-
-Their is a few reasons why having a library of mathematical formula for theano is a good reason:
-
-* Some formula have some special thing needed for the gpu. 
-   * Sometimes we need to cast to floatX...
-* Some formula have numerical stability problem.
-* Some formula gradiant have numerical stability problem. (Happen more frequently then the previous ones)
-   * If theano don't always do some stability optimization, we could do it manually in the formulas
-* Some formula as complex to implement and take many try to do correctly. 
-
-Having a library help in that we solve those problem only once.
-
-Formulas definition
--------------------
-
-We define formulas as something that don't have a state. They are implemented as python function 
-that take theano variable as input and output theano variable. If you want state, look at what the 
-learner commity will do.
-
-Formulas doc must have
-----------------------
-
-* A latex mathematical description of the formulas(for picture representation in generated documentation)
-* Tags(for searching):
-   * a list of lower lovel fct used
-   * category(name of the submodule itself)
-* Tell if we did some work to make it more numerical stable. Do theano do the optimization needed?
-* Tell if the grad is numericaly stable? Do theano do the optimization needed?
-* Tell if work on gpu/not/unknow
-* Tell alternate name
-* Tell the domaine, range of the input/output(range should use the english notation of including or excluding)
 
 List of existing repos
 ----------------------
@@ -57,33 +16,3 @@
 Olivier B. ?
 Xavier G.: git@github.com:glorotxa/DeepANN.git, see file deepANN/{Activations.py(to nnet),Noise.py,Reconstruction_cost.py(to costs),Regularization.py(to regularization}
 
-Proposed hierarchy
-------------------
-
-Here is the proposed hierarchy for formulas
-
-pylearn.formulas.costs: generic / common cost functions, e.g. various cross-entropies, squared error, 
-abs. error, various sparsity penalties (L1, Student)
-
-pylearn.formulas.regularization: formulas for regularization
-
-pylearn.formulas.linear: formulas for linear classifier, linear regression, factor analysis, PCA
-
-pylearn.formulas.nnet: formulas for building layers of various kinds, various activation functions,
-layers which could be plugged with various costs & penalties, and stacked
-
-pylearn.formulas.ae: formulas for auto-encoders and denoising auto-encoder variants
-
-pylearn.formulas.noise: formulas for corruption processes
-
-pylearn.formulas.rbm: energies, free energies, conditional distributions, Gibbs sampling
-
-pylearn.formulas.trees: formulas for decision trees
-
-pylearn.formulas.boosting: formulas for boosting variants
-
-pylearn.formulas.maths for other math formulas
-
-pylearn.formulas.scipy.stats: example to implement the same interface as existing lib
-
-etc.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc/v2_planning/index.txt	Fri Sep 17 16:13:58 2010 -0400
@@ -0,0 +1,8 @@
+.. _libdoc:
+
+.. toctree::
+   :maxdepth: 1
+
+   API_formulas
+   API_coding_style
+   api_optimization
--- a/doc/v2_planning/learn_meeting.py	Fri Sep 17 16:12:33 2010 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,76 +0,0 @@
-
-
-def bagging(learner_factory):
-    for i in range(N):
-        learner_i = learner_factory.new()
-        # todo: get dataset_i ??
-        learner_i.use_dataset(dataset_i)
-        learner_i.train()
-'''
- List of tasks types:
-  Attributes
-
-   sequential
-   spatial
-   structured
-   semi-supervised
-   missing-values
-
-
-  Supervised (x,y)
-
-   classification
-   regression
-   probabilistic classification
-   ranking
-   conditional density estimation
-   collaborative filtering
-   ordinal regression ?= ranking 
-
-  Unsupervised (x)
-
-   de-noising
-   feature learning ( transformation ) PCA, DAA
-   density estimation
-   inference
-
-  Other
-
-   generation (sampling)
-   structure learning ???
-
-
-Notes on metrics & statistics:
-   - some are applied to an example, others on a batch
-   - most statistics are on the dataset
-'''
-class Learner(Object):
-    
-    #def use_dataset(dataset)
-
-    # return a dictionary of hyperparameters names(keys)
-    # and value(values) 
-    def get_hyper_parameters()
-    def set_hyper_parameters(dictionary)
-
-
-    
-    
-    # Ver B
-    def eval(dataset)
-    def predict(dataset)
-
-    # Trainable
-    def train(dataset)   # train until complition
-
-    # Incremental
-    def use_dataset(dataset)
-    def adapt(n_steps =1)
-    def has_converged()
-
-    # 
-
-class HyperLearner(Learner):
-
-    ### def get_hyper_parameter_distribution(name)
-    def set_hyper_parameters_distribution(dictionary)
--- a/doc/v2_planning/learner.txt	Fri Sep 17 16:12:33 2010 -0400
+++ b/doc/v2_planning/learner.txt	Fri Sep 17 16:13:58 2010 -0400
@@ -1,6 +1,6 @@
 
 Comittee: AB, PL, GM, IG, RP, NB, PV
-Leader: ?
+Leader: PL
 
 Discussion of Function Specification for Learner Types
 ======================================================